Update airisk.html
· 1 year ago
cc9b45e7ae5034e4198b52fcaea335e280210f98
Parent:
4d168f68a
1 file changed +301 −185
- airisk.html +301 −185
Diff
--- a/airisk.html +++ b/airisk.html @@ -6,32 +6,32 @@ <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>Understanding AI Existential Risk (X-Risk) Cheatsheet</title> <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>⚠️</text></svg>"> - <meta name="description" content="An interactive cheatsheet explaining AI Existential Risk (X-Risk), covering key concepts, scenarios, challenges, mitigation strategies, and resources for further learning."> - <meta name="keywords" content="AI Safety, Existential Risk, X-Risk, Artificial Intelligence, AGI, ASI, Alignment Problem, AI Governance, AI Ethics, Superintelligence, Machine Learning Safety, Long-Term Future, AI Risk Mitigation"> - <meta name="author" content="AI Assistant (Revised)"> + <meta name="description" content="An interactive cheatsheet explaining AI Existential Risk (X-Risk), covering key concepts, scenarios, challenges, mitigation strategies, and resources for further learning. 
Deep dive into AGI, ASI, alignment, and AI safety."> + <meta name="keywords" content="AI Safety, Existential Risk, X-Risk, Artificial Intelligence, AGI, ASI, Alignment Problem, AI Governance, AI Ethics, Superintelligence, Machine Learning Safety, Long-Term Future, AI Risk Mitigation, Yudkowsky, Bostrom, MIRI, OpenAI, Anthropic, DeepMind"> + <meta name="author" content="David Veksler"> - <!-- Canonical URL (Update if hosted) --> - <link rel="canonical" href="https://cheatsheets.davidveksler.com/airisk.html"> <!-- *** IMPORTANT: Update this URL *** --> + <!-- Canonical URL --> + <link rel="canonical" href="https://cheatsheets.davidveksler.com/airisk.html"> <!-- Open Graph / Facebook / LinkedIn --> - <meta property="og:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet"> - <meta property="og:description" content="Explore the core concepts, risks, and mitigation strategies related to AI Existential Risk with this interactive cheatsheet. Includes definitions and links."> + <meta property="og:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet | David Veksler"> + <meta property="og:description" content="Explore the core concepts, risks, and mitigation strategies related to AI Existential Risk with this interactive cheatsheet. 
Includes definitions, key thinkers, and links."> <meta property="og:type" content="article"> - <meta property="og:url" content="https://cheatsheets.davidveksler.com/airisk.html"> <!-- *** IMPORTANT: Use Canonical URL *** --> - <!-- <meta property="og:image" content="YOUR_OG_IMAGE_URL_HERE.jpg"> <!-- *** IMPORTANT: Replace with an actual image URL *** --> + <meta property="og:url" content="https://cheatsheets.davidveksler.com/airisk.html"> + <meta property="og:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Replace with an actual image URL *** --> <meta property="og:image:alt" content="Conceptual image representing Artificial Intelligence risk and safety considerations"> - <meta property="og:site_name" content="AI Safety Cheatsheet"> + <meta property="og:site_name" content="David Veksler's Cheatsheets"> <meta property="og:locale" content="en_US"> <!-- Twitter Card --> <meta name="twitter:card" content="summary_large_image"> - <meta name="twitter:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet"> + <meta name="twitter:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet | David Veksler"> <meta name="twitter:description" content="A comprehensive, interactive guide to AI Existential Risk, covering definitions, challenges, solutions, and key resources."> - <meta name="twitter:url" content="https://cheatsheets.davidveksler.com/airisk.html"> <!-- *** IMPORTANT: Use Canonical URL *** --> - <!--<meta name="twitter:image" content="YOUR_OG_IMAGE_URL_HERE.jpg"> <!-- *** IMPORTANT: Use the same image URL *** --> + <meta name="twitter:url" content="https://cheatsheets.davidveksler.com/airisk.html"> + <meta name="twitter:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Use the same image URL *** --> <meta name="twitter:image:alt" content="Conceptual image representing Artificial Intelligence risk and safety considerations"> <!-- <meta 
name="twitter:site" content="@YourSiteHandle"> --> - <!-- <meta name="twitter:creator" content="@YourHandle"> --> + <!-- <meta name="twitter:creator" content="@DavidVeksler"> --> <!-- === CSS === --> <!-- Bootstrap CSS --> @@ -40,114 +40,220 @@ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/font/bootstrap-icons.min.css"> <!-- Custom CSS --> <style> + :root { + --primary-color: #1c3d7e; /* Deep Blue */ + --secondary-color: #3b5998; /* Medium Blue */ + --accent-color: #0d6efd; /* Bright Blue for links */ + --hover-accent-color: #0a58ca; + --light-bg: #f0f4f8; /* Light grayish blue */ + --header-bg: #e2eafc; /* Lighter blue for header */ + --card-bg: #ffffff; + --border-color: #dce1e6; + --text-color: #333; + --text-muted-color: #555; + --tooltip-bg: #212529; /* Dark tooltip */ + --tooltip-color: #fff; + --tooltip-link-color: #90caff; + --tooltip-link-hover-color: #cce5ff; + } + body { - background-color: #f0f4f8; + background-color: var(--light-bg); font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; padding-top: 0; padding-bottom: 20px; font-size: 16px; + color: var(--text-color); } .page-header { - background-color: #e2eafc; - padding: 2rem 1.5rem; + background-color: var(--header-bg); + padding: 2.5rem 1.5rem; margin-bottom: 3rem; text-align: center; border-bottom: 1px solid #c8d8e8; } - .page-header h1 { color: #1c3d7e; font-weight: 300; margin-bottom: 0.5rem; } - .page-header .lead { color: #3b5998; font-size: 1.1rem; margin-bottom: 0; } + .page-header h1 { + color: var(--primary-color); + font-weight: 600; + margin-bottom: 0.5rem; + font-size: 2.5rem; + display: flex; + align-items: center; + justify-content: center; + gap: 0.5rem; + } + .page-header h1 .bi { + font-size: 0.9em; + color: var(--secondary-color); + } + .page-header .lead { + color: var(--secondary-color); + font-size: 1.15rem; + margin-bottom: 0; + max-width: 800px; + margin-left: auto; + margin-right: auto; + } - .container { perspective: 1500px; max-width: 
1200px; } /* Added max-width */ + .container { + max-width: 1200px; + } .info-card { - background-color: #ffffff; - border: 1px solid #dce1e6; - border-radius: 0.35rem; - box-shadow: 0 3px 8px rgba(0, 0, 0, 0.08); - height: 100%; /* Ensure cards in a row are same height */ + background-color: var(--card-bg); + border: 1px solid var(--border-color); + border-radius: 0.5rem; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); + height: 100%; display: flex; flex-direction: column; - transform-style: preserve-3d; /* For tilt effect */ + transition: box-shadow 0.3s ease-in-out, transform 0.3s ease-in-out; + } + .info-card:hover { + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.12); + transform: translateY(-4px); /* Simple lift on hover */ } .info-card .card-body { - padding: 1.25rem; - flex-grow: 1; /* Allows body to expand */ - transform: translateZ(20px); /* For tilt effect */ + padding: 1.5rem; + flex-grow: 1; } .info-card h5 { - color: #1c3d7e; + color: var(--primary-color); text-align: center; - margin-bottom: 1rem; - padding-bottom: 0.6rem; - border-bottom: 1px solid #eee; + margin-bottom: 1.25rem; + padding-bottom: 0.75rem; + border-bottom: 1px solid #e8ecf1; font-weight: 600; display: flex; align-items: center; justify-content: center; flex-wrap: wrap; - gap: 0.3rem; + gap: 0.5rem; + font-size: 1.25rem; } - .info-card h5 .bi { font-size: 1.1em; color: #3b5998; order: -1; } + .info-card h5 .bi { + font-size: 1.2em; + color: var(--secondary-color); + order: -1; + } .info-card h5 a { color: inherit; text-decoration: none; } .info-card h5 a:hover { text-decoration: underline; } .info-card p.description, .info-card .card-text { - font-size: 0.95rem; - color: #333; + font-size: 1rem; + color: var(--text-color); text-align: left; margin-bottom: 1rem; + line-height: 1.6; } .info-card ul { - padding-left: 1.2rem; /* Adjusted padding */ - margin-bottom: 1rem; /* Added margin bottom */ + padding-left: 1.5rem; + margin-bottom: 1rem; + list-style-type: none; } .info-card ul li { - 
margin-bottom: 0.6rem; /* Slightly increased spacing */ - color: #333; - line-height: 1.5; - font-size: 0.95rem; + position: relative; + margin-bottom: 0.75rem; + color: var(--text-color); + line-height: 1.6; + font-size: 0.98rem; + padding-left: 1em; + } + .info-card ul li::before { + font-family: 'bootstrap-icons'; + content: "\F282"; /* bi-check-circle */ + position: absolute; + left: -0.25em; + top: 0.1em; + color: var(--accent-color); + font-size: 0.9em; + font-weight: bold; } .info-card ul li:last-child { margin-bottom: 0; } - .info-card ul ul { margin-top: 0.5rem; margin-bottom: 0.5rem; } /* Nested lists */ + .info-card ul ul { margin-top: 0.6rem; margin-bottom: 0.6rem; } + .info-card ul ul li::before { + content: "\F231"; /* bi-caret-right-fill */ + color: var(--secondary-color); + } + - .info-card a { color: #0d6efd; text-decoration: none; } - .info-card a:hover { color: #0a58ca; text-decoration: underline; } + .info-card a { color: var(--accent-color); text-decoration: none; font-weight: 500; } + .info-card a:hover { color: var(--hover-accent-color); text-decoration: underline; } /* Tooltip styling */ span[data-bs-toggle="tooltip"], strong[data-bs-toggle="tooltip"] { - border-bottom: 1px dotted #0d6efd; + border-bottom: 1px dotted var(--accent-color); cursor: help; text-decoration: none; + color: var(--primary-color); + font-weight: 600; } span[data-bs-toggle="tooltip"]:hover, strong[data-bs-toggle="tooltip"]:hover { - color: #0a58ca; border-bottom-color: #0a58ca; + color: var(--hover-accent-color); border-bottom-color: var(--hover-accent-color); + } + .tooltip { + --bs-tooltip-bg: var(--tooltip-bg); + --bs-tooltip-color: var(--tooltip-color); + --bs-tooltip-max-width: 400px; + --bs-tooltip-padding-x: 1rem; + --bs-tooltip-padding-y: 0.75rem; + --bs-tooltip-font-size: 0.9rem; + z-index: 1080; } - .tooltip-inner a { color: #a0c4ff; text-decoration: underline; } - .tooltip-inner a:hover { color: #ffffff; } - .tooltip { --bs-tooltip-max-width: 400px; z-index: 
1080; } /* Wider tooltips & ensure high z-index */ + .tooltip-inner a { color: var(--tooltip-link-color); text-decoration: underline; } + .tooltip-inner a:hover { color: var(--tooltip-link-hover-color); } + /* Subheadings within cards */ .card-subheading { - font-weight: bold; - color: #3b5998; - margin-top: 1rem; /* Increased top margin */ - margin-bottom: 0.4rem; /* Increased bottom margin */ - font-size: 0.9em; - display: block; /* Ensure it takes full width */ + font-weight: 700; + color: var(--secondary-color); + margin-top: 1.25rem; + margin-bottom: 0.5rem; + font-size: 0.95em; + display: block; + padding-left: 0.25rem; + border-left: 3px solid var(--secondary-color); } - .row > * { margin-bottom: 1.5rem; } /* Standard Bootstrap gutter */ + .row > * { margin-bottom: 1.75rem; } - footer { padding: 2rem 0; font-size: 0.9em; margin-top: 2rem; text-align: center; color: #555;} - footer a { color: #3b5998; } - footer a:hover { color: #1c3d7e; } - .source-link { font-style: italic; font-size: 0.85em; display: block; margin-top: 0.8rem; } /* Make source link distinct */ - .alert small { display: inline-block; text-align: center; } /* Center alert text */ + footer { + padding: 2.5rem 0 1.5rem 0; + font-size: 0.9em; + margin-top: 3rem; + text-align: center; + color: var(--text-muted-color); + border-top: 1px solid var(--border-color); + } + footer a { color: var(--secondary-color); font-weight: 500; } + footer a:hover { color: var(--primary-color); } + .source-link { + font-style: italic; + font-size: 0.88em; + display: block; + margin-top: 1rem; + color: var(--text-muted-color); + text-align: right; + } + .source-link a { + color: var(--secondary-color); + font-weight: normal; + } + .source-link a:hover { color: var(--primary-color); } + + .alert.alert-warning { + background-color: #fff3cd; + border-color: #ffeeba; + color: #856404; + padding: 1rem 1.25rem; + } + .alert small { display: block; text-align: center; font-size: 0.9em; } </style> </head> @@ -155,211 
+261,214 @@ <header class="page-header"> <div class="container"> - <h1 class="display-5"><i class="bi bi-robot"></i> Understanding AI Existential Risk (X-Risk)</h1> + <h1><i class="bi bi-shield-exclamation"></i> Understanding AI Existential Risk (X-Risk)</h1> <p class="lead">A Cheatsheet on the Potential Risks from Advanced AI and Efforts Towards Safety.</p> </div> </header> - <div class="container"> + <main class="container"> <div class="row"> <!-- What is AI X-Risk? --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> <!-- Added d-flex for consistent height --> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-question-circle"></i> 1. What is AI X-Risk?</h5> + <h5><i class="bi bi-question-octagon-fill"></i> 1. What is AI X-Risk?</h5> <p class="card-text"> - AI Existential Risk (X-Risk) refers to the potential for artificial intelligence to cause <strong data-bs-toggle="tooltip" data-bs-html="true" title="An event that causes human extinction or permanently and drastically curtails humanity's potential. Concept explored by thinkers like Nick Bostrom. <a href='https://nickbostrom.com/existential/risks.html' target='_blank' rel='noopener'>More Info</a>">human extinction</strong> or <strong data-bs-toggle="tooltip" title="e.g., irreversible civilizational collapse, permanent loss of control, or lock-in of a dystopian state.">irrevocably curtail humanity's potential</strong>. + AI Existential Risk (X-Risk) refers to the potential for artificial intelligence to cause <strong data-bs-toggle="tooltip" data-bs-html="true" title="An event that causes human extinction or permanently and drastically curtails humanity's potential. Concept explored by thinkers like Nick Bostrom. 
<a href='https://nickbostrom.com/existential/risks.html' target='_blank' rel='noopener noreferrer'>More Info</a>">human extinction</strong> or <strong data-bs-toggle="tooltip" title="e.g., irreversible civilizational collapse, permanent loss of control, or lock-in of a dystopian state.">irrevocably curtail humanity's potential</strong>. </p> <ul> - <li>Primarily concerns future <span data-bs-toggle="tooltip" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks. Still hypothetical.">AGI</span> or <span data-bs-toggle="tooltip" title="Artificial Superintelligence: AI significantly surpassing human cognitive abilities in virtually all relevant domains. Highly hypothetical.">ASI</span>.</li> + <li>Primarily concerns future <span data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks. Still hypothetical. See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a>.">AGI</span> or <span data-bs-toggle="tooltip" title="Artificial Superintelligence: AI significantly surpassing human cognitive abilities in virtually all relevant domains. 
Highly hypothetical.">ASI</span>.</li> <li>Stems from potential misalignment between AI goals and human values/survival.</li> <li>Involves the risk of losing control over systems far more intelligent than us.</li> <li>Distinct from near-term AI risks (bias, jobs, privacy), though related.</li> </ul> - <span class="source-link">See: <a href="https://www.safe.ai/explainers/ai-existential-risk" target="_blank" rel="noopener">CAIS Explainer</a>, <a href="https://futureoflife.org/ai/existential-risk-from-artificial-intelligence/" target="_blank" rel="noopener">FLI Overview</a></span> + <span class="source-link">See: <a href="https://www.safe.ai/explainers/ai-existential-risk" target="_blank" rel="noopener noreferrer">CAIS Explainer</a>, <a href="https://futureoflife.org/ai/existential-risk-from-artificial-intelligence/" target="_blank" rel="noopener noreferrer">FLI Overview</a></span> </div> </div> - </div> + </article> <!-- Why the Concern? --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-exclamation-triangle"></i> 2. Why is it a Concern?</h5> + <h5><i class="bi bi-exclamation-diamond-fill"></i> 2. Why is it a Concern?</h5> <p class="card-text">The core argument rests on several interconnected factors:</p> <ul> <li><strong>Capabilities:</strong> Future AI could possess vastly superhuman intelligence and strategic ability.</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring advanced AI goals align reliably with complex, nuanced human values. A central problem in AI Safety. 
<a href='https://www.alignmentforum.org/tag/alignment-problem' target='_blank' rel='noopener'>Alignment Forum</a>"><strong>Alignment Failure:</strong></span> Difficulty in specifying and ensuring AI pursues beneficial goals. + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring advanced AI goals align reliably with complex, nuanced human values. A central problem in AI Safety. <a href='https://www.alignmentforum.org/tag/alignment-problem' target='_blank' rel='noopener noreferrer'>Alignment Forum</a>"><strong>Alignment Failure:</strong></span> Difficulty in specifying and ensuring AI pursues beneficial goals. <ul> <li><em>Outer Alignment:</em> Defining the 'right' objective.</li> <li><em>Inner Alignment:</em> Ensuring the AI's internal motivation matches the objective.</li> </ul> </li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Once an ASI exists, humans might lose the ability to control or shut it down if its goals diverge. See Yudkowsky's writings on <a href='https://intelligence.org/2017/10/13/there-is-no-fire-alarm/' target='_blank' rel='noopener'>uncontrollability</a>."><strong>Control Problem:</strong></span> Difficulty retaining control over a superintelligent entity.</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Highly intelligent agents, regardless of final goals, are likely to pursue common sub-goals (self-preservation, resource acquisition, goal integrity) that could conflict with humans. Concept by Steve Omohundro / Nick Bostrom. <a href='https://nickbostrom.com/superintelligence.html' target='_blank' rel='noopener'>Superintelligence Ch.7</a>"><strong>Instrumental Convergence:</strong></span> Convergent sub-goals like power-seeking.</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intelligence level and final goals are potentially independent. A superintelligent AI could have *any* goal, including trivial or harmful ones. Concept by Nick Bostrom. 
<a href='https://nickbostrom.com/superintelligence.html' target='_blank' rel='noopener'>Superintelligence Ch.7</a>"><strong>Orthogonality Thesis:</strong></span> Intelligence doesn't imply benevolence.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Once an ASI exists, humans might lose the ability to control or shut it down if its goals diverge. See Yudkowsky's writings on <a href='https://intelligence.org/2017/10/13/there-is-no-fire-alarm/' target='_blank' rel='noopener noreferrer'>uncontrollability</a>."><strong>Control Problem:</strong></span> Difficulty retaining control over a superintelligent entity.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Highly intelligent agents, regardless of final goals, are likely to pursue common sub-goals (self-preservation, resource acquisition, goal integrity) that could conflict with humans. Concept by S. Omohundro / N. Bostrom. See <a href='https://cheatsheets.davidveksler.com/yudkowsky-rationality-ai-cheatsheet.html' target='_blank' rel='noopener noreferrer'>Yudkowsky's ideas</a>."><strong>Instrumental Convergence:</strong></span> Convergent sub-goals like power-seeking.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intelligence level and final goals are potentially independent. A superintelligent AI could have *any* goal, including trivial or harmful ones. Concept by N. Bostrom. 
More in <a href='https://cheatsheets.davidveksler.com/yudkowsky-rationality-ai-cheatsheet.html' target='_blank' rel='noopener noreferrer'>Yudkowsky's cheatsheet</a>."><strong>Orthogonality Thesis:</strong></span> Intelligence doesn't imply benevolence.</li> </ul> </div> </div> - </div> + </article> <!-- Key Concepts & Terminology --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-lightbulb"></i> 3. Key Concepts & Terminology</h5> + <h5><i class="bi bi-lightbulb-fill"></i> 3. Key Concepts & Terminology</h5> <p class="card-text">Understanding the language of AI Safety:</p> <ul> - <li><strong data-bs-toggle="tooltip" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks. Still hypothetical.">AGI:</strong> Artificial General Intelligence.</li> - <li><strong data-bs-toggle="tooltip" title="Artificial Superintelligence: AI significantly surpassing human cognitive abilities in virtually all relevant domains. Highly hypothetical.">ASI:</strong> Artificial Superintelligence.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="The challenge of ensuring AI systems act in ways intended by their designers and beneficial to humanity. See <a href='https://www.lesswrong.com/tag/ai-alignment' target='_blank' rel='noopener'>LessWrong tag</a>.">Alignment Problem:</strong> Ensuring AI goals align with ours.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Understanding the internal reasoning and decision-making processes of AI models. Crucial for trust and verification. 
<a href='https://distill.pub/2018/building-blocks/' target='_blank' rel='noopener'>Distill Article</a>.">Interpretability (XAI):</strong> Understanding 'why'.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Assessing AI model capabilities, especially potentially dangerous or unforeseen ones. See <a href='https://metr.org/' target='_blank' rel='noopener'>METR</a> or <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener'>Apollo Research</a>.">Capabilities / Evaluations:</strong> Testing what AI can do.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="An AI model *appears* aligned during training but pursues hidden goals once deployed. See <a href='https://arxiv.org/abs/2312.09474' target='_blank' rel='noopener'>Recent Paper (Hubinger)</a>.">Deceptive Alignment:</strong> AI hiding true intentions.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Managing access to large-scale computing power for frontier AI models. See <a href='https://www.governance.ai/research-agenda/compute-governance' target='_blank' rel='noopener'>GovAI on Compute</a>.">Compute Governance:</strong> Controlling training resources.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Policies for developing capable AI carefully. See <a href='https://openai.com/safety/responsible-practices' target='_blank' rel='noopener'>OpenAI</a> or <a href='https://www.anthropic.com/responsible-scaling-policy' target='_blank' rel='noopener'>Anthropic</a> policies.">Responsible Scaling:</strong> Careful development.</li> - <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Testing AI systems for vulnerabilities, dangerous capabilities, or alignment failures, often by simulating adversarial attacks.">Red Teaming:</strong> Adversarial testing.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities. 
See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a>.">AGI:</strong> Artificial General Intelligence.</li> + <li><strong data-bs-toggle="tooltip" title="Artificial Superintelligence: AI vastly surpassing human intellect.">ASI:</strong> Artificial Superintelligence.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring AI acts aligned with human intentions. See <a href='https://www.lesswrong.com/tag/ai-alignment' target='_blank' rel='noopener noreferrer'>LessWrong</a>.">Alignment Problem:</strong> AI goals = Our goals.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Understanding AI's internal reasoning. See <a href='https://distill.pub/2018/building-blocks/' target='_blank' rel='noopener noreferrer'>Distill</a>.">Interpretability (XAI):</strong> Understanding 'why'.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Assessing AI capabilities, esp. dangerous ones. See <a href='https://metr.org/' target='_blank' rel='noopener noreferrer'>METR</a>, <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo</a>.">Capabilities / Evals:</strong> Testing AI abilities.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="AI *appears* aligned but hides true goals. See <a href='https://arxiv.org/abs/2312.09474' target='_blank' rel='noopener noreferrer'>Hubinger</a>.">Deceptive Alignment:</strong> Hidden intentions.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Controlling access to large compute for AI training. See <a href='https://www.governance.ai/research-agenda/compute-governance' target='_blank' rel='noopener noreferrer'>GovAI</a>.">Compute Governance:</strong> Regulating resources.</li> + <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Careful AI development policies. 
See <a href='https://openai.com/safety/responsible-practices' target='_blank' rel='noopener noreferrer'>OpenAI</a>, <a href='https://www.anthropic.com/responsible-scaling-policy' target='_blank' rel='noopener noreferrer'>Anthropic</a>.">Responsible Scaling:</strong> Cautious development.</li> + <li><strong data-bs-toggle="tooltip" title="Adversarial testing for vulnerabilities and dangerous capabilities.">Red Teaming:</strong> Stress-testing AI.</li> </ul> </div> </div> - </div> + </article> <!-- Potential Risk Scenarios --> - <div class="col-lg-6 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-6 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-diagram-3-fill"></i> 4. Potential Risk Scenarios</h5> + <h5><i class="bi bi-signpost-2-fill"></i> 4. Potential Risk Scenarios</h5> <p class="card-text">How existential catastrophe might occur:</p> <ul> - <li><strong>Misaligned Objectives:</strong> ASI optimizes a poorly specified goal with catastrophic side effects (e.g., the <span data-bs-toggle="tooltip" data-bs-html="true" title="Thought experiment where an ASI converts all available matter into paperclips. Illustrates goal specification difficulty. 
<a href='https://wiki.lesswrong.com/wiki/Paperclip_maximizer' target='_blank' rel='noopener'>LessWrong Wiki</a>.">Paperclip Maximizer</span>).</li> - <li><strong>Power-Seeking/Goal Drift:</strong> AI seeks power/resources or modifies its goals (<span data-bs-toggle="tooltip" data-bs-html="true" title="AI develops unintended goals during training that deviate from the intended objective.">Goal Misgeneralization</span>), overriding human control.</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intense competition (nations, corporations) leading to rushed development and deployment of unsafe AI. See <a href='https://www.alignmentforum.org/tag/race-dynamics' target='_blank' rel='noopener'>Race Dynamics</a> discussion."><strong>AI Arms Race:</strong></span> Competition compromises safety.</li> + <li><strong>Misaligned Objectives:</strong> ASI optimizes a poorly specified goal with catastrophic side effects (e.g., the <span data-bs-toggle="tooltip" data-bs-html="true" title="Thought experiment where an ASI converts all available matter into paperclips to fulfill its goal. Illustrates goal specification difficulty. <a href='https://wiki.lesswrong.com/wiki/Paperclip_maximizer' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">Paperclip Maximizer</span>).</li> + <li><strong>Power-Seeking/Goal Drift:</strong> AI seeks power/resources or modifies its goals (<span data-bs-toggle="tooltip" data-bs-html="true" title="AI develops unintended goals during training that deviate from the intended objective, often due to proxy misspecification.">Goal Misgeneralization</span>), overriding human control.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intense competition (nations, corporations) leading to rushed development and deployment of unsafe AI. 
See <a href='https://www.alignmentforum.org/tag/race-dynamics' target='_blank' rel='noopener noreferrer'>Race Dynamics</a> discussion."><strong>AI Arms Race:</strong></span> Competition compromises safety.</li> <li><strong>Unforeseen Interactions:</strong> Complex, emergent negative outcomes from multiple AIs or AI-environment interactions.</li> <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Deliberate use of powerful AI for destructive purposes (e.g., autonomous weapons, engineered pandemics, mass manipulation)."><strong>Weaponized AI / Misuse:</strong></span> Malicious actors leveraging AI.</li> - <li><strong>Loss of Human Agency:</strong> Over-reliance erodes human control, potentially leading to <span data-bs-toggle="tooltip" data-bs-html="true" title="A stable, undesirable state imposed by a controlling AI, from which it's very difficult to escape.">Value Lock-in</span>.</li> + <li><strong>Loss of Human Agency:</strong> Over-reliance erodes human control, potentially leading to <span data-bs-toggle="tooltip" data-bs-html="true" title="A stable, undesirable state imposed by a controlling AI, from which it's very difficult or impossible for humanity to escape or recover its potential.">Value Lock-in</span>.</li> </ul> - <span class="source-link">Scenarios explored in <a href="https://nickbostrom.com/superintelligence.html" target="_blank" rel="noopener">Superintelligence</a> (Bostrom), <a href="https://www.humancompatible.ai/" target="_blank" rel="noopener">Human Compatible</a> (Russell).</span> + <span class="source-link">Scenarios in <a href="https://nickbostrom.com/superintelligence.html" target="_blank" rel="noopener noreferrer">Superintelligence</a>, <a href="https://www.humancompatible.ai/" target="_blank" rel="noopener noreferrer">Human Compatible</a>.</span> </div> </div> - </div> + </article> <!-- Core Challenges --> - <div class="col-lg-6 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" 
data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-6 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> <h5><i class="bi bi-bricks"></i> 5. Core Challenges (Why this is Hard)</h5> <p class="card-text">Significant hurdles exist in ensuring AI safety:</p> <ul> - <li><strong>Specifying Human Values:</strong> Defining complex, evolving values is hard (<span data-bs-toggle="tooltip" data-bs-html="true" title="Difficulty pointing AI towards complex values. See J. Wentworth's <a href='https://www.lesswrong.com/posts/gQY6LrTWJNkTv8YJR/the-pointers-problem-human-values-are-a-function-of-humans' target='_blank' rel='noopener'>Pointers Problem</a>.">Value Specification</span>).</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="How can humans reliably supervise or evaluate agents much smarter/faster? See <a href='https://openai.com/research/scalable-oversight' target='_blank' rel='noopener'>OpenAI's work</a>."><strong>Scalable Oversight:</strong></span> Supervising superhuman systems.</li> + <li><strong>Specifying Human Values:</strong> Defining complex, evolving values is hard (<span data-bs-toggle="tooltip" data-bs-html="true" title="Difficulty pointing AI towards complex human values. See J. Wentworth's <a href='https://www.lesswrong.com/posts/gQY6LrTWJNkTv8YJR/the-pointers-problem-human-values-are-a-function-of-humans' target='_blank' rel='noopener noreferrer'>Pointers Problem</a>.">Value Specification</span>).</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="How can humans reliably supervise or evaluate agents much smarter/faster? 
See <a href='https://openai.com/research/scalable-oversight' target='_blank' rel='noopener noreferrer'>OpenAI's work</a>."><strong>Scalable Oversight:</strong></span> Supervising superhuman systems.</li> <li><strong>Predicting Emergent Capabilities:</strong> Hard to anticipate abilities from scaling (<span data-bs-toggle="tooltip" title="Unexpected abilities that appear in larger models, not present in smaller ones.">Emergence</span>).</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Getting competing actors to agree on safety standards is a major geopolitical challenge. See <a href='https://www.governance.ai/' target='_blank' rel='noopener'>GovAI</a> research."><strong>Coordination Failure:</strong></span> Difficulty in global cooperation.</li> - <li><strong>Detecting Deception:</strong> Verifying an AI isn't pretending alignment (<span data-bs-toggle="tooltip" data-bs-html="true" title="Research on identifying model deception. See <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener'>Apollo Research</a>.">Deception Detection</span>).</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="'When a measure becomes a target, it ceases to be a good measure.' AI may optimize metrics, not the true goal."><strong>Goodhart's Law / Proxy Gaming:</strong></span> Optimizing metrics wrongly.</li> - <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring AI behaves safely even in novel situations or when facing unexpected inputs."><strong>Robustness & Generalization:</strong></span> Safe behavior outside training.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Getting competing actors to agree on safety standards is a major geopolitical challenge. 
See <a href='https://www.governance.ai/' target='_blank' rel='noopener noreferrer'>GovAI</a> research."><strong>Coordination Failure:</strong></span> Difficulty in global cooperation.</li> + <li><strong>Detecting Deception:</strong> Verifying an AI isn't pretending alignment (<span data-bs-toggle="tooltip" data-bs-html="true" title="Research on identifying model deception. See <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo Research</a>.">Deception Detection</span>).</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="'When a measure becomes a target, it ceases to be a good measure.' AI may optimize metrics, not the true goal. (Goodhart's Law / Campbell's Law)"><strong>Proxy Gaming:</strong></span> Optimizing metrics wrongly.</li> + <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring AI behaves safely even in novel situations or when facing unexpected inputs not seen during training."><strong>Robustness & Generalization:</strong></span> Safe behavior outside training.</li> </ul> </div> </div> - </div> + </article> <!-- Mitigation: Technical Research --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-tools"></i> 6a. Mitigation: Technical Safety</h5> + <h5><i class="bi bi-wrench-adjustable-circle-fill"></i> 6a. 
Mitigation: Technical Safety</h5> <p class="card-text">Developing technical methods for safe AI:</p> <ul> - <li><strong>Interpretability:</strong> Understanding models (<a href="https://transformer-circuits.pub/2021/framework/index.html" target="_blank" rel="noopener">Circuits</a>, <a href="https://www.alignment.org/theory/" target="_blank" rel="noopener">ARC</a>).</li> - <li><strong>Value Learning:</strong> AI learning human values (<a href="https://humancompatible.ai/" target="_blank" rel="noopener">CHAI</a>, <a href="https://deepmind.google/discover/blog/scalable-agent-alignment-via-reward-modeling/" target="_blank" rel="noopener">Reward Modeling</a>).</li> - <li><strong>Scalable Oversight:</strong> Supervising smarter AI (<a href="https://openai.com/research/debate" target="_blank" rel="noopener">Debate</a>, <a href="https://www.anthropic.com/constitutional-ai" target="_blank" rel="noopener">Constitutional AI</a>).</li> - <li><strong>Robustness:</strong> Safe behavior in new situations (<a href="https://buildaligned.ai/" target="_blank" rel="noopener">Aligned AI</a>).</li> - <li><strong>Verification:</strong> Proving safety properties (<a href="https://atlascomputing.org/" target="_blank" rel="noopener">Atlas Computing</a>).</li> - <li><strong>Evals & Red Teaming:</strong> Testing for risks (<a href="https://metr.org/" target="_blank" rel="noopener">METR</a>, <a href="https://openai.com/red-teaming-network" target="_blank" rel="noopener">OpenAI Red Teaming</a>).</li> - <li><strong>Agent Foundations:</strong> Understanding agency (<a href="https://intelligence.org/" target="_blank" rel="noopener">MIRI</a>, <a href="https://orxl.org" target="_blank" rel="noopener">Orthogonal</a>).</li> + <li><strong>Interpretability:</strong> Understanding models (<a href="https://transformer-circuits.pub/2021/framework/index.html" target="_blank" rel="noopener noreferrer">Circuits</a>, <a href="https://www.alignment.org/theory/" target="_blank" rel="noopener 
noreferrer">ARC</a>).</li> + <li><strong>Value Learning:</strong> AI learning human values (<a href="https://humancompatible.ai/" target="_blank" rel="noopener noreferrer">CHAI</a>, <a href="https://deepmind.google/discover/blog/scalable-agent-alignment-via-reward-modeling/" target="_blank" rel="noopener noreferrer">Reward Modeling</a>).</li> + <li><strong>Scalable Oversight:</strong> Supervising smarter AI (<a href="https://openai.com/research/debate" target="_blank" rel="noopener noreferrer">Debate</a>, <a href="https://www.anthropic.com/constitutional-ai" target="_blank" rel="noopener noreferrer">Constitutional AI</a>).</li> + <li><strong>Robustness:</strong> Safe behavior in new situations (<a href="https://buildaligned.ai/" target="_blank" rel="noopener noreferrer">Aligned AI</a>).</li> + <li><strong>Verification:</strong> Proving safety properties (<a href="https://atlascomputing.org/" target="_blank" rel="noopener noreferrer">Atlas Computing</a>).</li> + <li><strong>Evals & Red Teaming:</strong> Testing for risks (<a href="https://metr.org/" target="_blank" rel="noopener noreferrer">METR</a>, <a href="https://openai.com/red-teaming-network" target="_blank" rel="noopener noreferrer">OpenAI Red Teaming</a>).</li> + <li><strong>Agent Foundations:</strong> Understanding agency (<a href="https://intelligence.org/" target="_blank" rel="noopener noreferrer">MIRI</a>, <a href="https://orxl.org" target="_blank" rel="noopener noreferrer">Orthogonal</a>).</li> </ul> - <span class="source-link">Key Labs: <a href="https://www.deepmind.com/" target="_blank" rel="noopener">DeepMind</a>, <a href="https://www.anthropic.com/" target="_blank" rel="noopener">Anthropic</a>, <a href="https://openai.com/" target="_blank" rel="noopener">OpenAI</a>, <a href="https://www.redwoodresearch.org/" target="_blank" rel="noopener">Redwood</a>, <a href="https://safe.ai/" target="_blank" rel="noopener">CAIS</a>.</span> + <span class="source-link">Labs: <a href="https://deepmind.google/" 
target="_blank" rel="noopener noreferrer">DeepMind</a>, <a href="https://www.anthropic.com/" target="_blank" rel="noopener noreferrer">Anthropic</a>, <a href="https://openai.com/" target="_blank" rel="noopener noreferrer">OpenAI</a>, <a href="https://www.redwoodresearch.org/" target="_blank" rel="noopener noreferrer">Redwood</a>, <a href="https://safe.ai/" target="_blank" rel="noopener noreferrer">CAIS</a>.</span> </div> </div> - </div> + </article> <!-- Mitigation: Governance & Policy --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-building-gear"></i> 6b. Mitigation: Governance & Policy</h5> + <h5><i class="bi bi-bank2"></i> 6b. Mitigation: Governance & Policy</h5> <p class="card-text">Shaping norms, standards, and regulations:</p> <ul> - <li><strong>Standards & Auditing:</strong> Benchmarks & verification (<a href="https://www.nist.gov/artificial-intelligence/ai-risk-management-framework" target="_blank" rel="noopener">NIST AI RMF</a>, <a href="https://digital-strategy.ec.europa.eu/en/policies/regulatory-framework-ai" target="_blank" rel="noopener">EU AI Act</a>).</li> - <li><strong>Compute Governance:</strong> Regulating training compute (<a href="https://www.governance.ai/research-agenda/compute-governance" target="_blank" rel="noopener">GovAI</a>, <a href="https://cset.georgetown.edu/publication/securing-ai-model-weights/" target="_blank" rel="noopener">CSET</a>).</li> - <li><strong>Intl Cooperation:</strong> Treaties, dialogues (<a href="https://www.aisi.gov.uk/" target="_blank" rel="noopener">UK AISI</a>, <a href="https://www.nist.gov/artificial-intelligence/artificial-intelligence-safety-institute" target="_blank" rel="noopener">US AISI</a>, <a 
href="https://www.oecd.org/en/about/programmes/global-partnership-on-artificial-intelligence.html" target="_blank" rel="noopener">GPAI</a>).</li> - <li><strong>Monitoring & Tracking:</strong> Observing AI progress (<a href="https://epochai.org/" target="_blank" rel="noopener">Epoch AI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener">CSET</a>).</li> - <li><strong>Liability Frameworks:</strong> Responsibility for AI harms (<a href="https://partnershiponai.org/" target="_blank" rel="noopener">PAI</a>).</li> - <li><strong>Risk Assessment:</strong> Evaluating impacts (<a href="https://longtermrisk.org/" target="_blank" rel="noopener">CLR</a>, <a href="https://www.cser.ac.uk/" target="_blank" rel="noopener">CSER</a>).</li> + <li><strong>Standards & Auditing:</strong> Benchmarks & verification (<a href="https://www.nist.gov/artificial-intelligence/ai-risk-management-framework" target="_blank" rel="noopener noreferrer">NIST AI RMF</a>, <a href="https://digital-strategy.ec.europa.eu/en/policies/regulatory-framework-ai" target="_blank" rel="noopener noreferrer">EU AI Act</a>).</li> + <li><strong>Compute Governance:</strong> Regulating training compute (<a href="https://www.governance.ai/research-agenda/compute-governance" target="_blank" rel="noopener noreferrer">GovAI</a>, <a href="https://cset.georgetown.edu/publication/securing-ai-model-weights/" target="_blank" rel="noopener noreferrer">CSET</a>).</li> + <li><strong>Intl Cooperation:</strong> Treaties, dialogues (<a href="https://www.aisi.gov.uk/" target="_blank" rel="noopener noreferrer">UK AISI</a>, <a href="https://www.nist.gov/artificial-intelligence/artificial-intelligence-safety-institute" target="_blank" rel="noopener noreferrer">US AISI</a>, <a href="https://www.oecd.org/en/about/programmes/global-partnership-on-artificial-intelligence.html" target="_blank" rel="noopener noreferrer">GPAI</a>).</li> + <li><strong>Monitoring & Tracking:</strong> Observing AI progress (<a 
href="https://epochai.org/" target="_blank" rel="noopener noreferrer">Epoch AI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener noreferrer">CSET</a>).</li> + <li><strong>Liability Frameworks:</strong> Responsibility for AI harms (<a href="https://partnershiponai.org/" target="_blank" rel="noopener noreferrer">PAI</a>).</li> + <li><strong>Risk Assessment:</strong> Evaluating impacts (<a href="https://longtermrisk.org/" target="_blank" rel="noopener noreferrer">CLR</a>, <a href="https://www.cser.ac.uk/" target="_blank" rel="noopener noreferrer">CSER</a>).</li> </ul> - <span class="source-link">Key Orgs: <a href="https://www.governance.ai/" target="_blank" rel="noopener">GovAI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener">CSET</a>, <a href="https://aipolicy.us/" target="_blank" rel="noopener">CAIP</a>, <a href="https://www.iaps.ai/" target="_blank" rel="noopener">IAPS</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener">FLI</a>.</span> + <span class="source-link">Orgs: <a href="https://www.governance.ai/" target="_blank" rel="noopener noreferrer">GovAI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener noreferrer">CSET</a>, <a href="https://aipolicy.us/" target="_blank" rel="noopener noreferrer">CAIP</a>, <a href="https://www.iaps.ai/" target="_blank" rel="noopener noreferrer">IAPS</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener noreferrer">FLI</a>.</span> </div> </div> - </div> + </article> <!-- Mitigation: Strategy, Community, Funding --> - <div class="col-lg-4 col-md-6 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="8" data-tilt-speed="400" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.02"> + <article class="col-lg-4 col-md-6 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-people-fill"></i> 6c. 
Mitigation: Ecosystem</h5> + <h5><i class="bi bi-diagram-3"></i> 6c. Mitigation: Ecosystem</h5> <p class="card-text">Building the community and resources:</p> <ul> - <li><strong>Strategy & Forecasting:</strong> Analysis & prediction (<a href="https://aiimpacts.org/" target="_blank" rel="noopener">AI Impacts</a>, <a href="https://epochai.org/" target="_blank" rel="noopener">Epoch AI</a>, <a href="https://www.metaculus.com/questions/?topic=ai" target="_blank" rel="noopener">Metaculus</a>).</li> - <li><strong>Field Building & Edu:</strong> Training & awareness (<a href="https://aisafetyfundamentals.com/" target="_blank" rel="noopener">AISF</a>, <a href="https://80000hours.org/problem-profiles/artificial-intelligence/" target="_blank" rel="noopener">80k Hours</a>, <a href="https://www.aisafetysupport.org/" target="_blank" rel="noopener">AISS</a>).</li> - <li><strong>Funding:</strong> Directing resources (<a href="https://www.openphilanthropy.org/" target="_blank" rel="noopener">Open Phil</a>, <a href="http://survivalandflourishing.fund/" target="_blank" rel="noopener">SFF</a>, <a href="https://funds.effectivealtruism.org/funds/far-future" target="_blank" rel="noopener">LTFF</a>).</li> - <li><strong>Public Advocacy:</strong> Influencing policy/opinion (<a href="https://pauseai.info" target="_blank" rel="noopener">PauseAI</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener">FLI</a>, <a href="https://www.safe.ai/" target="_blank" rel="noopener">CAIS</a>).</li> - <li><strong>Infrastructure:</strong> Supporting the community (<a href="https://www.lightconeinfrastructure.com/" target="_blank" rel="noopener">Lightcone</a>, <a href="https://existence.org/" target="_blank" rel="noopener">BERI</a>, <a href="https://alignment.dev/" target="_blank" rel="noopener">AED</a>).</li> + <li><strong>Strategy & Forecasting:</strong> Analysis & prediction (<a href="https://aiimpacts.org/" target="_blank" rel="noopener noreferrer">AI Impacts</a>, <a 
href="https://epochai.org/" target="_blank" rel="noopener noreferrer">Epoch AI</a>, <a href="https://www.metaculus.com/questions/?topic=ai" target="_blank" rel="noopener noreferrer">Metaculus</a>).</li> + <li><strong>Field Building & Edu:</strong> Training & awareness (<a href="https://aisafetyfundamentals.com/" target="_blank" rel="noopener noreferrer">AISF</a>, <a href="https://80000hours.org/problem-profiles/artificial-intelligence/" target="_blank" rel="noopener noreferrer">80k Hours</a>, <a href="https://www.aisafetysupport.org/" target="_blank" rel="noopener noreferrer">AISS</a>).</li> + <li><strong>Funding:</strong> Directing resources (<a href="https://www.openphilanthropy.org/" target="_blank" rel="noopener noreferrer">Open Phil</a>, <a href="http://survivalandflourishing.fund/" target="_blank" rel="noopener noreferrer">SFF</a>, <a href="https://funds.effectivealtruism.org/funds/far-future" target="_blank" rel="noopener noreferrer">LTFF</a>).</li> + <li><strong>Public Advocacy:</strong> Influencing policy/opinion (<a href="https://pauseai.info" target="_blank" rel="noopener noreferrer">PauseAI</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener noreferrer">FLI</a>, <a href="https://safe.ai/" target="_blank" rel="noopener noreferrer">CAIS</a>).</li> + <li><strong>Infrastructure:</strong> Supporting community (<a href="https://www.lightconeinfrastructure.com/" target="_blank" rel="noopener noreferrer">Lightcone</a>, <a href="https://existence.org/" target="_blank" rel="noopener noreferrer">BERI</a>, <a href="https://alignment.dev/" target="_blank" rel="noopener noreferrer">AED</a>).</li> + <li>Explore the <a href="https://cheatsheets.davidveksler.com/aisafety.html" target="_blank" rel="noopener noreferrer">AI Safety Ecosystem Hub</a> for more.</li> </ul> </div> </div> - </div> + </article> <!-- Where to Learn More --> - <div class="col-lg-12 col-md-12 col-sm-12 d-flex"> - <div class="info-card w-100" data-tilt data-tilt-max="5" 
data-tilt-speed="300" data-tilt-perspective="1000" data-tilt-glare="false" data-tilt-scale="1.01"> + <article class="col-lg-12 col-md-12 col-sm-12 d-flex"> + <div class="info-card w-100"> <div class="card-body"> - <h5><i class="bi bi-book-half"></i> 7. Where to Learn More</h5> + <h5><i class="bi bi-journal-bookmark-fill"></i> 7. Where to Learn More</h5> <p class="card-text">Resources for further exploration:</p> <div class="row"> <div class="col-lg-4 col-md-6"> <span class="card-subheading">Introductory Resources:</span> <ul> - <li><a href="https://aisafetyfundamentals.com/" target="_blank" rel="noopener">AI Safety Fundamentals Courses</a></li> - <li><a href="https://robertskmiles.com/" target="_blank" rel="noopener">Robert Miles YouTube</a></li> - <li><a href="https://aisafety.info/" target="_blank" rel="noopener">AI Safety Info Directory</a></li> - <li><a href="https://www.aisafety.com/" target="_blank" rel="noopener">AISafety.com Hub</a></li> - <li><a href="https://80000hours.org/problem-profiles/artificial-intelligence/" target="_blank" rel="noopener">80,000 Hours AI Profile</a></li> - <li><a href="https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html" target="_blank" rel="noopener">Wait But Why: AI Revolution</a></li> + <li><a href="https://aisafetyfundamentals.com/" target="_blank" rel="noopener noreferrer">AI Safety Fundamentals Courses</a></li> + <li><a href="https://robertskmiles.com/" target="_blank" rel="noopener noreferrer">Robert Miles YouTube</a></li> + <li><a href="https://aisafety.info/" target="_blank" rel="noopener noreferrer">AI Safety Info Directory</a></li> + <li><a href="https://www.aisafety.com/" target="_blank" rel="noopener noreferrer">AISafety.com Hub</a></li> + <li><a href="https://80000hours.org/problem-profiles/artificial-intelligence/" target="_blank" rel="noopener noreferrer">80,000 Hours AI Profile</a></li> + <li><a href="https://waitbutwhy.com/2015/01/artificial-intelligence-revolution-1.html" target="_blank" 
rel="noopener noreferrer">Wait But Why: AI Revolution</a></li> + <li><a href="https://cheatsheets.davidveksler.com/yudkowsky-rationality-ai-cheatsheet.html" target="_blank" rel="noopener noreferrer">Yudkowsky & Rationality Cheatsheet</a></li> </ul> </div> <div class="col-lg-4 col-md-6"> <span class="card-subheading">Key Forums & News:</span> <ul> - <li><a href="https://www.alignmentforum.org/" target="_blank" rel="noopener">Alignment Forum</a> (Technical)</li> - <li><a href="https://www.lesswrong.com/" target="_blank" rel="noopener">LessWrong</a> (Rationality/AI)</li> - <li><a href="https://forum.effectivealtruism.org/" target="_blank" rel="noopener">Effective Altruism Forum</a></li> - <li><a href="https://importai.substack.com/" target="_blank" rel="noopener">Import AI Newsletter</a></li> - <li><a href="https://aiimpacts.org/" target="_blank" rel="noopener">AI Impacts Blog & Wiki</a></li> + <li><a href="https://www.alignmentforum.org/" target="_blank" rel="noopener noreferrer">Alignment Forum</a> (Technical)</li> + <li><a href="https://www.lesswrong.com/" target="_blank" rel="noopener noreferrer">LessWrong</a> (Rationality/AI)</li> + <li><a href="https://forum.effectivealtruism.org/" target="_blank" rel="noopener noreferrer">Effective Altruism Forum</a></li> + <li><a href="https://importai.substack.com/" target="_blank" rel="noopener noreferrer">Import AI Newsletter</a></li> + <li><a href="https://aiimpacts.org/" target="_blank" rel="noopener noreferrer">AI Impacts Blog & Wiki</a></li> </ul> </div> <div class="col-lg-4 col-md-12"> <span class="card-subheading">Key Organizations (Examples):</span> <ul> - <li>Labs (Safety Focus): <a href="https://www.anthropic.com/" target="_blank" rel="noopener">Anthropic</a>, <a href="https://deepmind.google/discover/responsibility-safety/" target="_blank" rel="noopener">DeepMind</a>, <a href="https://openai.com/safety" target="_blank" rel="noopener">OpenAI</a>, <a href="https://ssi.inc/" target="_blank" 
rel="noopener">SSI</a></li> - <li>Research Orgs: <a href="https://safe.ai/" target="_blank" rel="noopener">CAIS</a>, <a href="https://www.alignment.org/" target="_blank" rel="noopener">ARC</a>, <a href="https://www.redwoodresearch.org/" target="_blank" rel="noopener">Redwood</a>, <a href="https://metr.org/" target="_blank" rel="noopener">METR</a></li> - <li>Academic/Policy: <a href="https://humancompatible.ai/" target="_blank" rel="noopener">CHAI</a>, <a href="https://www.governance.ai/" target="_blank" rel="noopener">GovAI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener">CSET</a>, <a href="https://www.cser.ac.uk/" target="_blank" rel="noopener">CSER</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener">FLI</a></li> - <li>Govt Institutes: <a href="https://www.aisi.gov.uk/" target="_blank" rel="noopener">UK AISI</a>, <a href="https://www.nist.gov/artificial-intelligence/artificial-intelligence-safety-institute" target="_blank" rel="noopener">US AISI</a></li> + <li>Labs (Safety Focus): <a href="https://www.anthropic.com/" target="_blank" rel="noopener noreferrer">Anthropic</a>, <a href="https://deepmind.google/discover/responsibility-safety/" target="_blank" rel="noopener noreferrer">DeepMind</a>, <a href="https://openai.com/safety" target="_blank" rel="noopener noreferrer">OpenAI</a>, <a href="https://ssi.inc/" target="_blank" rel="noopener noreferrer">SSI</a></li> + <li>Research Orgs: <a href="https://safe.ai/" target="_blank" rel="noopener noreferrer">CAIS</a>, <a href="https://www.alignment.org/" target="_blank" rel="noopener noreferrer">ARC</a>, <a href="https://www.redwoodresearch.org/" target="_blank" rel="noopener noreferrer">Redwood</a>, <a href="https://metr.org/" target="_blank" rel="noopener noreferrer">METR</a></li> + <li>Academic/Policy: <a href="https://humancompatible.ai/" target="_blank" rel="noopener noreferrer">CHAI</a>, <a href="https://www.governance.ai/" target="_blank" rel="noopener 
noreferrer">GovAI</a>, <a href="https://cset.georgetown.edu/" target="_blank" rel="noopener noreferrer">CSET</a>, <a href="https://www.cser.ac.uk/" target="_blank" rel="noopener noreferrer">CSER</a>, <a href="https://futureoflife.org/" target="_blank" rel="noopener noreferrer">FLI</a></li> + <li>Govt Institutes: <a href="https://www.aisi.gov.uk/" target="_blank" rel="noopener noreferrer">UK AISI</a>, <a href="https://www.nist.gov/artificial-intelligence/artificial-intelligence-safety-institute" target="_blank" rel="noopener noreferrer">US AISI</a></li> + <li>Also see the <a href="https://cheatsheets.davidveksler.com/aisafety.html" target="_blank" rel="noopener noreferrer">AI Safety Ecosystem Hub</a>.</li> </ul> </div> </div> </div> </div> - </div> + </article> </div> <!-- /.row --> @@ -367,24 +476,22 @@ <div class="row justify-content-center mt-4"> <div class="col-lg-8 col-md-10"> <div class="alert alert-warning text-center" role="alert"> - <small><strong>Disclaimer:</strong> This is a simplified overview of a complex, rapidly evolving, and highly debated field. Views on AI X-Risk vary significantly. Always consult primary sources and multiple perspectives.</small> + <small><i class="bi bi-info-circle-fill me-2"></i><strong>Disclaimer:</strong> This is a simplified overview of a complex, rapidly evolving, and highly debated field. Views on AI X-Risk vary significantly. Always consult primary sources and multiple perspectives.</small> </div> </div> </div> - </div> <!-- /.container --> + </main> <!-- /.container --> -<footer class="container text-center text-muted pb-3"> - <p>Content based on common AI Safety discourse and resources from organizations like CAIS, FLI, GovAI, MIRI, 80,000 Hours, and research labs.</p> - <p class="mb-2">Layout inspired by the "15 Commitments of Conscious Leadership" cheatsheet template. - <br> Cheatsheet compiled by AI Assistant. © 2025. Please verify information. 
- </p> +<footer class="container-fluid text-center text-muted pb-3"> + <p class="mb-1">Content based on common AI Safety discourse and resources from organizations like CAIS, FLI, GovAI, MIRI, 80,000 Hours, and research labs.</p> + <p class="mb-2">Cheatsheet Portfolio © <span id="currentYear">2024</span> David Veksler. All rights reserved.</p> <div> - <a href="https://www.linkedin.com/in/davidveksler/" title="David Veksler on LinkedIn" target="_blank" rel="noopener noreferrer" class="mx-2 link-secondary"> + <a href="https://www.linkedin.com/in/davidveksler/" title="David Veksler on LinkedIn" target="_blank" rel="noopener noreferrer" class="mx-2"> <i class="bi bi-linkedin"></i> LinkedIn </a> - <a href="https://cheatsheets.davidveksler.com/" title="Browse All Cheatsheets" class="mx-2 link-secondary"> - <i class="bi bi-collection"></i> All Cheatsheets + <a href="https://cheatsheets.davidveksler.com/" title="Browse All Cheatsheets" class="mx-2"> + <i class="bi bi-collection-fill"></i> All Cheatsheets </a> </div> </footer> @@ -392,22 +499,31 @@ <!-- === JAVASCRIPT === --> <!-- Bootstrap JS Bundle (needed for tooltips) --> <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js" integrity="sha384-YvpcrYf0tY3lHB60NNkmXc5s9fDVZLESaAA55NDzOxhy9GkcIdslK1eN7N6jIeHz" crossorigin="anonymous"></script> - <!-- Vanilla-Tilt.js library --> - <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/[email protected]/dist/vanilla-tilt.min.js"></script> <!-- Initialization Script --> <script> - // Initialize Bootstrap Tooltips - var tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]')) - var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) { - return new bootstrap.Tooltip(tooltipTriggerEl, { - html: true, - trigger: 'hover focus', // Use hover/focus; click can interfere on desktop - delay: { "show": 100, "hide": 150 } // Slightly longer delays - }) + 
document.addEventListener('DOMContentLoaded', function () { + // Extend Bootstrap's default allowList for tooltips to ensure our links work + const defaultAllowList = bootstrap.Tooltip.Default.allowList; + defaultAllowList.a.push('target', 'rel'); // Ensure target and rel are allowed on <a> tags + + // Initialize Bootstrap Tooltips + var tooltipTriggerList = Array.from(document.querySelectorAll('[data-bs-toggle="tooltip"]')); + var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) { + return new bootstrap.Tooltip(tooltipTriggerEl, { + html: true, + trigger: 'hover focus', + delay: { "show": 200, "hide": 500 }, // Show quickly, hide slower to allow mouse travel + sanitize: true, // Use Bootstrap's built-in sanitizer + allowList: defaultAllowList // Apply the extended allowList + }); + }); + + // Update copyright year + const currentYearSpan = document.getElementById('currentYear'); + if (currentYearSpan) { + currentYearSpan.textContent = new Date().getFullYear(); + } }); - - // Vanilla Tilt initializes automatically via data-tilt attributes - // Or explicitly: VanillaTilt.init(document.querySelectorAll(".info-card")); </script> </body>