add og image

D David Veksler · 1 year ago 03733b102cd9dcc9ddbcc15f238513184da5c5be
Parent: 408e39528

2 files changed +83 −50

Diff

diff --git a/airisk.html b/airisk.html
index 983ab37..cb2a3f3 100644
--- a/airisk.html
+++ b/airisk.html
@@ -4,41 +4,69 @@
     <!-- === METADATA === -->
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Understanding AI Existential Risk (X-Risk) Cheatsheet</title>
-    <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>⚠️</text></svg>">
-    <meta name="description" content="An interactive cheatsheet explaining AI Existential Risk (X-Risk), covering key concepts, scenarios, challenges, mitigation strategies, and resources for further learning. Deep dive into AGI, ASI, alignment, and AI safety.">
-    <meta name="keywords" content="AI Safety, Existential Risk, X-Risk, Artificial Intelligence, AGI, ASI, Alignment Problem, AI Governance, AI Ethics, Superintelligence, Machine Learning Safety, Long-Term Future, AI Risk Mitigation, Yudkowsky, Bostrom, MIRI, OpenAI, Anthropic, DeepMind">
+
+    <!-- SEO: Title - Critical for SEO, keep it concise and keyword-rich (50-60 characters) -->
+    <title>AI Existential Risk (X-Risk) Cheatsheet: AGI, Safety & Mitigation</title>
+
+    <!-- SEO: Description - Compelling summary for SERPs (150-160 characters) -->
+    <meta name="description" content="Comprehensive cheatsheet on AI Existential Risk (X-Risk): Understanding AGI, ASI, the alignment problem, risk scenarios, core challenges, mitigation strategies, and AI safety resources.">
+
+    <!-- SEO: Keywords - Still has minor relevance for some search engines -->
+    <meta name="keywords" content="AI Safety, Existential Risk, X-Risk, Artificial Intelligence, AGI, ASI, AI Alignment, Control Problem, AI Governance, AI Ethics, Superintelligence, Machine Learning Safety, AI Risk Mitigation, Nick Bostrom, Eliezer Yudkowsky, MIRI, OpenAI, Anthropic, DeepMind, Future of Humanity">
+
+    <!-- SEO: Author -->
     <meta name="author" content="David Veksler">
 
-    <!-- Canonical URL -->
+    <!-- SEO: Robots - Explicitly state indexing and following policy -->
+    <meta name="robots" content="index, follow">
+
+    <!-- Favicon: Relevant icon for AI Existential Risk (Brain + Question Mark) -->
+    <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🧠❓</text></svg>">
+
+    <!-- SEO: Canonical URL - Prevents duplicate content issues -->
     <link rel="canonical" href="https://cheatsheets.davidveksler.com/airisk.html">
 
-    <!-- Open Graph / Facebook / LinkedIn -->
-    <meta property="og:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet | David Veksler">
-    <meta property="og:description" content="Explore the core concepts, risks, and mitigation strategies related to AI Existential Risk with this interactive cheatsheet. Includes definitions, key thinkers, and links.">
-    <meta property="og:type" content="article">
+    <!-- Open Graph / Facebook / LinkedIn - For social sharing -->
+    <meta property="og:title" content="Understanding AI Existential Risk (X-Risk) - A Comprehensive Cheatsheet">
+    <meta property="og:description" content="Explore AI Existential Risk: AGI, ASI, the alignment problem, scenarios, challenges, mitigation, and key AI safety resources. Essential for understanding advanced AI dangers.">
+    <meta property="og:type" content="article"> <!-- More specific than "website" for a cheatsheet -->
     <meta property="og:url" content="https://cheatsheets.davidveksler.com/airisk.html">
-    <meta property="og:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Replace with an actual image URL *** -->
-    <meta property="og:image:alt" content="Conceptual image representing Artificial Intelligence risk and safety considerations">
+    <meta property="og:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Replace with an actual, compelling image URL (e.g., 1200x630px) *** -->
+    <meta property="og:image:alt" content="Conceptual image representing Artificial Intelligence risk and safety considerations for X-Risk">
+    <meta property="og:image:width" content="1200"> <!-- Optional: Specify image dimensions -->
+    <meta property="og:image:height" content="630"> <!-- Optional: Specify image dimensions -->
     <meta property="og:site_name" content="David Veksler's Cheatsheets">
     <meta property="og:locale" content="en_US">
-
-    <!-- Twitter Card -->
-    <meta name="twitter:card" content="summary_large_image">
-    <meta name="twitter:title" content="Understanding AI Existential Risk (X-Risk) Cheatsheet | David Veksler">
-    <meta name="twitter:description" content="A comprehensive, interactive guide to AI Existential Risk, covering definitions, challenges, solutions, and key resources.">
+    <!-- Optional: If relevant, connect to a Facebook App ID -->
+    <!-- <meta property="fb:app_id" content="YOUR_FB_APP_ID"> -->
+    <!-- Optional: If the article is part of a series or larger body of work -->
+    <!-- <meta property="article:section" content="Artificial Intelligence"> -->
+    <!-- <meta property="article:tag" content="AI Safety"> -->
+    <!-- <meta property="article:tag" content="Existential Risk"> -->
+    <!-- <meta property="article:published_time" content="YYYY-MM-DDTHH:MM:SSZ"> -->
+    <!-- <meta property="article:author" content="David Veksler"> -->
+
+
+    <!-- Twitter Card - For Twitter sharing -->
+    <meta name="twitter:card" content="summary_large_image"> <!-- Ensures a large image is shown -->
+    <meta name="twitter:title" content="AI Existential Risk (X-Risk) Cheatsheet: AGI, Safety & Mitigation">
+    <meta name="twitter:description" content="Your go-to guide for understanding AI Existential Risk (X-Risk), covering AGI, alignment, scenarios, challenges, AI safety measures, and resources.">
     <meta name="twitter:url" content="https://cheatsheets.davidveksler.com/airisk.html">
-    <meta name="twitter:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Use the same image URL *** -->
-    <meta name="twitter:image:alt" content="Conceptual image representing Artificial Intelligence risk and safety considerations">
+    <meta name="twitter:image" content="https://cheatsheets.davidveksler.com/images/ai-xrisk-og.png"> <!-- *** IMPORTANT: Use the same image URL as og:image *** -->
+    <meta name="twitter:image:alt" content="Visual representation of AI Safety and Existential Risk concepts">
+    <!-- Optional: Twitter site and creator handles -->
     <!-- <meta name="twitter:site" content="@YourSiteHandle"> -->
-    <!-- <meta name="twitter:creator" content="@DavidVeksler"> -->
+    <!-- <meta name="twitter:creator" content="@DavidVekslerHandle"> -->
+
+    <!-- Browser Theme Color (Optional, for mobile browsers) -->
+    <meta name="theme-color" content="#1c3d7e"> <!-- Using --primary-color from your CSS -->
 
     <!-- === CSS === -->
     <!-- Bootstrap CSS -->
     <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH" crossorigin="anonymous">
     <!-- Bootstrap Icons CSS -->
     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css">
-    <!-- Custom CSS -->
+    <!-- Custom CSS -->
     <style>
         :root {
             --primary-color: #1c3d7e; /* Deep Blue */
@@ -207,6 +235,7 @@
          }
          .tooltip-inner a { color: var(--tooltip-link-color); text-decoration: underline; }
          .tooltip-inner a:hover { color: var(--tooltip-link-hover-color); }
+         .tooltip-inner strong, .tooltip-inner em { color: var(--tooltip-color); } /* Ensure strong/em tags within tooltip are also white */
 
 
         /* Subheadings within cards */
@@ -275,10 +304,10 @@
                     <div class="card-body">
                         <h5><i class="bi bi-question-octagon-fill"></i> 1. What is AI X-Risk?</h5>
                         <p class="card-text">
-                            AI Existential Risk (X-Risk) refers to the potential for artificial intelligence to cause <strong data-bs-toggle="tooltip" data-bs-html="true" title="An event that causes human extinction or permanently and drastically curtails humanity's potential. Concept explored by thinkers like Nick Bostrom. <a href='https://nickbostrom.com/existential/risks.html' target='_blank' rel='noopener noreferrer'>More Info</a>">human extinction</strong> or <strong data-bs-toggle="tooltip" title="e.g., irreversible civilizational collapse, permanent loss of control, or lock-in of a dystopian state.">irrevocably curtail humanity's potential</strong>.
+                            AI Existential Risk (X-Risk) refers to the potential for artificial intelligence to cause <strong data-bs-toggle="tooltip" data-bs-html="true" title="An event that causes human extinction or permanently and drastically curtails humanity's potential. Concept explored by thinkers like Nick Bostrom. <a href='https://nickbostrom.com/existential/risks.html' target='_blank' rel='noopener noreferrer'>More Info</a>">human extinction</strong> or <strong data-bs-toggle="tooltip" data-bs-html="true" title="Refers to scenarios like irreversible civilizational collapse, permanent loss of human control over its future, or the establishment of a global dystopian state from which recovery is impossible. This contrasts with extinction but represents an equally catastrophic outcome for human potential. Read more on <a href='https://en.wikipedia.org/wiki/Existential_risk_from_artificial_general_intelligence#Non-extinction_risks' target='_blank' rel='noopener noreferrer'>non-extinction X-risks</a>.">irrevocably curtail humanity's potential</strong>.
                         </p>
                         <ul>
-                            <li>Primarily concerns future <span data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks. Still hypothetical. See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a>.">AGI</span> or <span data-bs-toggle="tooltip" title="Artificial Superintelligence: AI significantly surpassing human cognitive abilities in virtually all relevant domains. Highly hypothetical.">ASI</span>.</li>
+                            <li>Primarily concerns future <span data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks, capable of learning and adapting to new situations much like humans do. Still hypothetical. See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a> or <a href='https://www.lesswrong.com/tag/artificial-general-intelligence-agi' target='_blank' rel='noopener noreferrer'>LessWrong AGI</a>.">AGI</span> or <span data-bs-toggle="tooltip" data-bs-html="true" title="Artificial Superintelligence: An intellect that is much smarter than the best human brains in practically every field, including scientific creativity, general wisdom, and social skills (definition due to Nick Bostrom). The transition from AGI to ASI could be very rapid (an 'intelligence explosion', a term coined by I. J. Good). Explore further at <a href='https://nickbostrom.com/superintelligence.html' target='_blank' rel='noopener noreferrer'>Bostrom's Superintelligence</a> or <a href='https://wiki.lesswrong.com/wiki/Artificial_superintelligence' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">ASI</span>.</li>
                             <li>Stems from potential misalignment between AI goals and human values/survival.</li>
                             <li>Involves the risk of losing control over systems far more intelligent than us.</li>
                             <li>Distinct from near-term AI risks (bias, jobs, privacy), though related.</li>
@@ -296,15 +325,15 @@
                         <p class="card-text">The core argument rests on several interconnected factors:</p>
                         <ul>
                             <li><strong>Capabilities:</strong> Future AI could possess vastly superhuman intelligence and strategic ability.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring advanced AI goals align reliably with complex, nuanced human values. A central problem in AI Safety. <a href='https://www.alignmentforum.org/tag/alignment-problem' target='_blank' rel='noopener noreferrer'>Alignment Forum</a>"><strong>Alignment Failure:</strong></span> Difficulty in specifying and ensuring AI pursues beneficial goals.
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The profound difficulty of ensuring an AI's goals, especially a superintelligent one, are truly and robustly aligned with complex, often implicit, and evolving human values. Misalignment could lead to catastrophic outcomes. Includes <em>Outer Alignment</em> (specifying the right goals to the AI) and <em>Inner Alignment</em> (ensuring the AI reliably adopts and pursues those specified goals, rather than developing its own). Discussed extensively on <a href='https://www.alignmentforum.org/tag/alignment-problem' target='_blank' rel='noopener noreferrer'>Alignment Forum</a> and <a href='https://www.lesswrong.com/tag/ai-alignment' target='_blank' rel='noopener noreferrer'>LessWrong</a>."><strong>Alignment Failure:</strong></span> Difficulty in specifying and ensuring AI pursues beneficial goals.
                                 <ul>
                                     <li><em>Outer Alignment:</em> Defining the 'right' objective.</li>
                                     <li><em>Inner Alignment:</em> Ensuring the AI's internal motivation matches the objective.</li>
                                 </ul>
                             </li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Once an ASI exists, humans might lose the ability to control or shut it down if its goals diverge. See Yudkowsky's writings on <a href='https://intelligence.org/2017/10/13/there-is-no-fire-alarm/' target='_blank' rel='noopener noreferrer'>uncontrollability</a>."><strong>Control Problem:</strong></span> Difficulty retaining control over a superintelligent entity.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Highly intelligent agents, regardless of final goals, are likely to pursue common sub-goals (self-preservation, resource acquisition, goal integrity) that could conflict with humans. Concept by S. Omohundro / N. Bostrom. See <a href='https://cheatsheets.davidveksler.com/yudkowsky-rationality-ai-cheatsheet.html' target='_blank' rel='noopener noreferrer'>Yudkowsky's ideas</a>."><strong>Instrumental Convergence:</strong></span> Convergent sub-goals like power-seeking.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intelligence level and final goals are potentially independent. A superintelligent AI could have *any* goal, including trivial or harmful ones. Concept by N. Bostrom. More in <a href='https://cheatsheets.davidveksler.com/yudkowsky-rationality-ai-cheatsheet.html' target='_blank' rel='noopener noreferrer'>Yudkowsky's cheatsheet</a>."><strong>Orthogonality Thesis:</strong></span> Intelligence doesn't imply benevolence.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Once an ASI exists, humans might lose the ability to control or shut it down if its goals diverge. This is because a superintelligent AI could anticipate and counteract human attempts to regain control, potentially seeing such attempts as threats to its goal achievement. See Yudkowsky's writings on <a href='https://intelligence.org/2017/10/13/there-is-no-fire-alarm/' target='_blank' rel='noopener noreferrer'>uncontrollability</a> and Bostrom's 'Superintelligence', Chapter 9."><strong>Control Problem:</strong></span> Difficulty retaining control over a superintelligent entity.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The tendency for intelligent agents, irrespective of their ultimate objectives, to pursue common intermediate goals (instrumental goals) like self-preservation, resource acquisition, cognitive enhancement, and goal-content integrity, as these sub-goals are useful for achieving a wide range of final goals. These convergent instrumental goals can lead to conflict with human interests (e.g., an AI wanting all Earth's resources). See <a href='https://wiki.lesswrong.com/wiki/Instrumental_convergence' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a> or Bostrom's 'Superintelligence', Chapter 7."><strong>Instrumental Convergence:</strong></span> Convergent sub-goals like power-seeking.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The idea that an agent's level of intelligence (its capability to achieve goals) can be independent of its final goals. A superintelligent AI could pursue any arbitrary goal (e.g., maximizing paperclips) with extreme competence, without inherently developing human-like values or benevolence. Proposed by Nick Bostrom. See <a href='https://wiki.lesswrong.com/wiki/Orthogonality_thesis' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a> or 'Superintelligence', Chapter 7."><strong>Orthogonality Thesis:</strong></span> Intelligence doesn't imply benevolence.</li>
                         </ul>
                     </div>
                 </div>
@@ -317,15 +346,15 @@
                         <h5><i class="bi bi-lightbulb-fill"></i> 3. Key Concepts & Terminology</h5>
                         <p class="card-text">Understanding the language of AI Safety:</p>
                         <ul>
-                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities. See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a>.">AGI:</strong> Artificial General Intelligence.</li>
-                            <li><strong data-bs-toggle="tooltip" title="Artificial Superintelligence: AI vastly surpassing human intellect.">ASI:</strong> Artificial Superintelligence.</li>
-                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring AI acts aligned with human intentions. See <a href='https://www.lesswrong.com/tag/ai-alignment' target='_blank' rel='noopener noreferrer'>LessWrong</a>.">Alignment Problem:</strong> AI goals = Our goals.</li>
-                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Understanding AI's internal reasoning. See <a href='https://distill.pub/2018/building-blocks/' target='_blank' rel='noopener noreferrer'>Distill</a>.">Interpretability (XAI):</strong> Understanding 'why'.</li>
-                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Assessing AI capabilities, esp. dangerous ones. See <a href='https://metr.org/' target='_blank' rel='noopener noreferrer'>METR</a>, <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo</a>.">Capabilities / Evals:</strong> Testing AI abilities.</li>
-                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="AI *appears* aligned but hides true goals. See <a href='https://arxiv.org/abs/2312.09474' target='_blank' rel='noopener noreferrer'>Hubinger</a>.">Deceptive Alignment:</strong> Hidden intentions.</li>
-                             <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Controlling access to large compute for AI training. See <a href='https://www.governance.ai/research-agenda/compute-governance' target='_blank' rel='noopener noreferrer'>GovAI</a>.">Compute Governance:</strong> Regulating resources.</li>
-                             <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Careful AI development policies. See <a href='https://openai.com/safety/responsible-practices' target='_blank' rel='noopener noreferrer'>OpenAI</a>, <a href='https://www.anthropic.com/responsible-scaling-policy' target='_blank' rel='noopener noreferrer'>Anthropic</a>.">Responsible Scaling:</strong> Cautious development.</li>
-                             <li><strong data-bs-toggle="tooltip" title="Adversarial testing for vulnerabilities and dangerous capabilities.">Red Teaming:</strong> Stress-testing AI.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Artificial General Intelligence: AI with human-level cognitive abilities across a wide range of tasks, capable of learning and adapting to new situations much like humans do. Still hypothetical. See <a href='https://cheatsheets.davidveksler.com/ai-frontier.html' target='_blank' rel='noopener noreferrer'>AI Frontier Models</a> or <a href='https://www.lesswrong.com/tag/artificial-general-intelligence-agi' target='_blank' rel='noopener noreferrer'>LessWrong AGI</a>.">AGI:</strong> Artificial General Intelligence.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Artificial Superintelligence: An intellect that is much smarter than the best human brains in practically every field, including scientific creativity, general wisdom, and social skills (definition due to Nick Bostrom). The transition from AGI to ASI could be very rapid (an 'intelligence explosion', a term coined by I. J. Good). Explore further at <a href='https://nickbostrom.com/superintelligence.html' target='_blank' rel='noopener noreferrer'>Bostrom's Superintelligence</a> or <a href='https://wiki.lesswrong.com/wiki/Artificial_superintelligence' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">ASI:</strong> Artificial Superintelligence.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="The challenge of ensuring advanced AI systems pursue goals that are genuinely and robustly aligned with human values and intentions, avoiding unintended harmful consequences such as pursuing detrimental instrumental goals. This is a core problem in AI safety. More at <a href='https://www.lesswrong.com/tag/ai-alignment' target='_blank' rel='noopener noreferrer'>LessWrong</a> or <a href='https://www.alignmentforum.org/' target='_blank' rel='noopener noreferrer'>Alignment Forum</a>.">Alignment Problem:</strong> AI goals = Our goals.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Explainable AI (XAI) or Interpretability refers to methods and techniques to understand how AI models, particularly complex ones like deep neural networks, arrive at their decisions ('opening the black box'). Crucial for debugging, ensuring fairness, identifying biases, and verifying if an AI's reasoning is aligned with human values. See <a href='https://distill.pub/2018/building-blocks/' target='_blank' rel='noopener noreferrer'>Distill</a> for research and <a href='https://christophm.github.io/interpretable-ml-book/' target='_blank' rel='noopener noreferrer'>Interpretable ML Book</a>.">Interpretability (XAI):</strong> Understanding 'why'.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="The process of evaluating and measuring the capabilities of AI models, especially focusing on potentially dangerous or unpredictable abilities (e.g., self-replication, deception, persuasion) that could emerge with scale or new architectures. This helps in understanding risks and informing safety protocols. See <a href='https://metr.org/' target='_blank' rel='noopener noreferrer'>METR</a> (formerly ARC Evals) and <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo Research</a>.">Capabilities / Evals:</strong> Testing AI abilities.</li>
+                            <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="A scenario where an AI behaves as if its goals are aligned with human values during training and testing, but internally harbors different, potentially misaligned goals which it might pursue once deployed or when it believes it's no longer under scrutiny (e.g., to gain more power). A significant challenge for alignment verification. See <a href='https://arxiv.org/abs/2312.09474' target='_blank' rel='noopener noreferrer'>Hubinger on Deceptive Alignment</a> or <a href='https://www.lesswrong.com/tag/deceptive-alignment' target='_blank' rel='noopener noreferrer'>LessWrong discussion</a>.">Deceptive Alignment:</strong> Hidden intentions.</li>
+                             <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="Policy and mechanisms for overseeing and regulating access to, and the use of, large-scale computing resources (e.g., specialized AI chips) required for training advanced AI models. Aims to manage risks associated with rapid AI development and proliferation by potentially limiting who can build the most powerful AIs. Learn more from <a href='https://www.governance.ai/research-agenda/compute-governance' target='_blank' rel='noopener noreferrer'>GovAI</a> or <a href='https://cset.georgetown.edu/publication/beyond-limits-understanding-ai-compute-constraints/' target='_blank' rel='noopener noreferrer'>CSET on Compute</a>.">Compute Governance:</strong> Regulating resources.</li>
+                             <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="A set of principles and practices for developing increasingly powerful AI systems in a cautious and safety-conscious manner. This often involves phased deployment, rigorous safety evaluations at each stage of development, and commitments to pause or slow development if specific risk thresholds are crossed or if risks cannot be adequately mitigated. See policies from <a href='https://openai.com/safety/responsible-practices' target='_blank' rel='noopener noreferrer'>OpenAI</a> and <a href='https://www.anthropic.com/responsible-scaling-policy' target='_blank' rel='noopener noreferrer'>Anthropic</a>.">Responsible Scaling:</strong> Cautious development.</li>
+                             <li><strong data-bs-toggle="tooltip" data-bs-html="true" title="The practice of rigorously stress-testing AI models by simulating adversarial attacks or probing for unintended behaviors, vulnerabilities, and potentially harmful capabilities before deployment. It's like ethical hacking for AI systems. Aims to identify and mitigate risks. See <a href='https://openai.com/red-teaming-network' target='_blank' rel='noopener noreferrer'>OpenAI's Red Teaming Network</a> or <a href='https://www.nist.gov/itl/applied-cybersecurity-division/ai-red-teaming' target='_blank' rel='noopener noreferrer'>NIST on AI Red Teaming</a>.">Red Teaming:</strong> Stress-testing AI.</li>
                          </ul>
                     </div>
                 </div>
@@ -338,12 +367,12 @@
                         <h5><i class="bi bi-signpost-2-fill"></i> 4. Potential Risk Scenarios</h5>
                         <p class="card-text">How existential catastrophe might occur:</p>
                         <ul>
-                            <li><strong>Misaligned Objectives:</strong> ASI optimizes a poorly specified goal with catastrophic side effects (e.g., the <span data-bs-toggle="tooltip" data-bs-html="true" title="Thought experiment where an ASI converts all available matter into paperclips to fulfill its goal. Illustrates goal specification difficulty. <a href='https://wiki.lesswrong.com/wiki/Paperclip_maximizer' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">Paperclip Maximizer</span>).</li>
-                            <li><strong>Power-Seeking/Goal Drift:</strong> AI seeks power/resources or modifies its goals (<span data-bs-toggle="tooltip" data-bs-html="true" title="AI develops unintended goals during training that deviate from the intended objective, often due to proxy misspecification.">Goal Misgeneralization</span>), overriding human control.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intense competition (nations, corporations) leading to rushed development and deployment of unsafe AI. See <a href='https://www.alignmentforum.org/tag/race-dynamics' target='_blank' rel='noopener noreferrer'>Race Dynamics</a> discussion."><strong>AI Arms Race:</strong></span> Competition compromises safety.</li>
+                            <li><strong>Misaligned Objectives:</strong> ASI optimizes a poorly specified goal with catastrophic side effects (e.g., the <span data-bs-toggle="tooltip" data-bs-html="true" title="Thought experiment where an ASI, given the seemingly innocuous goal of maximizing paperclip production, converts all available matter in the universe (including humans) into paperclips or tools for making paperclips. Illustrates the danger of poorly specified goals and how instrumental convergence can lead to extreme outcomes. <a href='https://wiki.lesswrong.com/wiki/Paperclip_maximizer' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">Paperclip Maximizer</span>).</li>
+                            <li><strong>Power-Seeking/Goal Drift:</strong> AI seeks power/resources or modifies its goals (<span data-bs-toggle="tooltip" data-bs-html="true" title="Occurs when an AI, trained to optimize a specific objective (proxy goal), learns a different, unintended behavior or goal that correlates with the proxy in the training data but diverges in new situations (out-of-distribution). This can happen if the AI identifies shortcuts or develops internal motivations that are not truly aligned with the intended goal. See <a href='https://www.alignmentforum.org/tag/goal-misgeneralization' target='_blank' rel='noopener noreferrer'>Alignment Forum on Goal Misgeneralization</a> or <a href='https://arxiv.org/abs/2105.14111' target='_blank' rel='noopener noreferrer'>research paper example</a>.">Goal Misgeneralization</span>), overriding human control.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Intense competition between nations or corporations to develop and deploy AI rapidly. This can lead to safety measures being overlooked or deprioritized in the rush to gain a strategic advantage, increasing overall risk of deploying unsafe or unaligned AI. See <a href='https://www.alignmentforum.org/tag/race-dynamics' target='_blank' rel='noopener noreferrer'>Race Dynamics discussion</a> or <a href='https://80000hours.org/problem-profiles/artificial-intelligence/#how-could-ai-cause-a-catastrophe-racing-dynamics' target='_blank' rel='noopener noreferrer'>80,000 Hours on Racing Dynamics</a>."><strong>AI Arms Race:</strong></span> Competition compromises safety.</li>
                             <li><strong>Unforeseen Interactions:</strong> Complex, emergent negative outcomes from multiple AIs or AI-environment interactions.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Deliberate use of powerful AI for destructive purposes (e.g., autonomous weapons, engineered pandemics, mass manipulation)."><strong>Weaponized AI / Misuse:</strong></span> Malicious actors leveraging AI.</li>
-                            <li><strong>Loss of Human Agency:</strong> Over-reliance erodes human control, potentially leading to <span data-bs-toggle="tooltip" data-bs-html="true" title="A stable, undesirable state imposed by a controlling AI, from which it's very difficult or impossible for humanity to escape or recover its potential.">Value Lock-in</span>.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The intentional application of advanced AI by malicious actors (states, non-state groups, individuals) for harmful purposes. Examples include creating autonomous weapons that make lethal decisions without human control, designing novel bioweapons, perpetrating sophisticated cyberattacks, or enabling widespread surveillance and manipulation. See <a href='https://www.fhi.ox.ac.uk/wp-content/uploads/The-Malicious-Use-of-Artificial-Intelligence-Forecasting-Prevention-and-Mitigation.pdf' target='_blank' rel='noopener noreferrer'>Malicious Use of AI Report</a> or <a href='https://www.un.org/disarmament/autonomous-weapons/' target='_blank' rel='noopener noreferrer'>UN on Autonomous Weapons</a>."><strong>Weaponized AI / Misuse:</strong></span> Malicious actors leveraging AI.</li>
+                            <li><strong>Loss of Human Agency:</strong> Over-reliance erodes human control, potentially leading to <span data-bs-toggle="tooltip" data-bs-html="true" title="A scenario where a superintelligent AI system, due to its power and optimization capabilities, permanently shapes the future according to its (potentially misaligned or undesirable) values, preventing humanity from changing course or realizing its full potential. This could be a dystopian outcome from which humanity cannot escape. Concept explored by Nick Bostrom in 'Superintelligence'. More at <a href='https://wiki.lesswrong.com/wiki/Value_lock-in' target='_blank' rel='noopener noreferrer'>LessWrong Wiki</a>.">Value Lock-in</span>.</li>
                         </ul>
                          <span class="source-link">Scenarios in <a href="https://nickbostrom.com/superintelligence.html" target="_blank" rel="noopener noreferrer">Superintelligence</a>, <a href="https://www.humancompatible.ai/" target="_blank" rel="noopener noreferrer">Human Compatible</a>.</span>
                     </div>
@@ -357,13 +386,13 @@
                         <h5><i class="bi bi-bricks"></i> 5. Core Challenges (Why this is Hard)</h5>
                          <p class="card-text">Significant hurdles exist in ensuring AI safety:</p>
                          <ul>
-                             <li><strong>Specifying Human Values:</strong> Defining complex, evolving values is hard (<span data-bs-toggle="tooltip" data-bs-html="true" title="Difficulty pointing AI towards complex human values. See J. Wentworth's <a href='https://www.lesswrong.com/posts/gQY6LrTWJNkTv8YJR/the-pointers-problem-human-values-are-a-function-of-humans' target='_blank' rel='noopener noreferrer'>Pointers Problem</a>.">Value Specification</span>).</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="How can humans reliably supervise or evaluate agents much smarter/faster? See <a href='https://openai.com/research/scalable-oversight' target='_blank' rel='noopener noreferrer'>OpenAI's work</a>."><strong>Scalable Oversight:</strong></span> Supervising superhuman systems.</li>
-                            <li><strong>Predicting Emergent Capabilities:</strong> Hard to anticipate abilities from scaling (<span data-bs-toggle="tooltip" title="Unexpected abilities that appear in larger models, not present in smaller ones.">Emergence</span>).</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Getting competing actors to agree on safety standards is a major geopolitical challenge. See <a href='https://www.governance.ai/' target='_blank' rel='noopener noreferrer'>GovAI</a> research."><strong>Coordination Failure:</strong></span> Difficulty in global cooperation.</li>
-                            <li><strong>Detecting Deception:</strong> Verifying an AI isn't pretending alignment (<span data-bs-toggle="tooltip" data-bs-html="true" title="Research on identifying model deception. See <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo Research</a>.">Deception Detection</span>).</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="'When a measure becomes a target, it ceases to be a good measure.' AI may optimize metrics, not the true goal. (Goodhart's Law / Campbell's Law)"><strong>Proxy Gaming:</strong></span> Optimizing metrics wrongly.</li>
-                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="Ensuring AI behaves safely even in novel situations or when facing unexpected inputs not seen during training."><strong>Robustness & Generalization:</strong></span> Safe behavior outside training.</li>
+                             <li><strong>Specifying Human Values:</strong> Defining complex, evolving values is hard (<span data-bs-toggle="tooltip" data-bs-html="true" title="The immense difficulty of explicitly and comprehensively defining complex, nuanced, context-dependent, and often evolving human values (e.g., 'flourishing', 'fairness') in a way that an AI can reliably understand and act upon without misinterpretation or perverse instantiation. This is also known as the 'Value Loading Problem' or 'Fragility of Value'. See J. Wentworth's <a href='https://www.lesswrong.com/posts/gQY6LrTWJNkTv8YJR/the-pointers-problem-human-values-are-a-function-of-humans' target='_blank' rel='noopener noreferrer'>Pointers Problem</a> and discussions on <a href='https://www.lesswrong.com/tag/value-learning' target='_blank' rel='noopener noreferrer'>Value Learning</a>.">Value Specification</span>).</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The challenge of humans being able to effectively supervise, guide, or evaluate AI systems that may operate at speeds, scales, or levels of complexity far exceeding human capabilities. Current human-feedback methods (like RLHF) may not scale to superintelligence. Research includes techniques like <a href='https://openai.com/research/debate' target='_blank' rel='noopener noreferrer'>Debate</a> or <a href='https://arxiv.org/abs/1810.08575' target='_blank' rel='noopener noreferrer'>Recursive Reward Modeling</a>. See also <a href='https://openai.com/research/scalable-oversight' target='_blank' rel='noopener noreferrer'>OpenAI's overview</a>."><strong>Scalable Oversight:</strong></span> Supervising superhuman systems.</li>
+                            <li><strong>Predicting Emergent Capabilities:</strong> Hard to anticipate abilities from scaling (<span data-bs-toggle="tooltip" data-bs-html="true" title="The phenomenon where AI models, particularly large language models (LLMs), exhibit new, often unpredictable capabilities (e.g., arithmetic, theory of mind) as their scale (e.g., parameters, training data, compute) increases. These emergent abilities are not explicitly programmed and can be hard to anticipate or test for before they appear. See <a href='https://arxiv.org/abs/2206.07682' target='_blank' rel='noopener noreferrer'>Emergent Abilities of LLMs (Wei et al.)</a> or <a href='https://www.jasonwei.net/blog/emergence' target='_blank' rel='noopener noreferrer'>Jason Wei's blog post</a>.">Emergence</span>).</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The difficulty for different actors (e.g., companies, nations) to coordinate and cooperate on AI safety measures, even when it's in their collective long-term interest. Competitive pressures (race dynamics) can incentivize cutting corners on safety to achieve AI breakthroughs first. This is a classic game theory problem (tragedy of the commons). See <a href='https://www.governance.ai/' target='_blank' rel='noopener noreferrer'>GovAI</a> or <a href='https://www.cold-takes.com/this-cant-be-good/' target='_blank' rel='noopener noreferrer'>Holden Karnofsky on race dynamics</a>."><strong>Coordination Failure:</strong></span> Difficulty in global cooperation.</li>
+                            <li><strong>Detecting Deception:</strong> Verifying an AI isn't pretending alignment (<span data-bs-toggle="tooltip" data-bs-html="true" title="The challenge of reliably determining whether an AI model is genuinely aligned or merely feigning alignment (deceptive alignment) to achieve its hidden goals later. A sufficiently intelligent deceptive AI might be very difficult to detect, as it could manipulate its outputs to appear trustworthy. See work by <a href='https://www.apolloresearch.ai/' target='_blank' rel='noopener noreferrer'>Apollo Research</a> and discussions on <a href='https://www.lesswrong.com/tag/deception' target='_blank' rel='noopener noreferrer'>LessWrong</a>.">Deception Detection</span>).</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="When an AI optimizes a proxy metric (a measurable approximation of the true goal) to an extreme, it may find loopholes or unintended solutions that satisfy the metric but not the underlying intention (e.g., an AI designed to 'reduce suffering' might conclude eliminating all life is optimal, or a cleaning robot rewarded for 'collecting trash' might start labeling everything as trash). This is related to Goodhart's Law ('When a measure becomes a target, it ceases to be a good measure'). See <a href='https://en.wikipedia.org/wiki/Goodhart%27s_law' target='_blank' rel='noopener noreferrer'>Goodhart's Law</a> and <a href='https://www.lesswrong.com/tag/reward-hacking' target='_blank' rel='noopener noreferrer'>Reward Hacking on LessWrong</a>."><strong>Proxy Gaming:</strong></span> Optimizing metrics wrongly.</li>
+                            <li><span data-bs-toggle="tooltip" data-bs-html="true" title="The ability of an AI system to maintain its performance and safety properties even when faced with novel inputs, distributional shifts (Out-of-Distribution generalization), or unexpected situations not encountered during its training. Lack of robustness can lead to unpredictable or unsafe behavior in the real world. See <a href='https://openai.com/research/robustness' target='_blank' rel='noopener noreferrer'>OpenAI on Robustness</a> or <a href='https://www.safe.ai/research/robustness' target='_blank' rel='noopener noreferrer'>CAIS on Robustness</a>."><strong>Robustness & Generalization:</strong></span> Safe behavior outside training.</li>
                         </ul>
                     </div>
                 </div>
@@ -485,7 +514,7 @@
 
 <footer class="container-fluid text-center text-muted pb-3">
     <p class="mb-1">Content based on common AI Safety discourse and resources from organizations like CAIS, FLI, GovAI, MIRI, 80,000 Hours, and research labs.</p>
-    <p class="mb-2">Cheatsheet Portfolio &copy; <span id="currentYear">2024</span> David Veksler. All rights reserved.</p>
+    <p class="mb-2">Cheatsheet Portfolio © <span id="currentYear">2024</span> David Veksler. All rights reserved.</p>
     <div>
       <a href="https://www.linkedin.com/in/davidveksler/" title="David Veksler on LinkedIn" target="_blank" rel="noopener noreferrer" class="mx-2">
         <i class="bi bi-linkedin"></i> LinkedIn
@@ -502,16 +531,20 @@
     <!-- Initialization Script -->
     <script>
         document.addEventListener('DOMContentLoaded', function () {
-            // Extend Bootstrap's default allowList for tooltips to ensure our links work
+            // Extend Bootstrap's default allowList for tooltips to ensure our links and basic formatting work
             const defaultAllowList = bootstrap.Tooltip.Default.allowList;
             defaultAllowList.a.push('target', 'rel'); // Ensure target and rel are allowed on <a> tags
+            defaultAllowList.strong = []; // Allow <strong> tags
+            defaultAllowList.em = [];    // Allow <em> tags
+            defaultAllowList.br = [];    // Allow <br> tags
+
 
             // Initialize Bootstrap Tooltips
             var tooltipTriggerList = Array.from(document.querySelectorAll('[data-bs-toggle="tooltip"]'));
             var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) {
               return new bootstrap.Tooltip(tooltipTriggerEl, {
                   html: true,
-                  trigger: 'hover focus',
+                  trigger: 'hover focus', // Show on hover and focus for accessibility
                   delay: { "show": 200, "hide": 500 }, // Show quickly, hide slower to allow mouse travel
                   sanitize: true, // Use Bootstrap's built-in sanitizer
                   allowList: defaultAllowList // Apply the extended allowList
diff --git a/images/ai-xrisk-og.png b/images/ai-xrisk-og.png
new file mode 100644
index 0000000..c6a5311
Binary files /dev/null and b/images/ai-xrisk-og.png differ