Update compression-algorithms.html
· 1 year ago
c11fa73678aa3614fa788e98ecf437d517a2425b
Parent:
11f5bb2e5
1 file changed +1495 −905
- compression-algorithms.html +1495 −905
Diff
--- a/compression-algorithms.html +++ b/compression-algorithms.html @@ -3,965 +3,1555 @@ <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Data Compression Algorithms Cheatsheet</title> + <title>Data Compression Cheatsheet: Algorithms & Techniques</title> + + <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>🤏</text></svg>"> + + <!-- SEO Meta Description --> + <meta name="description" content="A comprehensive and interactive cheatsheet for Data Compression algorithms. Explore lossless (Huffman, LZW, Zstd) and lossy (JPEG, MP3, H.264) techniques, theory, practical tips, and evaluation metrics."> + <!-- Keywords --> + <meta name="keywords" content="Data Compression, Cheatsheet, Lossless Compression, Lossy Compression, Huffman Coding, LZW, Zstd, Deflate, Brotli, JPEG, MP3, H.264, AV1, Compression Algorithms, Information Theory, Compression Ratio, Entropy"> + + <!-- Canonical URL --> + <link rel="canonical" href="https://cheatsheets.davidveksler.com/compression-algorithms.html"> <!-- REPLACE with actual URL --> + + <!-- Social Media Metadata (Open Graph) --> + <meta property="og:title" content="Interactive Data Compression Cheatsheet: Algorithms & Techniques"> + <meta property="og:description" content="Explore lossless (Huffman, LZW, Zstd) and lossy (JPEG, MP3, H.264) compression techniques, theory, practical tips, and evaluation metrics with this interactive guide."> + <meta property="og:type" content="article"> + <meta property="og:url" content="https://cheatsheets.davidveksler.com/compression-algorithms.html"> <!-- REPLACE --> + <!-- <meta property="og:image" content="https://yourdomain.com/images/compression-cheatsheet-og.png"> REPLACE with an image URL --> + <meta property="og:image:alt" content="A visual summary of data compression concepts and algorithms."> + <meta property="og:site_name" content="Your 
Site Name"> <!-- REPLACE --> + + <!-- Twitter Card Metadata --> + <meta name="twitter:card" content="summary_large_image"> + <meta name="twitter:title" content="Interactive Data Compression Cheatsheet: Algorithms & Techniques"> + <meta name="twitter:description" content="Explore lossless (Huffman, LZW, Zstd) and lossy (JPEG, MP3, H.264) compression techniques, theory, practical tips, and evaluation metrics."> + <!-- <meta name="twitter:image" content="https://yourdomain.com/images/compression-cheatsheet-twitter.png"> --> + <meta name="twitter:creator" content="@heroiclife"> + + <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet"> + <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/font/bootstrap-icons.min.css"> + <style> + :root { + --bs-primary: #0D6EFD; /* Bootstrap Blue */ + --bs-primary-dark: #0A58CA; + --bs-primary-light: #E6F2FF; + --bs-body-bg: #f0f2f5; + --card-border-color: #cfd8dc; + --card-shadow-color: rgba(0, 120, 212, 0.1); + --text-color-main: #212529; + --text-color-secondary: #495057; + --text-color-highlight: var(--bs-primary-dark); + --blueprint-grid-color: rgba(0, 120, 212, 0.07); + + /* Category Colors */ + --color-foundations: #20c997; /* Teal */ + --color-lossless: #0078d4; /* Energetic Blue */ + --color-lossy: #fd7e14; /* Vibrant Orange */ + --color-practical: #198754; /* Green */ + --color-techniques: #6f42c1; /* Purple */ + --color-standards: #d63384; /* Pink */ + --color-table: #6c757d; /* Gray */ + --color-apple: #A2AAAD; /* Apple's Silver/Gray for specific tags or subtle highlights */ + + + --category-color: var(--bs-primary); /* Default */ + } + body { - font-family: Arial, sans-serif; - line-height: 1.6; - margin: 0; - padding: 0; - background-color: #f4f4f4; - color: #333; + background-color: var(--bs-body-bg); + background-image: linear-gradient(to right, var(--blueprint-grid-color) 1px, transparent 1px), + linear-gradient(to bottom, 
var(--blueprint-grid-color) 1px, transparent 1px); + background-size: 70px 70px; + font-family: "Segoe UI", "Roboto", Helvetica, Arial, sans-serif; + padding-bottom: 3rem; + font-size: 16px; + color: var(--text-color-main); } - header { - background: #333; - color: #fff; - padding: 1rem 0; + + .page-header { + background: linear-gradient(135deg, var(--bs-primary-light), #e9ecef); + padding: 3rem 1.5rem; text-align: center; + border-bottom: 4px solid var(--bs-primary); + margin-bottom: 2rem; + box-shadow: 0 6px 20px rgba(0, 120, 212, 0.15); } - header h1 { - margin-bottom: 0.2rem; + .page-header h1 { + color: var(--bs-primary-dark); + font-weight: 700; + margin-bottom: 0.75rem; + font-size: 2.6rem; } - nav#main-nav { - background: #444; - color: #fff; - padding: 0.5rem; - text-align: center; + .page-header h1 .bi { + font-size: 1.05em; + vertical-align: -0.1em; + margin-right: 0.3em; + color: var(--bs-primary); } - nav#main-nav ul { - list-style-type: none; - padding: 0; - margin: 0; + .page-header .lead { + color: var(--text-color-secondary); + font-size: 1.25rem; + max-width: 850px; + margin: auto; + font-weight: 300; } - nav#main-nav ul li { - display: inline; - margin-right: 15px; + + #filter-controls { + background-color: rgba(255, 255, 255, 0.9); + backdrop-filter: blur(8px); + padding: 1rem; + border-radius: 8px; + box-shadow: 0 4px 12px rgba(0,0,0,0.08); + margin-bottom: 2.5rem; + position: sticky; + top: 10px; + z-index: 1020; /* Bootstrap's default z-index for sticky things */ } - nav#main-nav a { - color: #fff; - text-decoration: none; - font-weight: bold; + #category-filters .btn { margin: 0.25rem; } + #category-filters .btn.active { font-weight: bold; } + #search-box:focus { + border-color: var(--bs-primary); + box-shadow: 0 0 0 0.25rem rgba(var(--bs-primary-rgb), 0.25); } - nav#main-nav a:hover { - text-decoration: underline; + + .schema-container { + background-color: rgba(255, 255, 255, 0.95); + border: 1px solid var(--card-border-color); + 
border-radius: 10px; + padding: 2rem 2rem 0.5rem 2rem; + margin-bottom: 3rem; + box-shadow: 0 8px 20px var(--card-shadow-color); } - main { - max-width: 1200px; - margin: 20px auto; - padding: 20px; + .schema-container[data-category="foundations"] { --category-color: var(--color-foundations); border-left: 5px solid var(--color-foundations); } + .schema-container[data-category="lossless"] { --category-color: var(--color-lossless); border-left: 5px solid var(--color-lossless); } + .schema-container[data-category="lossy"] { --category-color: var(--color-lossy); border-left: 5px solid var(--color-lossy); } + .schema-container[data-category="practical"] { --category-color: var(--color-practical); border-left: 5px solid var(--color-practical); } + .schema-container[data-category="standards"] { --category-color: var(--color-standards); border-left: 5px solid var(--color-standards); } + .schema-container[data-category="table"] { --category-color: var(--color-table); border-left: 5px solid var(--color-table); } + + + .section-title { + color: #fff; + background-color: var(--category-color); /* Uses category color from parent */ + margin: -3.5rem 0 2rem 0; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.1em; + font-size: 1.2rem; + padding: 0.6rem 1.3rem; + display: inline-block; + position: relative; + left: -0.5rem; + border-radius: 8px 8px 0 0; + box-shadow: 0 -3px 8px rgba(0,0,0,0.06); + } + + .info-card { background: #fff; + border: 1px solid var(--card-border-color); border-radius: 8px; - box-shadow: 0 0 10px rgba(0,0,0,0.1); + box-shadow: 0 5px 12px var(--card-shadow-color); + height: 100%; + display: flex; + flex-direction: column; + transition: box-shadow 0.3s ease, transform 0.3s ease; } - article, section { - margin-bottom: 25px; - padding-bottom: 15px; - border-bottom: 1px solid #eee; + .info-card:hover { + box-shadow: 0 8px 22px rgba(var(--bs-primary-rgb), 0.25); + transform: translateY(-4px); } - article:last-child, section:last-child { - 
border-bottom: none; - } - h2 { - color: #333; - border-bottom: 2px solid #4CAF50; - padding-bottom: 5px; - } - h3 { - color: #444; + + .info-card .card-body { padding: 0; flex-grow: 1; display: flex; flex-direction: column; } + .info-card h5 { /* Card Header */ + color: #fff; + background-color: var(--category-color); /* Uses card's category color */ + font-size: 1.2rem; + text-align: left; + margin: 0; + padding: 1rem 1.2rem; + font-weight: 600; + display: flex; + align-items: center; + gap: 0.7rem; + border-bottom: 1px solid var(--card-border-color); + border-radius: 7px 7px 0 0; } - h4 { - color: #555; - margin-top: 1.5em; + .info-card h5 .bi { font-size: 1.4em; opacity: 0.9; } + .context-tag { + font-size: 0.7rem; + font-weight: 500; + padding: 0.2em 0.5em; + border-radius: 4px; + background-color: rgba(255,255,255,0.2); + color: #fff; + margin-left: auto; /* Pushes to the right */ + white-space: nowrap; } - table { - width: 100%; - border-collapse: collapse; - margin-bottom: 15px; + .context-tag.apple { background-color: var(--color-apple); color: #333; } + + + .card-content-wrapper { padding: 1.5rem; flex-grow: 1; display: flex; flex-direction: column; } + .info-card p.summary { font-size: 0.95rem; color: var(--text-color-secondary); margin-bottom: 1.2rem; flex-grow: 1; line-height: 1.7; } + + .collapse-content { + font-size: 0.92rem; + border-top: 1px solid #e0e0e0; + padding: 1.5rem; + margin-top: 1rem; + color: var(--text-color-main); + background-color: #fafdff; + border-radius: 0 0 7px 7px; } - th, td { - border: 1px solid #ddd; - padding: 8px; - text-align: left; + .collapse-content h6 { + font-weight: 600; + color: var(--text-color-highlight); + margin-top: 1.2rem; + margin-bottom: 0.5rem; + font-size: 1.05rem; } - th { - background-color: #f2f2f2; - font-weight: bold; + .collapse-content ul { padding-left: 0.5rem; margin-bottom: 1.2rem; list-style: none; } + .collapse-content li { + margin-bottom: 0.8rem; + padding-bottom: 0.8rem; + font-size: 
0.9rem; + line-height: 1.6; + border-bottom: 1px dashed #d0d9e0; + position: relative; + padding-left: 2rem; } - ul, ol { - margin-left: 20px; + .collapse-content li:last-child { border-bottom: none; margin-bottom: 0; } + .collapse-content li::before { + content: "\F282"; /* Default: check2. Can customize. */ + font-family: "bootstrap-icons"; + position: absolute; + left: 0; + top: 4px; + color: var(--category-color); /* Use category color from card */ + opacity: 0.9; + font-size: 1.2em; } - code { - background-color: #eef; - padding: 2px 4px; + .collapse-content li strong { color: var(--text-color-highlight); display: block; margin-bottom: 0.3rem; font-weight: 600; } + .collapse-content p { font-size: 0.92rem; margin-bottom: 0.8rem; line-height: 1.65; } + .collapse-content code { + font-size: 0.88rem; + color: var(--bs-primary-dark); + background-color: var(--bs-primary-light); + padding: 0.2em 0.5em; border-radius: 4px; - font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace; + font-family: Consolas, Menlo, Monaco, "Courier New", monospace; } - details { - background-color: #f9f9f9; - border: 1px solid #ddd; + .term { + font-weight: 600; + color: var(--bs-primary-dark); + background-color: var(--bs-primary-light); + padding: 0.15em 0.4em; border-radius: 4px; - margin-bottom: 10px; - padding: 10px; + cursor: help; } - summary { - font-weight: bold; - cursor: pointer; - color: #007bff; + + + .details-toggle { + font-size: 0.9rem; + margin-top: auto; + align-self: flex-start; + padding: 0.4rem 0.9rem; + color: var(--category-color); /* Use card's category color */ + border: 2px solid var(--category-color); + background-color: transparent; + transition: background-color 0.25s ease, color 0.25s ease, transform 0.15s ease; + display: inline-flex; + align-items: center; + gap: 0.5em; + border-radius: 5px; + font-weight: 500; } - summary:hover { - text-decoration: underline; + .details-toggle:hover, .details-toggle:focus { + background-color: 
var(--category-color); + color: white; + transform: scale(1.02); + outline: none; } + .details-toggle .bi { transition: transform 0.25s ease-in-out; } + .details-toggle[aria-expanded="true"] .bi-chevron-down { transform: rotate(180deg); } + footer { + padding: 2.5rem 0; + font-size: 0.9rem; + color: #525d66; text-align: center; - padding: 20px; - background: #333; - color: #fff; - margin-top: 30px; - } - footer a { - color: #4CAF50; + border-top: 1px solid #ddd; + margin-top: 2rem; } + footer a { color: var(--bs-primary); font-weight: 500; } + footer a:hover { color: var(--bs-primary-dark); text-decoration: underline; } + + .table-responsive { margin-top: 1.5rem; } + .table th { background-color: var(--color-table); color: white; } + .table-striped > tbody > tr:nth-of-type(odd) > * { --bs-table-accent-bg: var(--bs-primary-light); } + .placeholder-diagram { display: block; text-align: center; padding: 20px; border: 1px dashed #ccc; background-color: #fafafa; - margin: 10px 0; + margin: 15px 0; font-style: italic; color: #777; + border-radius: 6px; } + + /* Utility for hiding elements */ + .hidden { display: none !important; } + </style> </head> <body> - <header> - <h1>Data Compression Algorithms Cheatsheet</h1> - <p>Your Quick Guide to Understanding and Choosing Compression Algorithms</p> + <header class="page-header"> + <h1><i class="bi bi-file-earmark-zip-fill"></i> Data Compression Cheatsheet</h1> + <p class="lead"> + An interactive guide to understanding compression algorithms, from foundational theory to practical application. + </p> </header> - <nav id="main-nav"> - <ul> - <li><a href="#quick-ref">Quick Reference</a></li> - <li><a href="#theory">I. Foundational Theory</a></li> - <li><a href="#lossless-algorithms">II. Lossless Algorithms</a></li> - <li><a href="#lossy-algorithms">III. Lossy Algorithms</a></li> - <li><a href="#practical-considerations">IV. Practical Considerations</a></li> - <li><a href="#standards-bodies">V. 
Standards Bodies</a></li> - </ul> - </nav> - - <main> - <section id="quick-ref"> - <h2>Quick Reference Table: Common Compression Algorithms</h2> - <table> - <thead> - <tr> - <th>Algorithm</th> - <th>Type</th> - <th>Primary Use Case</th> - <th>Typical Ratio</th> - <th>Speed (C/D)</th> - <th>Key Characteristic</th> - <th>Common Extensions</th> - </tr> - </thead> - <tbody> - <tr> - <td><strong>RLE</strong></td> - <td>Lossless</td> - <td>Simple graphics, faxes</td> - <td>Low-Med</td> - <td>Fast/Fast</td> - <td>Simple, good for repeated data</td> - <td>(used within BMP, TIFF)</td> - </tr> - <tr> - <td><strong>Huffman</strong></td> - <td>Lossless</td> - <td>Text, component in others</td> - <td>Med</td> - <td>Med/Med</td> - <td>Optimal per-symbol coding</td> - <td>(used in JPEG, Deflate)</td> - </tr> - <tr> - <td><strong>LZ77/LZ78</strong></td> - <td>Lossless</td> - <td>General purpose, text</td> - <td>Med-High</td> - <td>Med/Med</td> - <td>Dictionary-based, adaptive</td> - <td>(basis for ZIP, GZIP)</td> - </tr> - <tr> - <td><strong>LZW</strong></td> - <td>Lossless</td> - <td>GIF images, <code>compress</code></td> - <td>Med</td> - <td>Med/Med</td> - <td>Dictionary-based, formerly patented</td> - <td><code>.gif</code></td> - </tr> - <tr> - <td><strong>bzip2</strong></td> - <td>Lossless</td> - <td>General file compression</td> - <td>High</td> - <td>Slow/Slow</td> - <td>BWT, good ratio but slow</td> - <td><code>.bz2</code></td> - </tr> - <tr> - <td><strong>LZMA / LZMA2</strong></td> - <td>Lossless</td> - <td>Archives (7z, xz)</td> - <td>Very High</td> - <td>Slow/Med</td> - <td>Excellent ratio, high memory</td> - <td><code>.7z</code>, <code>.xz</code></td> - </tr> - <tr> - <td><strong>Deflate</strong></td> - <td>Lossless</td> - <td>General (ZIP, GZIP, PNG)</td> - <td>Med-High</td> - <td>Med/Fast</td> - <td>LZ77 + Huffman, good balance</td> - <td><code>.zip</code>, <code>.gz</code>, <code>.png</code></td> - </tr> - <tr> - <td><strong>Arithmetic</strong></td> - 
<td>Lossless</td> - <td>Component in others</td> - <td>Med-High</td> - <td>Slow/Med</td> - <td>Near-optimal, complex</td> - <td>(used in JPEG2000)</td> - </tr> - <tr> - <td><strong>Brotli</strong></td> - <td>Lossless</td> - <td>Web content (text, fonts)</td> - <td>High-V.High</td> - <td>Slow/Fast</td> - <td>Excellent for text, static dictionary</td> - <td><code>.br</code></td> - </tr> - <tr> - <td><strong>Zstandard (Zstd)</strong></td> - <td>Lossless</td> - <td>General, databases, real-time</td> - <td>High-V.High</td> - <td>V.Fast/V.Fast</td> - <td>Fast, flexible levels, modern</td> - <td><code>.zst</code></td> - </tr> - <tr> - <td><strong>FLAC</strong></td> - <td>Lossless</td> - <td>Audio archival</td> - <td>Med (audio)</td> - <td>Med/Fast</td> - <td>Lossless audio, widely supported</td> - <td><code>.flac</code>, <code>.fla</code></td> - </tr> - <tr> - <td><strong>JPEG</strong></td> - <td>Lossy</td> - <td>Photographic images</td> - <td>High</td> - <td>Med/Fast</td> - <td>Widely supported, good for photos</td> - <td><code>.jpg</code>, <code>.jpeg</code></td> - </tr> - <tr> - <td><strong>WebP</strong></td> - <td>Lossy/Lossless</td> - <td>Web images (photos, graphics)</td> - <td>High (lossy)</td> - <td>Med/Fast</td> - <td>Versatile, animation, transparency</td> - <td><code>.webp</code></td> - </tr> - <tr> - <td><strong>AVIF</strong></td> - <td>Lossy/Lossless</td> - <td>Web images (next-gen)</td> - <td>Very High</td> - <td>Slow/Med</td> - <td>Excellent ratio/quality, HDR</td> - <td><code>.avif</code></td> - </tr> - <tr> - <td><strong>JPEG 2000</strong></td> - <td>Lossy/Lossless</td> - <td>Medical/archival images</td> - <td>V.High</td> - <td>Slow/Med</td> - <td>Better quality than JPEG, scalable</td> - <td><code>.jp2</code>, <code>.j2k</code></td> - </tr> - <tr> - <td><strong>H.264/AVC</strong></td> - <td>Lossy</td> - <td>Video (Blu-ray, streaming)</td> - <td>V.High</td> - <td>Med/Fast</td> - <td>Excellent quality/ratio, wide hardware support</td> - 
<td><code>.mp4</code>, <code>.mkv</code>, <code>.mov</code></td> - </tr> - <tr> - <td><strong>H.265/HEVC</strong></td> - <td>Lossy</td> - <td>Video (4K/UHD, streaming)</td> - <td>V.High</td> - <td>Slow/Fast</td> - <td>~2x efficiency of H.264</td> - <td><code>.mp4</code>, <code>.mkv</code></td> - </tr> - <tr> - <td><strong>VP9</strong></td> - <td>Lossy</td> - <td>Video (web streaming, YouTube)</td> - <td>V.High</td> - <td>Med/Fast</td> - <td>Royalty-free, H.265 competitor</td> - <td><code>.webm</code>, <code>.mp4</code></td> - </tr> - <tr> - <td><strong>AV1</strong></td> - <td>Lossy</td> - <td>Video (web streaming)</td> - <td>V.High</td> - <td>V.Slow/Med</td> - <td>Royalty-free, excellent compression</td> - <td><code>.mkv</code>, <code>.mp4</code></td> - </tr> - <tr> - <td><strong>MP3</strong></td> - <td>Lossy</td> - <td>Audio (music)</td> - <td>High</td> - <td>Fast/V.Fast</td> - <td>Ubiquitous, good quality for music</td> - <td><code>.mp3</code></td> - </tr> - <tr> - <td><strong>AAC</strong></td> - <td>Lossy</td> - <td>Audio (streaming, Apple)</td> - <td>V.High</td> - <td>Fast/Fast</td> - <td>Better than MP3 at same bitrate</td> - <td><code>.aac</code>, <code>.m4a</code>, <code>.mp4</code></td> - </tr> - <tr> - <td><strong>Opus</strong></td> - <td>Lossy</td> - <td>Audio (VoIP, streaming, web)</td> - <td>V.High</td> - <td>Fast/Fast</td> - <td>Royalty-free, versatile (speech/music), low latency</td> - <td><code>.opus</code></td> - </tr> - </tbody> - </table> - <p><em><strong>Note:</strong> Ratio & Speed are relative and can vary based on data, settings, and implementation.</em></p> - </section> - - <article id="theory"> - <h2>I. Foundational Theory: The "Why" and "How"</h2> - - <section id="theory-core"> - <h3>A. 
Core Concepts</h3> - <ul> - <li><strong>What is Data Compression?</strong> The process of reducing the size of data (number of bits) to store or transmit it more efficiently.</li> - <li><strong>Why Compress?</strong> - <ul> - <li><strong>Storage Savings:</strong> Store more data in the same space.</li> - <li><strong>Faster Data Transmission:</strong> Reduce time and bandwidth needed to transfer data.</li> - <li><strong>Reduced Costs:</strong> Lower expenses for storage and bandwidth.</li> - </ul> - </li> - <li><strong>Information Theory Basics:</strong> - <ul> - <li><strong>Entropy:</strong> A measure of the inherent randomness or uncertainty in data. It represents the theoretical lower bound for compression. Data with lower entropy (more predictable) can be compressed more.</li> - <li><strong>Redundancy:</strong> Information that is repeated or predictable and can be removed or represented more efficiently. Types of redundancy include: - <ul> - <li><strong>Spatial:</strong> Correlation between neighboring data points (e.g., pixels in an image).</li> - <li><strong>Temporal:</strong> Correlation between successive data points in time (e.g., frames in a video).</li> - <li><strong>Statistical/Symbol:</strong> Some symbols or patterns occur more frequently than others (e.g., the letter 'e' in English text).</li> - <li><strong>Perceptual:</strong> Information that human senses do not easily perceive or that is less important (exploited by lossy compression).</li> - </ul> - </li> - </ul> - </li> - </ul> - </section> - - <section id="theory-limits"> - <h3>B. Limits of Compression</h3> - <ul> - <li><strong>Incompressibility:</strong> Random data (or data that appears random, like already well-compressed or encrypted data) cannot be significantly compressed further by lossless methods. 
Applying a lossless compressor to such data may even slightly increase its size due to overhead.</li> - <li><strong>Rate-Distortion Theory (for Lossy Compression):</strong> This theory provides a mathematical framework for the trade-off between compression rate (bits used) and distortion (loss of fidelity). It defines the minimum achievable rate for a given level of distortion, and vice-versa. This is fundamental to understanding the performance limits of lossy codecs.</li> - </ul> - </section> - - <section id="theory-classification"> - <h3>C. Fundamental Classifications</h3> - <table> + <div class="container"> + <nav id="filter-controls" class="mb-4"> + <input type="search" id="search-box" class="form-control mb-3" placeholder="Search algorithms, concepts, terms..." data-bs-toggle="tooltip" title="Type to filter content"> + <div id="category-filters" class="btn-toolbar justify-content-center" role="toolbar" aria-label="Category Filters"> + <!-- Filter buttons will be injected here by JavaScript --> + </div> + <div id="no-results" class="alert alert-warning mt-3 hidden" role="alert"> + No items match your search or filter criteria. 
+ </div> + </nav> + </div> + + <main class="container" id="main-container"> + <!-- Quick Reference Table Section --> + <div class="schema-container" data-category="table" id="section-quick-ref"> + <h2 class="section-title"><i class="bi bi-table"></i> Quick Reference Table</h2> + <div class="table-responsive"> + <table class="table table-striped table-hover"> <thead> <tr> - <th>Feature</th> - <th>Lossless Compression</th> - <th>Lossy Compression</th> + <th>Algorithm</th> + <th>Type</th> + <th>Primary Use Case</th> + <th>Typical Ratio</th> + <th>Speed (C/D)</th> + <th>Key Characteristic</th> + <th>Common Ext.</th> </tr> </thead> <tbody> - <tr> - <td><strong>Data Reconstruction</strong></td> - <td>Perfect: Original data can be fully restored.</td> - <td>Imperfect: Approximates original data; some information is lost.</td> - </tr> - <tr> - <td><strong>Information Loss</strong></td> - <td>None.</td> - <td>Yes, irreversible.</td> - </tr> - <tr> - <td><strong>Typical Ratio</strong></td> - <td>Moderate (e.g., 2:1 to 4:1).</td> - <td>High to Very High (e.g., 10:1 to 100:1 or more).</td> - </tr> - <tr> - <td><strong>Primary Use</strong></td> - <td>Text, code, executables, medical data, archives (integrity crucial).</td> - <td>Multimedia (images, audio, video) where some loss is acceptable.</td> - </tr> - <tr> - <td><strong>Examples</strong></td> - <td><code>ZIP</code>, <code>PNG</code>, <code>FLAC</code>.</td> - <td><code>JPEG</code>, <code>MP3</code>, <code>H.264</code>.</td> - </tr> + <tr><td><strong>RLE</strong></td><td>Lossless</td><td>Simple graphics, faxes</td><td>Low-Med</td><td>Fast/Fast</td><td>Simple, good for repeated data</td><td>(BMP, TIFF)</td></tr> + <tr><td><strong>Huffman</strong></td><td>Lossless</td><td>Text, component</td><td>Med</td><td>Med/Med</td><td>Optimal per-symbol</td><td>(JPEG, Deflate)</td></tr> + <tr><td><strong>LZ77/LZ78</strong></td><td>Lossless</td><td>General purpose, text</td><td>Med-High</td><td>Med/Med</td><td>Dictionary-based, 
adaptive</td><td>(ZIP, GZIP)</td></tr> + <tr><td><strong>LZW</strong></td><td>Lossless</td><td>GIF images, <code>compress</code></td><td>Med</td><td>Med/Med</td><td>Dictionary-based</td><td>.gif</td></tr> + <tr><td><strong>bzip2</strong></td><td>Lossless</td><td>General file compression</td><td>High</td><td>Slow/Slow</td><td>BWT, good ratio but slow</td><td>.bz2</td></tr> + <tr><td><strong>LZMA/LZMA2</strong></td><td>Lossless</td><td>Archives (7z, xz)</td><td>Very High</td><td>Slow/Med</td><td>Excellent ratio, high memory</td><td>.7z, .xz</td></tr> + <tr><td><strong>Deflate</strong></td><td>Lossless</td><td>General (ZIP, GZIP, PNG)</td><td>Med-High</td><td>Med/Fast</td><td>LZ77 + Huffman, good balance</td><td>.zip, .gz, .png</td></tr> + <tr><td><strong>Arithmetic</strong></td><td>Lossless</td><td>Component</td><td>Med-High</td><td>Slow/Med</td><td>Near-optimal, complex</td><td>(JPEG2000)</td></tr> + <tr><td><strong>Brotli</strong></td><td>Lossless</td><td>Web content (text, fonts)</td><td>High-V.High</td><td>Slow/Fast</td><td>Excellent for text</td><td>.br</td></tr> + <tr><td><strong>Zstandard (Zstd)</strong></td><td>Lossless</td><td>General, databases, real-time</td><td>High-V.High</td><td>V.Fast/V.Fast</td><td>Fast, flexible, modern</td><td>.zst</td></tr> + <tr><td><strong>FLAC</strong></td><td>Lossless</td><td>Audio archival</td><td>Med (audio)</td><td>Med/Fast</td><td>Lossless audio</td><td>.flac</td></tr> + <tr><td><strong>ALAC</strong></td><td>Lossless</td><td>Audio (Apple ecosystem)</td><td>Med (audio)</td><td>Med/Fast</td><td>Apple lossless audio</td><td>.m4a</td></tr> + <tr><td><strong>PPM</strong></td><td>Lossless</td><td>Text, high ratio</td><td>Very High</td><td>Slow/Slow</td><td>Context modeling, high ratio</td><td>-</td></tr> + <tr><td><strong>JPEG</strong></td><td>Lossy</td><td>Photographic images</td><td>High</td><td>Med/Fast</td><td>Widely supported for photos</td><td>.jpg, .jpeg</td></tr> + <tr><td><strong>HEIC</strong></td><td>Lossy 
(primarily)</td><td>Images (Apple default)</td><td>High</td><td>Med/Fast</td><td>Efficient, supports advanced features</td><td>.heic, .heif</td></tr> + <tr><td><strong>WebP</strong></td><td>Lossy/Lossless</td><td>Web images</td><td>High (lossy)</td><td>Med/Fast</td><td>Versatile, animation, transparency</td><td>.webp</td></tr> + <tr><td><strong>AVIF</strong></td><td>Lossy/Lossless</td><td>Web images (next-gen)</td><td>Very High</td><td>Slow/Med</td><td>Excellent ratio/quality, HDR</td><td>.avif</td></tr> + <tr><td><strong>JPEG 2000</strong></td><td>Lossy/Lossless</td><td>Medical/archival images</td><td>V.High</td><td>Slow/Med</td><td>Better quality than JPEG, scalable</td><td>.jp2, .j2k</td></tr> + <tr><td><strong>ProRes</strong></td><td>Lossy (visually)</td><td>Professional video editing</td><td>Low-Med</td><td>Fast/Fast (editing)</td><td>High quality, edit-friendly</td><td>.mov</td></tr> + <tr><td><strong>H.264/AVC</strong></td><td>Lossy</td><td>Video (Blu-ray, streaming)</td><td>V.High</td><td>Med/Fast</td><td>Excellent quality/ratio</td><td>.mp4, .mkv</td></tr> + <tr><td><strong>H.265/HEVC</strong></td><td>Lossy</td><td>Video (4K/UHD, streaming)</td><td>V.High</td><td>Slow/Fast</td><td>~2x efficiency of H.264</td><td>.mp4, .mkv</td></tr> + <tr><td><strong>VP9</strong></td><td>Lossy</td><td>Video (web streaming, YouTube)</td><td>V.High</td><td>Med/Fast</td><td>Royalty-free, H.265 competitor</td><td>.webm, .mp4</td></tr> + <tr><td><strong>AV1</strong></td><td>Lossy</td><td>Video (web streaming)</td><td>V.High</td><td>V.Slow/Med</td><td>Royalty-free, excellent compression</td><td>.mkv, .mp4</td></tr> + <tr><td><strong>MP3</strong></td><td>Lossy</td><td>Audio (music)</td><td>High</td><td>Fast/V.Fast</td><td>Ubiquitous for music</td><td>.mp3</td></tr> + <tr><td><strong>AAC</strong></td><td>Lossy</td><td>Audio (streaming, Apple)</td><td>V.High</td><td>Fast/Fast</td><td>Better than MP3 at same bitrate</td><td>.aac, .m4a</td></tr> + 
<tr><td><strong>Opus</strong></td><td>Lossy</td><td>Audio (VoIP, streaming, web)</td><td>V.High</td><td>Fast/Fast</td><td>Royalty-free, versatile, low latency</td><td>.opus</td></tr> + <tr><td><strong>Vorbis</strong></td><td>Lossy</td><td>Audio (open-source applications)</td><td>High</td><td>Fast/Fast</td><td>Royalty-free, good quality</td><td>.ogg, .oga</td></tr> </tbody> </table> - <h4>Near-Lossless Compression:</h4> - <p>A specialized category where the decompressed data is not identical to the original, but the differences are strictly bounded to be very small and often imperceptible or within acceptable error margins for specific applications (e.g., some scientific data, medical imaging).</p> - </section> - - <section id="theory-metrics"> - <h3>D. Key Evaluation Metrics</h3> - <ul> - <li><strong>Compression Ratio:</strong> <code>Original Size / Compressed Size</code>. (Higher is better).</li> - <li><strong>Space Savings:</strong> <code>(1 - (Compressed Size / Original Size)) * 100%</code>.</li> - <li><strong>Compression Speed:</strong> Rate at which data is compressed (e.g., MB/s).</li> - <li><strong>Decompression Speed:</strong> Rate at which data is decompressed (e.g., MB/s).</li> - <li><strong>Computational Cost/Resources:</strong> CPU usage, memory footprint for compression/decompression.</li> - <li><strong>Fidelity/Quality (Lossy):</strong> How close the decompressed data is to the original. - <ul> - <li><em>Objective:</em> PSNR (Peak Signal-to-Noise Ratio), SSIM (Structural Similarity Index).</li> - <li><em>Subjective:</em> Perceived quality by humans.</li> - </ul> - </li> - <li><strong>Asymmetry:</strong> The difference in computational cost between compression and decompression. Some algorithms are highly asymmetric (e.g., slow to compress, very fast to decompress – desirable for content distribution).</li> - <li><strong>Robustness to Errors:</strong> How well a compressed stream can recover from bit errors during transmission or storage. 
Some formats include error resilience features.</li> - </ul> - </section> - - <section id="theory-principles"> - <h3>E. Basic Principles/Underlying Techniques</h3> - <ul> - <li><strong>Dictionary-Based:</strong> Replaces repeated data sequences with references to entries in a dictionary (dynamically built or predefined). (e.g., LZ77, LZW). - <div class="placeholder-diagram">Diagram Idea: Simple visual of a text string with a repeated phrase, showing the phrase being replaced by a short code referencing a dictionary entry.</div> - </li> - <li><strong>Statistical Modeling:</strong> Assigns shorter codes to more frequent symbols/patterns and longer codes to less frequent ones. (e.g., Huffman Coding, Arithmetic Coding). - <div class="placeholder-diagram">Diagram Idea: Small Huffman tree for a few characters like A, B, C, D with different frequencies.</div> - </li> - <li><strong>Transform Coding:</strong> Converts data from its spatial/temporal domain to a frequency domain where energy is often more compacted into fewer coefficients, which can then be quantized and entropy coded. (e.g., DCT in JPEG, Wavelets in JPEG 2000). - <div class="placeholder-diagram">Diagram Idea: Conceptual 8x8 pixel block transforming into an 8x8 DCT coefficient block with energy concentrated in top-left.</div> - </li> - <li><strong>Run-Length Encoding (RLE):</strong> Replaces sequences (runs) of identical symbols with a count of the symbol and the symbol itself. (e.g., <code>AAAAA</code> -> <code>5A</code>).</li> - <li><strong>Predictive Coding:</strong> Predicts the next value in a data stream based on previous values and encodes the difference (error) between the predicted and actual value. (e.g., DPCM in audio, motion prediction in video).</li> - <li><strong>Context Modeling:</strong> A technique (often used with statistical coders) where the probability of a symbol is estimated based on the preceding symbols (the "context"). 
This allows the coder to adapt to local statistical variations in the data, improving compression. (e.g., used in PPM, Brotli, Zstd entropy stage, PAQ).</li> - <li><strong>Burrows-Wheeler Transform (BWT):</strong> A reversible data transform that groups similar characters together in the input string. While not a compressor itself, it preprocesses data to make it more amenable to subsequent compression stages (like Move-to-Front transform and RLE/statistical coding). Used in <code>bzip2</code>.</li> - <li><strong>Delta Coding (Differential Coding):</strong> The simplest form of predictive coding, in which the prediction is simply the previous element. Stores the difference between consecutive data elements rather than the elements themselves. Very effective if data values change slowly or predictably.</li> - </ul> - </section> - </article> - - <article id="lossless-algorithms"> - <h2>II. Lossless Compression Algorithms</h2> - <p><em>Data can be perfectly reconstructed from the compressed version.</em></p> - - <details> - <summary>Run-Length Encoding (RLE)</summary> - <ul> - <li><strong>Core Idea:</strong> Replaces consecutive sequences (runs) of identical data values with a single data value and a count.</li> - <li><strong>Use Cases:</strong> Simple graphic images (icons, line drawings), fax transmissions, bitmap (<code>.BMP</code>) images, TIFF files. Effective for data with many repetitions.</li> - <li><strong>Strengths:</strong> Very simple to implement, computationally inexpensive, fast.</li> - <li><strong>Weaknesses:</strong> Inefficient for data without long runs; can even increase file size in such cases.</li> - <li><strong>File Extensions/Protocols:</strong> Used within BMP, TIFF, PDF. A <code>.rle</code> extension exists but is little-used.</li> - </ul> - </details> - - <details> - <summary>Huffman Coding</summary> - <ul> - <li><strong>Core Idea:</strong> Assigns variable-length codes to input characters based on their frequencies; more frequent characters get shorter codes. 
Uses prefix codes (no code is a prefix of another).</li> - <li><strong>Use Cases:</strong> Component in other algorithms (Deflate, JPEG, MP3, PNG). Transmitting text and fax.</li> - <li><strong>Strengths:</strong> Optimal per-symbol coding efficiency, lossless, relatively simple to implement.</li> - <li><strong>Weaknesses:</strong> Requires frequency of characters to be known beforehand (or two passes over data). Less effective if character frequencies are evenly distributed.</li> - <li><strong>File Extensions/Protocols:</strong> Used within PKZIP, GZIP, JPEG, PNG, MP3.</li> - </ul> - </details> - - <details> - <summary>Lempel-Ziv 77 (LZ77) & Lempel-Ziv 78 (LZ78)</summary> - <ul> - <li><strong>Core Idea:</strong> Dictionary-based algorithms. LZ77 uses a sliding window to find repeated sequences in previously seen data and replaces them with (offset, length) pointers. LZ78 explicitly builds a dictionary of phrases encountered and outputs dictionary codes.</li> - <li><strong>Use Cases:</strong> General-purpose text and data compression. Form the basis for many popular archivers (e.g., ZIP, GZIP via Deflate).</li> - <li><strong>Strengths:</strong> Adaptive (builds dictionary on-the-fly), good compression ratios, no prior knowledge of symbol probabilities needed.</li> - <li><strong>Weaknesses:</strong> LZ78 can be simpler but sometimes less effective than LZ77. Decompression might be slower if not optimized.</li> - <li><strong>File Extensions/Protocols:</strong> Foundational for formats like <code>.zip</code>, <code>.gz</code> (via Deflate). Some direct uses with <code>.lz</code>, <code>.lzh</code>.</li> - </ul> - </details> - - <details> - <summary>Lempel-Ziv-Welch (LZW)</summary> - <ul> - <li><strong>Core Idea:</strong> An improvement on LZ78; builds a string translation table (dictionary) from the input data. When a sequence is encountered that is already in the dictionary, its code is output. 
New sequences (current sequence + next character) are added to the dictionary.</li> - <li><strong>Use Cases:</strong> GIF image format, <code>compress</code> utility in Unix, TIFF, PDF files.</li> - <li><strong>Strengths:</strong> Simple to implement, fast decompression, good for repetitive data. Does not require prior information about the input data stream.</li> - <li><strong>Weaknesses:</strong> Patent issues historically (now expired). Can be less efficient than modern LZ variants for some data types. Files without repetitive information can become larger.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.gif</code>, used in TIFF, PDF.</li> - </ul> - </details> - <details> - <summary>bzip2</summary> - <ul> - <li><strong>Core Idea:</strong> Uses the Burrows-Wheeler Transform (BWT) to group similar characters, followed by a Move-to-Front transform and Huffman coding.</li> - <li><strong>Use Cases:</strong> General file compression, common in Unix/Linux for distributing source code and data.</li> - <li><strong>Strengths:</strong> Generally achieves better compression ratios than Deflate (gzip) for many data types.</li> - <li><strong>Weaknesses:</strong> Significantly slower compression and decompression speeds compared to Deflate, Brotli, and Zstd. Not memory efficient for compression.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.bz2</code></li> - </ul> - </details> - - <details> - <summary>LZMA / LZMA2 (Lempel-Ziv-Markov chain Algorithm)</summary> - <ul> - <li><strong>Core Idea:</strong> Uses an LZ77-variant dictionary coder with a very large dictionary size and a Markov chain-based range encoder (similar to arithmetic coding) for bit-level probability modeling. LZMA2 improves LZMA for multi-threading and better handling of incompressible data.</li> - <li><strong>Use Cases:</strong> Default for 7-Zip (<code>.7z</code>) archives, XZ Utils (<code>.xz</code>). 
Used for software distribution, large archives.</li> - <li><strong>Strengths:</strong> Very high compression ratios, often among the best for lossless general-purpose compression.</li> - <li><strong>Weaknesses:</strong> Can be slow for compression, especially at higher settings. Decompression is faster but still not as fast as Zstd or Deflate. Can require significant memory.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.7z</code>, <code>.xz</code></li> - </ul> - </details> - - <details> - <summary>Deflate (LZ77 + Huffman)</summary> - <ul> - <li><strong>Core Idea:</strong> Combines LZ77 to find and replace duplicate strings with (distance, length) pairs, followed by Huffman coding to compress the literals and the LZ77 output.</li> - <li><strong>Use Cases:</strong> ZIP and GZIP file formats, PNG image files, HTTP compression.</li> - <li><strong>Strengths:</strong> Good balance of compression ratio and speed, widely adopted and supported, lossless.</li> - <li><strong>Weaknesses:</strong> Can be slower in compression and achieve lower ratios than newer algorithms like Zstandard or Brotli.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.zip</code>, <code>.gz</code>, <code>.png</code> (internally).</li> - </ul> - </details> - - <details> - <summary>Arithmetic Coding</summary> - <ul> - <li><strong>Core Idea:</strong> Encodes the entire message into a single fractional number between 0 and 1. Achieves closer to theoretical entropy limits by assigning shorter effective codes to more probable sequences.</li> - <li><strong>Use Cases:</strong> Often used as a component in other compression standards like JPEG 2000, H.264/AVC, and some variants of bzip2.</li> - <li><strong>Strengths:</strong> Higher compression efficiency than Huffman coding, especially for skewed probabilities or small alphabets.</li> - <li><strong>Weaknesses:</strong> More computationally complex than Huffman coding. Historically had patent concerns. 
Sensitive to errors (a single bit error can corrupt the entire remaining message).</li> - <li><strong>File Extensions/Protocols:</strong> Used internally in formats like JPEG2000.</li> - </ul> - </details> - - <details> - <summary>Brotli (LZ77 + Huffman + 2nd order Context Modeling + Static Dictionary)</summary> - <ul> - <li><strong>Core Idea:</strong> Modern algorithm developed by Google. Uses a combination of a variant of LZ77, Huffman coding, 2nd order context modeling to select Huffman tables, and a pre-defined static dictionary of common substrings.</li> - <li><strong>Use Cases:</strong> Primarily for web content (HTTP compression, WOFF2 fonts). Excels on text data.</li> - <li><strong>Strengths:</strong> Excellent compression ratios, especially for text, often better than Gzip/Deflate. Fast decompression.</li> - <li><strong>Weaknesses:</strong> Compression speed can be slower than Gzip, though it offers various quality levels to balance speed and ratio.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.br</code> (for files), <code>br</code> content encoding for HTTP.</li> - </ul> - </details> - - <details> - <summary>Zstandard (Zstd) (LZ77 variant + Finite State Entropy / ANS)</summary> - <ul> - <li><strong>Core Idea:</strong> Developed by Facebook. Modern algorithm using an LZ77-variant (sliding-window dictionary matcher) and a fast entropy coding stage (Finite State Entropy - FSE, an Asymmetric Numeral System variant). Offers many compression levels.</li> - <li><strong>Use Cases:</strong> General-purpose compression, databases (e.g., RocksDB, MySQL), file systems (e.g., ZFS, Btrfs), network traffic, real-time compression. Archival (e.g. <code>.tar.zst</code>).</li> - <li><strong>Strengths:</strong> Very fast compression and decompression speeds across a wide range of ratios. Highly flexible with many levels, good ratios comparable to Deflate but much faster. 
Supports dictionary compression for small files.</li> - <li><strong>Weaknesses:</strong> Newer, so adoption, while rapidly growing, might not be as universal as Deflate yet.</li> - <li><strong>File Extensions/Protocols:</strong> <code>.zst</code>, <code>zstd</code> content encoding for HTTP.</li> - </ul> - </details> - <details> - <summary>Prediction by Partial Matching (PPM)</summary> - <ul> - <li><strong>Core Idea:</strong> An adaptive statistical data compression technique based on context modeling and arithmetic coding. It uses several preceding symbols (the context) to predict the next symbol and its probability.</li> - <li><strong>Use Cases:</strong> Text compression, general-purpose data. Known for achieving high compression ratios, especially on text.</li> - <li><strong>Strengths:</strong> High compression ratios, adaptive.</li> - <li><strong>Weaknesses:</strong> Computationally expensive (both CPU and memory), especially for higher orders of context. Can be slow.</li> - <li><strong>File Extensions/Protocols:</strong> Not typically associated with a common standalone extension, but a core technique in some high-ratio archivers.</li> - </ul> - </details> - - - <h4>Lossless Audio Codecs</h4> - <details> - <summary>FLAC (Free Lossless Audio Codec)</summary> - <ul> - <li><strong>Core Idea:</strong> Uses linear prediction to model the audio signal, then encodes the residual error using Golomb-Rice coding.</li> - <li><strong>Use Cases:</strong> Archival of music, high-fidelity audio playback.</li> - <li><strong>Strengths:</strong> Good compression for audio (typically 30-60% reduction), royalty-free, widely supported.</li> - <li><strong>File Extensions:</strong> <code>.flac</code>, <code>.fla</code></li> - </ul> - </details> - <details> - <summary>ALAC (Apple Lossless Audio Codec)</summary> - <ul> - <li><strong>Core Idea:</strong> Also uses linear prediction, but with different parameters and entropy coding.</li> - <li><strong>Use Cases:</strong> Used within 
Apple's ecosystem (iTunes, iOS).</li> - <li><strong>Strengths:</strong> Similar compression to FLAC, good integration in Apple products.</li> - <li><strong>File Extensions:</strong> <code>.m4a</code> (when containing ALAC)</li> - </ul> - </details> - </article> - - <article id="lossy-algorithms"> - <h2>III. Lossy Compression Algorithms</h2> - <p><em>Achieve higher compression by discarding some information; original data cannot be perfectly reconstructed.</em></p> - - <details> - <summary>JPEG (Joint Photographic Experts Group)</summary> - <ul> - <li><strong>Core Idea:</strong> Divides image into 8x8 pixel blocks, applies Discrete Cosine Transform (DCT) to each block, quantizes DCT coefficients (discards less important high-frequency data), and then uses Huffman or Arithmetic coding for entropy encoding.</li> - <li><strong>Use Cases:</strong> Still images, especially photographs. Very common on the web.</li> - <li><strong>Strengths:</strong> Widely supported, good compression for photographic images at acceptable quality levels, adjustable compression level.</li> - <li><strong>Weaknesses:</strong> Lossy, can introduce "blocking" artifacts at high compression ratios, not ideal for images with sharp lines or text. Each compression-decompression cycle degrades quality.</li> - <li><strong>Configurable Parameters:</strong> Quality setting (typically 1-100), chroma subsampling.</li> - <li><strong>File Extensions:</strong> <code>.jpg</code>, <code>.jpeg</code>, <code>.jfif</code>, <code>.jif</code>, <code>.jpe</code>.</li> - </ul> - </details> - - <details> - <summary>JPEG 2000</summary> - <ul> - <li><strong>Core Idea:</strong> Uses a wavelet transform instead of DCT, applied to the entire image or large tiles. 
Offers progressive decoding (resolution or quality).</li> - <li><strong>Use Cases:</strong> Medical imaging (DICOM), digital cinema (DCP), archival of high-quality images, satellite imagery.</li> - <li><strong>Strengths:</strong> Better compression efficiency and image quality than original JPEG, especially at low bitrates. Supports both lossless and lossy compression. Region of Interest (ROI) coding, scalability (resolution, quality).</li> - <li><strong>Weaknesses:</strong> More computationally complex than JPEG. Limited native browser and general software support compared to JPEG. Can be slower.</li> - <li><strong>Configurable Parameters:</strong> Compression ratio/bitrate, quality layers, resolution levels, lossless/lossy.</li> - <li><strong>File Extensions:</strong> <code>.jp2</code>, <code>.j2k</code>, <code>.jpx</code>, <code>.jpm</code>, <code>.mj2</code> (Motion JPEG 2000).</li> - </ul> - </details> - - <h4>MPEG (Moving Picture Experts Group) - Family Overview</h4> - <p>A suite of standards for audio and video compression. 
Key members below:</p> - - <details> - <summary>MPEG-2 (specifically Part 2 Video, H.262)</summary> - <ul> - <li><strong>Core Idea:</strong> Block-based hybrid video coding using DCT, motion compensation (inter-frame prediction), and Huffman or variable-length coding.</li> - <li><strong>Use Cases:</strong> DVDs, standard-definition digital television broadcasting (DVB, ATSC), Blu-ray (for SD content).</li> - <li><strong>Strengths:</strong> Widely deployed standard, good quality for SD video, established hardware support.</li> - <li><strong>Weaknesses:</strong> Less efficient than newer standards like H.264/AVC for HD and higher resolutions.</li> - <li><strong>Configurable Parameters:</strong> Bitrate, Group of Pictures (GOP) structure, profiles and levels.</li> - <li><strong>File Extensions:</strong> <code>.mpg</code>, <code>.mpeg</code>, <code>.ts</code>, <code>.vob</code>, <code>.m2v</code>.</li> - </ul> - </details> - - <details> - <summary>H.264/AVC (Advanced Video Coding, MPEG-4 Part 10)</summary> - <ul> - <li><strong>Core Idea:</strong> Evolves MPEG-2 concepts with more advanced features: smaller block sizes for DCT (4x4 integer DCT), more flexible motion compensation (multiple reference frames, variable block sizes), in-loop deblocking filter, Context-Adaptive Binary Arithmetic Coding (CABAC) or CAVLC entropy coding.</li> - <li><strong>Use Cases:</strong> Blu-ray Discs, streaming video (YouTube, Netflix, Vimeo), video conferencing, digital TV (HD broadcasts), mobile video.</li> - <li><strong>Strengths:</strong> Significantly better compression efficiency (about 50% bitrate reduction for same quality) compared to MPEG-2. Wide hardware and software support. Good quality at various bitrates.</li> - <li><strong>Weaknesses:</strong> More computationally complex than MPEG-2. 
Licensing through patent pools.</li> - <li><strong>Configurable Parameters:</strong> Bitrate, profiles (e.g., Baseline, Main, High), levels, GOP settings, quantization parameters (QP).</li> - <li><strong>File Extensions:</strong> Commonly <code>.mp4</code>, <code>.mkv</code>, <code>.mov</code>, <code>.ts</code>, <code>.flv</code>, <code>.avi</code>.</li> - </ul> - </details> - - <details> - <summary>H.265/HEVC (High Efficiency Video Coding)</summary> - <ul> - <li><strong>Core Idea:</strong> Successor to H.264/AVC. Uses larger and more flexible coding units (Coding Tree Units - CTUs up to 64x64), improved prediction modes (intra and inter), Sample Adaptive Offset (SAO) filtering, and more parallel processing capabilities.</li> - <li><strong>Use Cases:</strong> 4K/8K Ultra HD Blu-ray, streaming of UHD content, modern digital TV broadcasts.</li> - <li><strong>Strengths:</strong> Roughly doubles the compression efficiency of H.264/AVC (i.e., about 50% bitrate reduction for similar quality). Supports higher resolutions and frame rates.</li> - <li><strong>Weaknesses:</strong> Significantly more computationally intensive for encoding and decoding than H.264. Complex patent licensing situation (though improving).</li> - <li><strong>Configurable Parameters:</strong> Bitrate, profiles (e.g., Main, Main10), tiers, levels, QP.</li> - <li><strong>File Extensions:</strong> Commonly <code>.mp4</code>, <code>.mkv</code>, <code>.ts</code>.</li> - </ul> - </details> - <details> - <summary>VP9 (Mention)</summary> - <ul> - <li><strong>Core Idea:</strong> Open and royalty-free video coding format developed by Google, successor to VP8. A primary competitor to H.265/HEVC before AV1.</li> - <li><strong>Use Cases:</strong> Widely used by YouTube for streaming, WebRTC.</li> - <li><strong>Strengths:</strong> Good compression efficiency, comparable to early H.265 implementations. Royalty-free. 
Strong browser support.</li> - <li><strong>Weaknesses:</strong> Largely being superseded by AV1 for future development and top-tier efficiency.</li> - <li><strong>File Extensions:</strong> Often in <code>.webm</code>, <code>.mp4</code>.</li> - </ul> - </details> - - <details> - <summary>AV1 (AOMedia Video 1)</summary> - <ul> - <li><strong>Core Idea:</strong> Developed by the Alliance for Open Media (AOMedia). Royalty-free. Uses advanced techniques like larger superblocks (up to 128x128), sophisticated intra and inter prediction, constrained directional enhancement filter (CDEF), loop restoration filter, and symbol coding based on the Daala entropy coder, an adaptation of arithmetic coding.</li> - <li><strong>Use Cases:</strong> Increasingly used for web streaming (YouTube, Netflix, Twitch), real-time communications (WebRTC).</li> - <li><strong>Strengths:</strong> Offers better compression efficiency than H.265/HEVC (reportedly 20-30% improvement). Royalty-free, open source. Designed for internet video.</li> - <li><strong>Weaknesses:</strong> Very computationally intensive to encode (though improving rapidly with new encoders like SVT-AV1). Decoding can also be demanding, requiring modern hardware for high resolutions.</li> - <li><strong>Configurable Parameters:</strong> Bitrate, quality settings (CRF - Constant Rate Factor), speed presets.</li> - <li><strong>File Extensions:</strong> Commonly <code>.mkv</code>, <code>.webm</code>, <code>.mp4</code> (with ISOBMFF container).</li> - </ul> - </details> - - <details> - <summary>WebP</summary> - <ul> - <li><strong>Core Idea:</strong> Developed by Google. For lossy mode, it uses intra-frame prediction based on VP8 video codec techniques (block prediction, DCT/WHT, quantization, entropy coding). Also supports a lossless mode that uses different techniques (e.g., spatial prediction, color space transform, LZ77-like backend). 
Supports animation and transparency.</li> - <li><strong>Use Cases:</strong> Web images, aiming to replace JPEG, PNG, and GIF.</li> - <li><strong>Strengths:</strong> Better compression than JPEG (for lossy) and PNG (for lossless) at similar quality. Supports animation and alpha transparency. Widely supported in modern browsers.</li> - <li><strong>Weaknesses:</strong> Lossy quality can sometimes be debated vs. highly optimized JPEGs or newer AVIF.</li> - <li><strong>Configurable Parameters:</strong> Quality setting (lossy), effort setting (lossless).</li> - <li><strong>File Extensions:</strong> <code>.webp</code></li> - </ul> - </details> - - <details> - <summary>AVIF (AV1 Image File Format)</summary> - <ul> - <li><strong>Core Idea:</strong> An image format that uses AV1 video intra-frame coding techniques (e.g., advanced prediction modes, transforms, loop filters) to compress still images. Stores image sequences in HEIF container.</li> - <li><strong>Use Cases:</strong> Web images, aiming for significant quality improvements over JPEG and WebP at similar file sizes.</li> - <li><strong>Strengths:</strong> Significantly better compression efficiency than JPEG and often WebP. Supports HDR, wide color gamut, lossless, transparency, and animations. Royalty-free.</li> - <li><strong>Weaknesses:</strong> Newer, so software and browser support is still growing (though rapidly). Can be computationally more demanding than JPEG/WebP.</li> - <li><strong>Configurable Parameters:</strong> Quality setting (quantizer), speed/effort.</li> - <li><strong>File Extensions:</strong> <code>.avif</code></li> - </ul> - </details> - - - <details> - <summary>MP3 (MPEG-1 Audio Layer III)</summary> - <ul> - <li><strong>Core Idea:</strong> Uses psychoacoustic models to discard parts of the audio signal that are less perceptible to human hearing. 
Applies Modified Discrete Cosine Transform (MDCT), quantizes frequency coefficients, and uses Huffman coding.</li> - <li><strong>Use Cases:</strong> Digital audio, music files, podcast distribution. Historically dominant for portable music.</li> - <li><strong>Strengths:</strong> Ubiquitous support across devices and software. Good quality at moderate to high bitrates (e.g., 192-320 kbps). Small file sizes.</li> - <li><strong>Weaknesses:</strong> Lossy. Less efficient than newer audio codecs like AAC or Opus, especially at lower bitrates where artifacts can be noticeable.</li> - <li><strong>Configurable Parameters:</strong> Bitrate (Constant - CBR, or Variable - VBR), sample rate, stereo/mono.</li> - <li><strong>File Extensions:</strong> <code>.mp3</code>.</li> - </ul> - </details> - - <details> - <summary>AAC (Advanced Audio Coding)</summary> - <ul> - <li><strong>Core Idea:</strong> Successor to MP3, part of MPEG-2 and MPEG-4 standards. Uses improved psychoacoustic models, MDCT with better windowing functions, and more efficient coding techniques (e.g., Temporal Noise Shaping - TNS, Perceptual Noise Substitution - PNS).</li> - <li><strong>Use Cases:</strong> Default audio format for Apple iTunes/Music, YouTube, Nintendo DSi/3DS, PlayStation. Common in <code>.mp4</code> videos, digital radio (DAB+), streaming services.</li> - <li><strong>Strengths:</strong> Generally better audio quality than MP3 at the same bitrate, especially at lower bitrates. Supports more channels and higher sample rates.</li> - <li><strong>Weaknesses:</strong> Lossy. 
Several variants and profiles (e.g., AAC-LC, HE-AAC, HE-AACv2) can sometimes cause compatibility issues if not handled correctly.</li> - <li><strong>Configurable Parameters:</strong> Bitrate (CBR/VBR), profiles (LC, HE, HEv2), sample rate.</li> - <li><strong>File Extensions:</strong> <code>.aac</code>, <code>.m4a</code>, <code>.m4b</code>, <code>.m4p</code>, <code>.mp4</code>.</li> - </ul> - </details> - - <details> - <summary>Opus</summary> - <ul> - <li><strong>Core Idea:</strong> Royalty-free, highly versatile audio codec developed by Xiph.Org and standardized by IETF. Combines speech-oriented SILK (LPC-based) and music/general audio-oriented CELT (MDCT-based) algorithms, switching or combining them dynamically.</li> - <li><strong>Use Cases:</strong> Voice over IP (VoIP), video conferencing (WebRTC default), game chat, internet streaming, audiobooks.</li> - <li><strong>Strengths:</strong> Excellent quality across a wide range of bitrates (from low-bitrate speech to high-fidelity stereo music). Very low latency, making it ideal for real-time communication. Royalty-free and open source. Adaptive.</li> - <li><strong>Weaknesses:</strong> Less ubiquitous for stored music files compared to MP3 or AAC, though support is growing.</li> - <li><strong>Configurable Parameters:</strong> Bitrate (CBR/VBR), application type (VoIP, Audio, Restricted Low-Delay), frame duration, complexity.</li> - <li><strong>File Extensions:</strong> <code>.opus</code> (often within <code>.ogg</code> or <code>.webm</code> containers).</li> - </ul> - </details> - - <details> - <summary>Vorbis (Ogg Vorbis)</summary> - <ul> - <li><strong>Core Idea:</strong> Open-source, patent-free, lossy audio format. 
Uses Modified Discrete Cosine Transform (MDCT), vector quantization for floor and residue, and a codebook-based entropy encoding.</li> - <li><strong>Use Cases:</strong> Popular in open-source software, indie games, some streaming services (historically by Spotify).</li> - <li><strong>Strengths:</strong> Good audio quality, especially at mid to high bitrates. Royalty-free and open.</li> - <li><strong>Weaknesses:</strong> Generally considered less efficient than Opus or modern AAC variants at very low bitrates. Hardware support less widespread than MP3/AAC. Development has largely shifted towards Opus.</li> - <li><strong>Configurable Parameters:</strong> Quality setting (q -1.0 to 10.0), average/min/max bitrate.</li> - <li><strong>File Extensions:</strong> <code>.ogg</code>, <code>.oga</code>.</li> - </ul> - </details> - - <section id="lossy-psychophysical"> - <h4>Psychovisual and Psychoacoustic Principles in Lossy Codecs</h4> - <ul> - <li><strong>Psychoacoustics (Audio):</strong> Lossy audio codecs (MP3, AAC, Opus, Vorbis) exploit auditory masking. - <ul> - <li><strong>Frequency Masking:</strong> Louder sounds can make quieter sounds at nearby frequencies inaudible.</li> - <li><strong>Temporal Masking:</strong> A loud sound can mask a quieter sound immediately before (pre-masking) or after (post-masking) it.</li> - <li>These codecs discard or heavily quantize information in masked regions, which are less likely to be perceived by human ears.</li> - </ul> - </li> - <li><strong>Psychovisuals (Image/Video):</strong> Lossy image and video codecs (JPEG, H.264, AV1) exploit characteristics of the human visual system (HVS). - <ul> - <li><strong>Luminance vs. Chrominance Sensitivity:</strong> Humans are more sensitive to changes in brightness (luminance) than color (chrominance). 
Chroma subsampling (e.g., 4:2:0) reduces color information.</li> - <li><strong>Frequency Sensitivity:</strong> Humans are less sensitive to high-frequency visual information (fine details) than low-frequency information (overall shapes). Transform coding (DCT, wavelets) allows selective quantization, discarding more high-frequency detail.</li> - <li><strong>Contrast Masking:</strong> Visual patterns or textures can mask noise or artifacts within those regions.</li> - </ul> - </li> - </ul> - </section> - </article> - - <article id="practical-considerations"> - <h2>IV. Practical Considerations & Application: The "How-To"</h2> - - <section id="practical-choosing"> - <h3>A. Choosing the Right Algorithm</h3> - <h4>Key Questions to Ask:</h4> - <ol> - <li><strong>What type of data is it?</strong> (Text, image, audio, video, binary executable, mixed)</li> - <li><strong>Is any information loss acceptable?</strong> (Critical: Lossless vs. Lossy decision) - <ul> - <li><em>If Lossless:</em> Executables, source code, medical records, financial data, archival masters.</li> - <li><em>If Lossy is OK:</em> Most images for web, streaming audio/video, previews.</li> - </ul> - </li> - <li><strong>What is the primary goal?</strong> - <ul> - <li>Maximum compression ratio (e.g., for archival, limited storage)?</li> - <li>Fastest compression speed (e.g., for real-time capture)?</li> - <li>Fastest decompression speed (e.g., for quick user access, web delivery)?</li> - <li>Best quality (for lossy, at a given bitrate)?</li> - <li>Low computational/power cost (e.g., for mobile/embedded devices)?</li> - </ul> - </li> - <li><strong>What are the resource constraints?</strong> (CPU, RAM for encoding/decoding)</li> - <li><strong>What is the target platform/ecosystem?</strong> (Algorithm support, libraries, hardware acceleration)</li> - <li><strong>Are there licensing/royalty concerns?</strong> (e.g., H.264/H.265 vs. 
AV1/Opus)</li> - <li><strong>Energy Consumption / Battery Life:</strong> Especially relevant for mobile and IoT devices.</li> - <li><strong>Standards Compliance & Interoperability:</strong> For data exchange, adherence to widely adopted standards is crucial.</li> - </ol> - - <h4>General Guidelines (Examples):</h4> - <ul> - <li><strong>Text/Code:</strong> Zstd (good balance), Brotli (excellent for web text), Gzip (universal).</li> - <li><strong>General Archival:</strong> Zstd (high levels), 7-Zip (LZMA/LZMA2), XZ (LZMA2), bzip2.</li> - <li><strong>Photographic Images (Web):</strong> JPEG (balance), WebP (good alternative), AVIF (emerging, higher quality/ratio).</li> - <li><strong>Graphic Images (Web):</strong> PNG (lossless), WebP (lossless/lossy), SVG (vector).</li> - <li><strong>Streaming Video:</strong> H.264 (wide support), H.265 (better ratio for 4K+), AV1 (royalty-free, best ratio, growing support), VP9.</li> - <li><strong>Streaming Audio:</strong> Opus (versatile, low-latency), AAC (widely supported, good quality).</li> - <li><strong>Real-time Communication:</strong> Opus (audio), H.264/VP9/AV1 (video, depending on client support & latency needs).</li> - </ul> - <div class="placeholder-diagram">Diagram Idea: A simplified decision tree flowchart: Data Type -> Lossless/Lossy -> Key Priority (Speed/Ratio/Quality) -> Suggested Algorithms.</div> - </section> - - <section id="practical-tools"> - <h3>B. 
Common Tools, Libraries & Software</h3> - <h4>Command-Line Archivers:</h4> - <ul> - <li><code>gzip</code> / <code>gunzip</code>: Implements Deflate (common on Unix-like systems).</li> - <li><code>zip</code> / <code>unzip</code>: Handles <code>.zip</code> archives (commonly Deflate).</li> - <li><code>7-Zip</code> (<code>7z</code> command): Supports many formats including its own 7z (LZMA/LZMA2), ZIP, Gzip, Bzip2, TAR, Zstd, Brotli.</li> - <li><code>tar</code>: Not a compression tool itself, but often used to bundle files, then compressed with <code>gzip</code> (<code>.tar.gz</code>), <code>bzip2</code> (<code>.tar.bz2</code>), <code>xz</code> (<code>.tar.xz</code>), <code>zstd</code> (<code>.tar.zst</code>).</li> - <li><code>bzip2</code> / <code>bunzip2</code>: Implements bzip2 algorithm.</li> - <li><code>xz</code> / <code>unxz</code>: Implements LZMA2 algorithm.</li> - <li><code>brotli</code>: Brotli command-line tool.</li> - <li><code>zstd</code>: Zstandard command-line tool.</li> - </ul> - <h4>Libraries for Developers:</h4> - <ul> - <li><strong>zlib:</strong> (C library for Deflate; bindings in Python, Java, etc.) 
- Foundational for Gzip, PNG.</li> - <li><strong>libjpeg-turbo / libjpeg:</strong> (C library for JPEG)</li> - <li><strong>libpng:</strong> (C library for PNG, uses zlib)</li> - <li><strong>FFmpeg:</strong> (Powerful C library and command-line tool for audio/video transcoding, supports numerous codecs including H.264, H.265, AV1, MP3, AAC, Opus, Vorbis).</li> - <li><strong>libbrotli:</strong> (C library for Brotli)</li> - <li><strong>libzstd:</strong> (C library for Zstandard)</li> - <li><strong>liblzma:</strong> (C library for XZ/LZMA2)</li> - <li><strong>libbz2:</strong> (C library for bzip2)</li> - <li><strong>libopus:</strong> (C library for Opus)</li> - <li><strong>libvpx:</strong> (Google's C library for VP8/VP9 video codecs)</li> - <li><strong>libaom / SVT-AV1:</strong> (Reference and production C libraries for AV1)</li> - <li><strong>Programming Language Built-ins/Standard Libraries:</strong> - <ul> - <li>Python: <code>zlib</code>, <code>gzip</code>, <code>bz2</code>, <code>lzma</code>. Third-party: <code>python-zstandard</code>, <code>brotli</code>.</li> - <li>Java: <code>java.util.zip</code> (Deflate, GZIP). Third-party for others.</li> - </ul> - </li> - </ul> - <h4>Applications:</h4> - <ul> - <li>Image Editors (GIMP, Photoshop): Support JPEG, PNG, GIF, TIFF, WebP, AVIF etc.</li> - <li>Video Editors (DaVinci Resolve, Premiere Pro): Support various professional and consumer video codecs.</li> - <li>Audio Editors (Audacity, Audition): Support WAV, MP3, AAC, Ogg Vorbis, FLAC, Opus.</li> - <li>Game Engines (e.g., Unity, Unreal Engine): Support various texture compression formats (ASTC, BCn/S3TC, ETC) and audio codecs.</li> - </ul> - </section> - - <section id="practical-domains"> - <h3>C. 
Application Domains with Unique Needs</h3> - <ul> - <li><strong>Databases:</strong> Often use specialized techniques like dictionary encoding, bit-packing, delta encoding, RLE, and general-purpose algorithms (LZ4, Zstd) for columnar or row-based data to reduce storage and improve query I/O.</li> - <li><strong>Network Traffic:</strong> HTTP compression (Gzip, Brotli, Zstd) for web assets. Real-time protocols (RTP) for VoIP/video conferencing use codecs like Opus, G.7xx, H.264, AV1.</li> - <li><strong>Medical Imaging (e.g., DICOM standard):</strong> Often requires lossless (e.g., JPEG-LS, RLE, lossless JPEG 2000) or "visually lossless" lossy compression (JPEG 2000) to preserve diagnostic integrity.</li> - <li><strong>Genomic Data (e.g., FASTQ, BAM/CRAM):</strong> Specialized compressors (e.g., CRAM using LZMA/Arithmetic, Spring) are designed for highly repetitive sequence data.</li> - <li><strong>Log Files & Text Data:</strong> Algorithms like Zstd and Brotli excel due to their speed and high ratios on repetitive text.</li> - <li><strong>Archival & Backups:</strong> Emphasis on high compression ratios and data integrity. Algorithms like Zstd (high levels), LZMA/LZMA2 (used in 7z, xz), and Bzip2 are common.</li> - <li><strong>Scientific Data (e.g., HDF5, NetCDF):</strong> Often use chunking and support pluggable compression filters (like Deflate, Zstd, specialized lossless/lossy methods for floating-point data) to balance access speed and storage.</li> - </ul> - </section> - - <section id="practical-trends"> - <h3>D. Emerging Trends / Newer Algorithms</h3> - <ul> - <li><strong>AI/Neural Network-Based Compression:</strong> Active research area for images, video, and audio. Shows potential for very high compression ratios by learning data representations. Often computationally very expensive for training and sometimes for inference. 
Examples: (Research stage) VVC includes some AI-driven tools, some proprietary AI image/video enhancers/compressors.</li> - <li><strong>Perceptual Video Coding (e.g., VVC - Versatile Video Coding):</strong> The latest MPEG standard, aiming for ~30-50% improvement over HEVC. Incorporates more advanced techniques, some AI-influenced.</li> - <li><strong>Specialized Hardware Acceleration:</strong> Increasing availability of hardware encoders/decoders for newer codecs (AV1, VVC) in CPUs, GPUs, and dedicated ASICs, crucial for their practical adoption.</li> - <li><strong>Continued Focus on Speed AND Ratio:</strong> Algorithms like Zstd demonstrate that high speed and good ratios are not always mutually exclusive.</li> - <li><strong>Focus on Semantic Compression:</strong> Compressing based on the <em>meaning</em> or content of the data rather than just statistical redundancy, especially in AI-driven compression (e.g., transmitting a description of an image rather than pixels).</li> - <li><strong>Compression for Privacy:</strong> Techniques that aim to compress data while also providing some level of privacy, e.g., through homomorphic encryption friendly compression or by obfuscating sensitive parts during compression. (More niche/research but a growing consideration).</li> - </ul> - </section> - - <section id="practical-processing"> - <h3>E. Pre-processing and Post-processing in Compression</h3> - <ul> - <li><strong>Pre-processing:</strong> Steps taken before compression to make data more compressible. - <ul> - <li><em>Examples:</em> Normalization, removing noise (for lossy), data transformation (like BWT), reordering data fields, color space transformation.</li> - </ul> - </li> - <li><strong>Post-processing:</strong> Steps taken after decompression to enhance quality or reverse pre-processing steps. 
- <ul> - <li><em>Examples:</em> Deblocking filters (common in video codecs like H.264/AV1), deringing filters, error concealment, inverse color space transformation.</li> - </ul> - </li> - </ul> - </section> - </article> - - <article id="standards-bodies"> - <h2>V. Standards Bodies (Brief Mention)</h2> - <ul> - <li><strong>MPEG (Moving Picture Experts Group):</strong> Develops standards for audio and video (e.g., MPEG-2, H.264, H.265, VVC, MP3, AAC; JPEG comes from the separate Joint Photographic Experts Group). Part of ISO/IEC.</li> - <li><strong>ITU-T (International Telecommunication Union - Telecommunication Standardization Sector):</strong> Develops video coding standards, often jointly with MPEG (e.g., H.26x series).</li> - <li><strong>IETF (Internet Engineering Task Force):</strong> Develops standards for internet protocols, including codecs for real-time communication (e.g., Opus, AV1 (via AOMedia)).</li> - <li><strong>AOMedia (Alliance for Open Media):</strong> Consortium developing royalty-free video codecs like AV1.</li> - <li><strong>ISO (International Organization for Standardization) & IEC (International Electrotechnical Commission):</strong> General standards bodies, often publishing MPEG work.</li> - <li><strong>W3C (World Wide Web Consortium):</strong> Standardizes web technologies, including formats like WebP, PNG, and font compression (WOFF/WOFF2 using Brotli/Zopfli).</li> - </ul> - </article> + </div> + <p class="text-muted small mt-2"><em><strong>Note:</strong> Ratio & Speed are relative and can vary based on data, settings, and implementation.</em></p> + </div> + + <!-- I. Foundational Theory --> + <div class="schema-container" data-category="foundations" id="section-foundations"> + <h2 class="section-title"><i class="bi bi-lightbulb-fill"></i> I. 
Foundational Theory</h2> + <div class="row"> + <!-- Core Concepts Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="foundations"> + <div class="card-body"> + <h5><i class="bi bi-bullseye"></i> Core Concepts</h5> + <div class="card-content-wrapper"> + <p class="summary">Fundamental ideas like "What is compression?", its necessity, entropy, and types of redundancy.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseCoreConcepts" aria-expanded="false" aria-controls="collapseCoreConcepts"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseCoreConcepts"> + <h6>What is Data Compression?</h6> + <p>The process of reducing the size of data (number of bits) to store or transmit it more efficiently.</p> + <h6>Why Compress?</h6> + <ul> + <li><strong>Storage Savings:</strong> Store more data in the same space.</li> + <li><strong>Faster Data Transmission:</strong> Reduce time and bandwidth needed to transfer data.</li> + <li><strong>Reduced Costs:</strong> Lower expenses for storage and bandwidth.</li> + </ul> + <h6>Information Theory Basics:</h6> + <ul> + <li><strong>Entropy:</strong> A measure of the inherent randomness or uncertainty in data. Represents the theoretical lower bound on the average number of bits per symbol achievable by lossless compression.</li> + <li><strong>Redundancy:</strong> Information that is repeated or predictable. 
Types include: + <ul> + <li><span class="term" data-bs-toggle="tooltip" title="Correlation between neighboring data points (e.g., pixels in an image).">Spatial Redundancy</span></li> + <li><span class="term" data-bs-toggle="tooltip" title="Correlation between successive data points in time (e.g., frames in a video).">Temporal Redundancy</span></li> + <li><span class="term" data-bs-toggle="tooltip" title="Some symbols or patterns occur more frequently than others.">Statistical/Symbol Redundancy</span></li> + <li><span class="term" data-bs-toggle="tooltip" title="Information that human senses do not easily perceive or that is less important (exploited by lossy compression).">Perceptual Redundancy</span></li> + </ul> + </li> + </ul> + </div> + </div> + </div> + + <!-- Limits of Compression Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="foundations"> + <div class="card-body"> + <h5><i class="bi bi-cone-striped"></i> Limits of Compression</h5> + <div class="card-content-wrapper"> + <p class="summary">Understanding incompressibility and the theoretical boundaries set by Rate-Distortion Theory for lossy compression.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseLimitsCompression" aria-expanded="false" aria-controls="collapseLimitsCompression"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseLimitsCompression"> + <h6>Incompressibility</h6> + <p>Truly random data (or data that appears random, like encrypted or already well-compressed data) cannot be significantly compressed further by lossless methods. 
Applying a lossless compressor might even slightly increase size due to overhead.</p> + <h6>Rate-Distortion Theory (Lossy)</h6> + <p>A mathematical framework defining the trade-off between compression <span class="term" data-bs-toggle="tooltip" title="Number of bits used to represent the data after compression.">rate</span> (bits used) and <span class="term" data-bs-toggle="tooltip" title="The amount of information lost or fidelity degradation in lossy compression.">distortion</span> (loss of fidelity). It sets the minimum achievable rate for a given distortion level, guiding lossy codec design.</p> + </div> + </div> + </div> + + + <!-- Fundamental Classifications Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="foundations"> + <div class="card-body"> + <h5><i class="bi bi-diagram-2-fill"></i> Classifications</h5> + <div class="card-content-wrapper"> + <p class="summary">Key distinctions: Lossless vs. Lossy compression, and the concept of Near-Lossless compression.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseClassifications" aria-expanded="false" aria-controls="collapseClassifications"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseClassifications"> + <h6>Lossless vs. 
Lossy</h6> + <table class="table table-sm"> + <thead><tr><th>Feature</th><th>Lossless</th><th>Lossy</th></tr></thead> + <tbody> + <tr><td>Reconstruction</td><td>Perfect</td><td>Imperfect (Approximation)</td></tr> + <tr><td>Info Loss</td><td>None</td><td>Yes (irreversible)</td></tr> + <tr><td>Typical Ratio</td><td>Moderate (2:1-4:1)</td><td>High (10:1-100:1+)</td></tr> + <tr><td>Use Cases</td><td>Text, code, archives</td><td>Multimedia (images, audio, video)</td></tr> + </tbody> + </table> + <h6>Near-Lossless Compression</h6> + <p>A specialized category where decompressed data isn't identical, but differences are strictly bounded and often imperceptible (e.g., some scientific data, medical imaging).</p> + </div> + </div> + </div> + + <!-- Evaluation Metrics Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="foundations"> + <div class="card-body"> + <h5><i class="bi bi-graph-up-arrow"></i> Evaluation Metrics</h5> + <div class="card-content-wrapper"> + <p class="summary">How compression algorithms are measured: Ratio, Speed, Cost, Fidelity, Asymmetry, Robustness.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseMetrics" aria-expanded="false" aria-controls="collapseMetrics"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseMetrics"> + <ul> + <li><strong>Compression Ratio/Savings:</strong> Ratio = <code>Original Size / Compressed Size</code>; space savings = <code>(1 - Compressed Size / Original Size) * 100%</code>.</li> + <li><strong>Compression/Decompression Speed:</strong> Rate of data processing (e.g., MB/s).</li> + <li><strong>Computational Cost:</strong> CPU, memory usage.</li> + <li><strong>Fidelity/Quality (Lossy):</strong> Objective (PSNR, SSIM) or subjective perception.</li> + <li><strong>Asymmetry:</strong> Difference in computational cost between compression and decompression.</li> + <li><strong>Robustness to Errors:</strong> How well 
a compressed stream recovers from bit errors.</li> + </ul> + </div> + </div> + </div> + + <!-- Basic Principles/Techniques Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="foundations" data-category-alt="techniques"> <!-- techniques for filtering --> + <div class="card-body"> + <h5><i class="bi bi-tools"></i> Basic Principles/Techniques</h5> + <div class="card-content-wrapper"> + <p class="summary">Underlying methods used in many algorithms: Dictionary-based, Statistical, Transform, RLE, Predictive, Context Modeling, BWT, Delta Coding.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapsePrinciples" aria-expanded="false" aria-controls="collapsePrinciples"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapsePrinciples"> + <ul> + <li><strong>Dictionary-Based:</strong> Replaces sequences with references (e.g., LZ77, LZW). <div class="placeholder-diagram">Diagram: Sliding Window & Dictionary</div></li> + <li><strong>Statistical Modeling:</strong> Shorter codes for frequent symbols (e.g., Huffman, Arithmetic). <div class="placeholder-diagram">Diagram: Huffman Tree Example</div></li> + <li><strong>Transform Coding:</strong> Converts data to a more compressible domain (e.g., DCT in JPEG). 
<div class="placeholder-diagram">Diagram: DCT Block Transformation</div></li> + <li><strong>Run-Length Encoding (RLE):</strong> Replaces sequences of identical symbols (e.g., <code>AAAAA</code> -> <code>5A</code>).</li> + <li><strong>Predictive Coding:</strong> Encodes difference from predicted value.</li> + <li><strong>Context Modeling:</strong> Estimates symbol probability based on preceding symbols.</li> + <li><strong>Burrows-Wheeler Transform (BWT):</strong> Reversible transform grouping similar characters (used in bzip2).</li> + <li><strong>Delta Coding:</strong> Stores difference between consecutive data elements.</li> + </ul> + </div> + </div> + </div> + </div> <!-- /.row --> + </div> <!-- /.schema-container#foundations --> + + + <!-- II. Lossless Compression Algorithms --> + <div class="schema-container" data-category="lossless" id="section-lossless"> + <h2 class="section-title"><i class="bi bi-file-earmark-check-fill"></i> II. Lossless Algorithms</h2> + <div class="row"> + <!-- RLE Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-front"></i> Run-Length Encoding (RLE) <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Simple technique replacing consecutive identical data values with a count and the value.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseRLE" aria-expanded="false" aria-controls="collapseRLE"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseRLE"> + <h6>Core Idea:</h6> <p>Replaces runs of identical data with a count and the single data value (e.g., <code>WWWWBB</code> -> <code>4W2B</code>).</p> + <h6>Use Cases:</h6> <p>Simple graphics, icons, fax transmissions, bitmap images (BMP), TIFF.</p> + <h6>Strengths:</h6> <p>Very simple, computationally inexpensive, 
fast.</p> + <h6>Weaknesses:</h6> <p>Inefficient for data without long runs; can even increase file size.</p> + <h6>File Extensions:</h6> <p>Used within BMP, TIFF, PDF.</p> + </div> + </div> + </div> + <!-- Huffman Coding Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-tree-fill"></i> Huffman Coding <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Assigns variable-length codes based on symbol frequencies; more frequent symbols get shorter codes.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseHuffman" aria-expanded="false" aria-controls="collapseHuffman"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseHuffman"> + <h6>Core Idea:</h6> <p>Builds a prefix code tree where more frequent symbols have shorter paths (codes).</p> + <h6>Use Cases:</h6> <p>Component in Deflate (ZIP, GZIP), JPEG, MP3, PNG.</p> + <h6>Strengths:</h6> <p>Optimal per-symbol coding, relatively simple.</p> + <h6>Weaknesses:</h6> <p>Requires symbol frequencies beforehand (or two passes). Not adaptive by itself.</p> + <div class="placeholder-diagram">Diagram: Simple Huffman Tree Example</div> + </div> + </div> + </div> + <!-- LZ77/LZ78 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-book-fill"></i> LZ77 & LZ78 <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Dictionary-based algorithms. 
LZ77 uses a sliding window; LZ78 builds an explicit dictionary.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseLZ7778" aria-expanded="false" aria-controls="collapseLZ7778"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseLZ7778"> + <h6>Core Idea:</h6> <p>Replace repeated sequences with references to previously seen data.</p> + <h6>Use Cases:</h6> <p>General-purpose text/data. Basis for Deflate (ZIP, GZIP).</p> + <h6>Strengths:</h6> <p>Adaptive, good compression ratios.</p> + <h6>Weaknesses:</h6> <p>Can be slower if not optimized.</p> + </div> + </div> + </div> + <!-- LZW Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-journals"></i> LZW (Lempel-Ziv-Welch) <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Builds a dictionary from data; outputs dictionary codes. Used in GIF.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseLZW" aria-expanded="false" aria-controls="collapseLZW"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseLZW"> + <h6>Core Idea:</h6> <p>Builds a string translation table from input data; outputs codes for encountered sequences.</p> + <h6>Use Cases:</h6> <p>GIF images, <code>compress</code> utility, TIFF, PDF.</p> + <h6>Strengths:</h6> <p>Simple, fast decompression.</p> + <h6>Weaknesses:</h6> <p>Patents (now expired). 
Less efficient than modern LZ variants.</p> + <h6>File Extensions:</h6> <p><code>.gif</code> (internally)</p> + </div> + </div> + </div> + <!-- bzip2 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-arrow-repeat"></i> bzip2 <span class="context-tag">Archive</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Uses Burrows-Wheeler Transform (BWT) followed by MTF and Huffman coding. Good ratio, but slow.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseBzip2" aria-expanded="false" aria-controls="collapseBzip2"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseBzip2"> + <h6>Core Idea:</h6> <p>Reorders data using BWT to group similar characters, then compresses the transformed data.</p> + <h6>Use Cases:</h6> <p>General file compression, software distribution (common on Linux/Unix).</p> + <h6>Strengths:</h6> <p>Generally better compression ratios than Deflate.</p> + <h6>Weaknesses:</h6> <p>Significantly slower compression and decompression than Deflate, Zstd.</p> + <h6>File Extensions:</h6> <p><code>.bz2</code></p> + </div> + </div> + </div> + <!-- LZMA / LZMA2 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-stack"></i> LZMA / LZMA2 <span class="context-tag">Archive</span></h5> + <div class="card-content-wrapper"> + <p class="summary">LZ77 variant with large dictionary and range encoding. 
Very high ratios, but can be slow and memory-intensive.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseLZMA" aria-expanded="false" aria-controls="collapseLZMA"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseLZMA"> + <h6>Core Idea:</h6> <p>Combines an LZ77-like dictionary coder with sophisticated probability modeling (range coder).</p> + <h6>Use Cases:</h6> <p>Default for 7-Zip (<code>.7z</code>), XZ Utils (<code>.xz</code>). Software distribution, large archives.</p> + <h6>Strengths:</h6> <p>Very high compression ratios.</p> + <h6>Weaknesses:</h6> <p>Slow compression, high memory usage. Decompression is faster but not top-tier.</p> + <h6>File Extensions:</h6> <p><code>.7z</code>, <code>.xz</code></p> + </div> + </div> + </div> + <!-- Deflate Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-file-zip-fill"></i> Deflate (LZ77 + Huffman) <span class="context-tag">General</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Combines LZ77 and Huffman coding. 
Widely used in ZIP, GZIP, PNG.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseDeflate" aria-expanded="false" aria-controls="collapseDeflate"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseDeflate"> + <h6>Core Idea:</h6> <p>Finds duplicate strings with LZ77, then compresses literals and LZ77 output with Huffman.</p> + <h6>Use Cases:</h6> <p><code>.zip</code>, <code>.gz</code> files, PNG images, HTTP compression.</p> + <h6>Strengths:</h6> <p>Good balance of speed and ratio, widely adopted.</p> + <h6>Weaknesses:</h6> <p>Outperformed in ratio and/or speed by modern algorithms like Zstd, Brotli.</p> + <h6>File Extensions:</h6> <p><code>.zip</code>, <code>.gz</code>, <code>.png</code> (internally)</p> + </div> + </div> + </div> + <!-- Arithmetic Coding Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-calculator-fill"></i> Arithmetic Coding <span class="context-tag">Advanced</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Encodes entire message as a single fraction. Achieves near-optimal compression.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseArithmetic" aria-expanded="false" aria-controls="collapseArithmetic"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseArithmetic"> + <h6>Core Idea:</h6> <p>Represents the input data as a single fractional number in the range [0,1). 
More efficient than Huffman for skewed probabilities.</p> + <h6>Use Cases:</h6> <p>Component in JPEG 2000, H.264/AVC, some bzip2 variants.</p> + <h6>Strengths:</h6> <p>Higher compression efficiency than Huffman, especially for skewed probabilities.</p> + <h6>Weaknesses:</h6> <p>More computationally complex, historically patent-encumbered.</p> + </div> + </div> + </div> + <!-- Brotli Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-wind"></i> Brotli <span class="context-tag">Modern</span> <span class="context-tag">Web</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Modern algorithm (LZ77 + Huffman + Context Modeling + Static Dictionary). Excellent for web text.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseBrotli" aria-expanded="false" aria-controls="collapseBrotli"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseBrotli"> + <h6>Core Idea:</h6> <p>Uses LZ77, Huffman coding, 2nd order context modeling, and a large pre-defined static dictionary.</p> + <h6>Use Cases:</h6> <p>Web content (HTTP compression), WOFF2 fonts. 
Excels on text.</p> + <h6>Strengths:</h6> <p>Excellent compression ratios, fast decompression.</p> + <h6>Weaknesses:</h6> <p>Compression can be slower than Gzip/Zstd, though offers quality levels.</p> + <h6>File Extensions:</h6> <p><code>.br</code> (files), <code>br</code> (HTTP content encoding)</p> + </div> + </div> + </div> + <!-- Zstandard Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-speedometer2"></i> Zstandard (Zstd) <span class="context-tag">Modern</span> <span class="context-tag">Fast</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Modern algorithm (LZ77 variant + ANS/FSE). Very fast with good ratios. Highly flexible.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseZstd" aria-expanded="false" aria-controls="collapseZstd"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseZstd"> + <h6>Core Idea:</h6> <p>Combines an LZ77-variant with a fast entropy stage (Finite State Entropy - FSE, an Asymmetric Numeral System variant).</p> + <h6>Use Cases:</h6> <p>General-purpose, databases (MySQL, RocksDB), file systems (ZFS, Btrfs), real-time, archives (<code>.tar.zst</code>).</p> + <h6>Strengths:</h6> <p>Very fast compression/decompression, flexible levels, good ratios, dictionary support.</p> + <h6>Weaknesses:</h6> <p>Newer, so adoption still growing vs. 
Deflate (though rapidly).</p> + <h6>File Extensions:</h6> <p><code>.zst</code></p> + </div> + </div> + </div> + <!-- PPM Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-puzzle-fill"></i> Prediction by Partial Matching (PPM) <span class="context-tag">High Ratio</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Adaptive statistical technique using context modeling and arithmetic coding. High ratios, but slow.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapsePPM" aria-expanded="false" aria-controls="collapsePPM"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapsePPM"> + <h6>Core Idea:</h6> <p>Uses preceding symbols (context) to predict the next symbol's probability for arithmetic coding.</p> + <h6>Use Cases:</h6> <p>Text compression, general-purpose where ratio is paramount.</p> + <h6>Strengths:</h6> <p>Very high compression ratios, adaptive.</p> + <h6>Weaknesses:</h6> <p>Computationally expensive (CPU and memory), can be very slow.</p> + </div> + </div> + </div> + <!-- FLAC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-music-note-beamed"></i> FLAC <span class="context-tag">Audio</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Lossless audio compression using linear prediction and Golomb-Rice coding.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseFLAC" aria-expanded="false" aria-controls="collapseFLAC"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseFLAC"> + <h6>Core Idea:</h6> <p>Models audio signal with linear prediction, encodes residual error.</p> + <h6>Use 
Cases:</h6> <p>Archival of music, high-fidelity audio playback.</p> + <h6>Strengths:</h6> <p>Good audio compression (30-60% reduction), royalty-free, widely supported.</p> + <h6>Weaknesses:</h6> <p>Specifically for audio.</p> + <h6>File Extensions:</h6> <p><code>.flac</code>, <code>.fla</code></p> + </div> + </div> + </div> + <!-- ALAC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossless"> + <div class="card-body"> + <h5><i class="bi bi-apple"></i> ALAC (Apple Lossless) <span class="context-tag">Audio</span> <span class="context-tag apple">Apple</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Apple's lossless audio codec, also using linear prediction.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseALAC" aria-expanded="false" aria-controls="collapseALAC"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseALAC"> + <h6>Core Idea:</h6> <p>Similar to FLAC, uses linear prediction with different parameters and entropy coding.</p> + <h6>Use Cases:</h6> <p>Used within Apple's ecosystem (Apple Music Lossless, iTunes libraries).</p> + <h6>Strengths:</h6> <p>Similar compression to FLAC, well-integrated in Apple products.</p> + <h6>Weaknesses:</h6> <p>Less universal support outside Apple ecosystem compared to FLAC.</p> + <h6>File Extensions:</h6> <p><code>.m4a</code> (when containing ALAC)</p> + </div> + </div> + </div> + </div> <!-- /.row --> + </div> <!-- /.schema-container#lossless --> + + <!-- III. Lossy Compression Algorithms --> + <div class="schema-container" data-category="lossy" id="section-lossy"> + <h2 class="section-title"><i class="bi bi-file-earmark-minus-fill"></i> III. 
Lossy Algorithms</h2> + <div class="row"> + <!-- JPEG Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-image-fill"></i> JPEG <span class="context-tag">Image</span> <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Widely used for photographic images. Uses DCT, quantization, and Huffman/Arithmetic coding.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseJPEG" aria-expanded="false" aria-controls="collapseJPEG"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseJPEG"> + <h6>Core Idea:</h6> <p>Transforms 8x8 pixel blocks using Discrete Cosine Transform (DCT), quantizes coefficients, then entropy codes.</p> + <h6>Use Cases:</h6> <p>Still images (photographs). Very common on the web.</p> + <h6>Strengths:</h6> <p>Widely supported, good for photos at reasonable quality.</p> + <h6>Weaknesses:</h6> <p>Blocking artifacts at low quality, not ideal for sharp lines/text.</p> + <h6>Parameters:</h6> <p>Quality setting (1-100), chroma subsampling.</p> + <h6>File Extensions:</h6> <p><code>.jpg</code>, <code>.jpeg</code></p> + <div class="placeholder-diagram">Diagram: JPEG 8x8 DCT Block Processing</div> + </div> + </div> + </div> + <!-- HEIC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-phone-fill"></i> HEIC (High Efficiency Image Format) <span class="context-tag">Image</span> <span class="context-tag apple">Apple Default</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Apple's default image format. 
Uses HEVC/H.265 for image data, offers better compression than JPEG.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseHEIC" aria-expanded="false" aria-controls="collapseHEIC"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseHEIC"> + <h6>Core Idea:</h6> <p>Stores HEVC-encoded image data within an HEIF container. Offers better compression than JPEG for similar quality.</p> + <h6>Use Cases:</h6> <p>Default image capture on modern iPhones/iPads. Growing support on other platforms.</p> + <h6>Strengths:</h6> <p>~50% smaller file size than JPEG for similar quality. Supports transparency, animations, depth maps, Live Photos.</p> + <h6>Weaknesses:</h6> <p>Not as universally supported as JPEG yet, though adoption is increasing. Can have licensing considerations (HEVC patents).</p> + <h6>Parameters:</h6> <p>Typically managed by capture device settings.</p> + <h6>File Extensions:</h6> <p><code>.heic</code>, <code>.heif</code></p> + </div> + </div> + </div> + <!-- ProRes Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-film"></i> Apple ProRes <span class="context-tag">Video</span> <span class="context-tag apple">Pro Video</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Family of high-quality, lossy (visually lossless to near-lossless) video codecs for professional post-production.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseProRes" aria-expanded="false" aria-controls="collapseProRes"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseProRes"> + <h6>Core Idea:</h6> <p>Intra-frame DCT-based codecs optimized for editing performance and high image fidelity.</p> + <h6>Use Cases:</h6> <p>Video 
acquisition (iPhone Cinematic Mode), professional video editing (Final Cut Pro), intermediate/mastering format.</p> + <h6>Strengths:</h6> <p>Excellent image quality, robust editing performance, supports alpha channels (ProRes 4444), multiple data rates/quality levels.</p> + <h6>Weaknesses:</h6> <p>Large file sizes compared to distribution codecs (H.264/HEVC). Not intended for final delivery to end-users.</p> + <h6>Variants:</h6> <p>ProRes Proxy, LT, 422, 422 HQ, 4444, 4444 XQ, ProRes RAW.</p> + <h6>File Extensions:</h6> <p><code>.mov</code> (QuickTime container)</p> + </div> + </div> + </div> + <!-- JPEG 2000 Card (Moved for alphabetical, if preferred) --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-aspect-ratio-fill"></i> JPEG 2000 <span class="context-tag">Image</span> <span class="context-tag">Specialized</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Uses wavelet transform. 
Better quality than JPEG at high compression, supports lossless.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseJPEG2000" aria-expanded="false" aria-controls="collapseJPEG2000"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseJPEG2000"> + <h6>Core Idea:</h6> <p>Applies wavelet transform to entire image or large tiles, offering progressive decoding.</p> + <h6>Use Cases:</h6> <p>Medical imaging (DICOM), digital cinema, archival.</p> + <h6>Strengths:</h6> <p>Better quality than JPEG at high compression, lossless option, ROI coding.</p> + <h6>Weaknesses:</h6> <p>More complex, less native software support than JPEG, computationally intensive.</p> + <h6>Parameters:</h6> <p>Compression ratio/bitrate, quality layers, lossless/lossy.</p> + <h6>File Extensions:</h6> <p><code>.jp2</code>, <code>.j2k</code></p> + </div> + </div> + </div> + <!-- WebP Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-globe"></i> WebP <span class="context-tag">Image</span> <span class="context-tag">Web</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Google's image format. Lossy mode uses VP8-based prediction; also supports lossless, animation, transparency.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseWebP" aria-expanded="false" aria-controls="collapseWebP"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseWebP"> + <h6>Core Idea:</h6> <p>Lossy uses intra-frame prediction from VP8 video; lossless uses different techniques (spatial prediction, LZ77).</p> + <h6>Use Cases:</h6> <p>Web images, aiming to replace JPEG, PNG, GIF.</p> + <h6>Strengths:</h6> <p>Better compression than JPEG (lossy) and PNG (lossless). 
Supports animation & alpha.</p> + <h6>Weaknesses:</h6> <p>Lossy quality can be debated vs. highly optimized JPEGs or newer AVIF.</p> + <h6>Parameters:</h6> <p>Quality setting (lossy), effort setting (lossless).</p> + <h6>File Extensions:</h6> <p><code>.webp</code></p> + </div> + </div> + </div> + <!-- AVIF Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-image-alt"></i> AVIF (AV1 Image Format) <span class="context-tag">Image</span> <span class="context-tag">Next-Gen Web</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Image format using AV1 video intra-frame coding. Excellent efficiency, supports HDR.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseAVIF" aria-expanded="false" aria-controls="collapseAVIF"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseAVIF"> + <h6>Core Idea:</h6> <p>Leverages AV1 video compression techniques for still images, stored in HEIF container.</p> + <h6>Use Cases:</h6> <p>Web images, aiming for superior quality/ratio over JPEG/WebP.</p> + <h6>Strengths:</h6> <p>Significantly better compression than JPEG/WebP. Supports HDR, wide color gamut, lossless, animation. Royalty-free.</p> + <h6>Weaknesses:</h6> <p>Newer, software/browser support still growing. 
Can be computationally demanding.</p> + <h6>Parameters:</h6> <p>Quality setting (quantizer), speed/effort.</p> + <h6>File Extensions:</h6> <p><code>.avif</code></p> + </div> + </div> + </div> + + <!-- MPEG Family Overview Card (Informational) --> + <div class="col-12 mb-4"> <!-- Full width for overview --> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5 class="text-center"><i class="bi bi-film"></i> MPEG Family (Video & Audio)</h5> + <div class="card-content-wrapper"> + <p class="summary text-center">The Moving Picture Experts Group (MPEG) has developed a suite of widely adopted standards for audio and video compression. Key video codecs include H.264/AVC and H.265/HEVC. Key audio codecs include MP3 and AAC. Details for prominent members are in individual cards below.</p> + </div> + </div> + </div> + </div> + + <!-- H.264/AVC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-camera-reels-fill"></i> H.264/AVC (MPEG-4 Part 10) <span class="context-tag">Video</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Widely used video codec. Excellent balance of quality and compression. 
Used in Blu-ray, streaming.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseH264" aria-expanded="false" aria-controls="collapseH264"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseH264"> + <h6>Core Idea:</h6> <p>Advanced video coding with flexible macroblocks, improved prediction, in-loop deblocking filter.</p> + <h6>Use Cases:</h6> <p>Blu-ray, streaming, video conferencing, most web video.</p> + <h6>Strengths:</h6> <p>Excellent quality/ratio balance, wide hardware support.</p> + <h6>Weaknesses:</h6> <p>More complex than MPEG-2, royalty-bearing.</p> + <h6>Parameters:</h6> <p>Bitrate, profiles (Baseline, Main, High), levels, GOP settings.</p> + <h6>File Extensions:</h6> <p><code>.mp4</code>, <code>.mkv</code>, <code>.mov</code></p> + </div> + </div> + </div> + <!-- H.265/HEVC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-badge-4k-fill"></i> H.265/HEVC <span class="context-tag">Video</span> <span class="context-tag apple">Apple Used</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Successor to H.264, roughly 2x efficiency. Used for 4K/UHD content. Apple default video codec.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseH265" aria-expanded="false" aria-controls="collapseH265"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseH265"> + <h6>Core Idea:</h6> <p>Larger coding units (CTUs), improved prediction modes, Sample Adaptive Offset (SAO) filtering.</p> + <h6>Use Cases:</h6> <p>4K/UHD Blu-ray, high-resolution streaming. 
Default video on modern iPhones/iPads.</p> + <h6>Strengths:</h6> <p>Significantly better compression than H.264.</p> + <h6>Weaknesses:</h6> <p>More complex, licensing was complicated (improving).</p> + <h6>Parameters:</h6> <p>Bitrate, profiles (Main, Main10), tiers, levels.</p> + <h6>File Extensions:</h6> <p><code>.mp4</code>, <code>.mkv</code>, <code>.mov</code></p> + </div> + </div> + </div> + <!-- VP9 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-youtube"></i> VP9 <span class="context-tag">Video</span> <span class="context-tag">Web</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Google's open and royalty-free video codec. Widely used by YouTube, competitor to H.265.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseVP9" aria-expanded="false" aria-controls="collapseVP9"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseVP9"> + <h6>Core Idea:</h6> <p>Advanced video coding techniques, designed for web streaming and real-time communication.</p> + <h6>Use Cases:</h6> <p>YouTube, WebRTC, other streaming services.</p> + <h6>Strengths:</h6> <p>Good compression efficiency (comparable to early H.265), royalty-free, strong browser support.</p> + <h6>Weaknesses:</h6> <p>Largely being superseded by AV1 for top-tier efficiency.</p> + <h6>File Extensions:</h6> <p>Often in <code>.webm</code>, <code>.mp4</code>.</p> + </div> + </div> + </div> + <!-- AV1 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-broadcast-pin"></i> AV1 (AOMedia Video 1) <span class="context-tag">Video</span> <span class="context-tag apple">Apple Support</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Royalty-free, open-source video codec. 
Aims for better efficiency than HEVC. Growing in web streaming. Apple adds hardware decode support.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseAV1" aria-expanded="false" aria-controls="collapseAV1"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseAV1"> + <h6>Core Idea:</h6> <p>Advanced techniques: larger superblocks, sophisticated prediction, CDEF/loop restoration filters.</p> + <h6>Use Cases:</h6> <p>Web streaming (YouTube, Netflix, Twitch), real-time communications (WebRTC).</p> + <h6>Strengths:</h6> <p>Excellent compression (better than HEVC), royalty-free. Apple hardware decoding from A17 Pro/M3 chips.</p> + <h6>Weaknesses:</h6> <p>Very computationally intensive to encode (improving), decode can also be heavy without hardware support.</p> + <h6>Parameters:</h6> <p>Bitrate, quality settings (CRF), speed presets.</p> + <h6>File Extensions:</h6> <p><code>.mkv</code>, <code>.webm</code>, <code>.mp4</code> (with ISOBMFF)</p> + </div> + </div> + </div> + <!-- MP3 Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-file-earmark-music-fill"></i> MP3 (MPEG-1 Audio Layer III) <span class="context-tag">Audio</span> <span class="context-tag">Classic</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Ubiquitous audio codec. 
Uses psychoacoustic model, MDCT, quantization, Huffman.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseMP3" aria-expanded="false" aria-controls="collapseMP3"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseMP3"> + <h6>Core Idea:</h6> <p>Discards parts of audio signal less perceptible to human hearing using psychoacoustic models.</p> + <h6>Use Cases:</h6> <p>Digital audio, music files, podcasts.</p> + <h6>Strengths:</h6> <p>Ubiquitous support, good quality at moderate bitrates.</p> + <h6>Weaknesses:</h6> <p>Older, less efficient than AAC/Opus. Audible artifacts at low bitrates.</p> + <h6>Parameters:</h6> <p>Bitrate (CBR/VBR, e.g., 128, 192, 320 kbps).</p> + <h6>File Extensions:</h6> <p><code>.mp3</code></p> + </div> + </div> + </div> + <!-- AAC Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-music-player-fill"></i> AAC (Advanced Audio Coding) <span class="context-tag">Audio</span> <span class="context-tag apple">Apple Standard</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Successor to MP3, better quality at same bitrate. 
Standard for Apple Music, iTunes.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseAAC" aria-expanded="false" aria-controls="collapseAAC"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseAAC"> + <h6>Core Idea:</h6> <p>Improved psychoacoustic model, MDCT with better windowing, more efficient coding techniques (TNS, PNS).</p> + <h6>Use Cases:</h6> <p>Apple Music/iTunes, YouTube, streaming, digital radio (DAB+).</p> + <h6>Strengths:</h6> <p>Better quality than MP3 at same bitrate, especially lower bitrates.</p> + <h6>Weaknesses:</h6> <p>Several variants (AAC-LC, HE-AAC) can cause confusion.</p> + <h6>Parameters:</h6> <p>Bitrate, profiles (LC, HE, HEv2).</p> + <h6>File Extensions:</h6> <p><code>.aac</code>, <code>.m4a</code>, <code>.mp4</code></p> + </div> + </div> + </div> + <!-- Opus Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-mic-fill"></i> Opus <span class="context-tag">Audio</span> <span class="context-tag apple">Apple Support</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Royalty-free, versatile (speech & music), low latency. Excellent for VoIP, streaming. Supported by Apple.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseOpus" aria-expanded="false" aria-controls="collapseOpus"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseOpus"> + <h6>Core Idea:</h6> <p>Combines SILK (speech) and CELT (music) algorithms, dynamically switching or combining.</p> + <h6>Use Cases:</h6> <p>VoIP, video conferencing (WebRTC default), game chat, streaming, audiobooks. 
Used by FaceTime audio.</p> + <h6>Strengths:</h6> <p>Excellent quality across wide bitrate range, very low delay, royalty-free, adaptive.</p> + <h6>Weaknesses:</h6> <p>Less ubiquitous for stored music vs. MP3/AAC (though growing).</p> + <h6>Parameters:</h6> <p>Bitrate, application type (VoIP, Audio, Low-Delay).</p> + <h6>File Extensions:</h6> <p><code>.opus</code> (often in <code>.ogg</code> or <code>.webm</code>)</p> + </div> + </div> + </div> + <!-- Vorbis Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-soundwave"></i> Vorbis (Ogg Vorbis) <span class="context-tag">Audio</span> <span class="context-tag">Open Source</span></h5> + <div class="card-content-wrapper"> + <p class="summary">Open-source, patent-free audio format. Good quality, popular in open-source applications.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseVorbis" aria-expanded="false" aria-controls="collapseVorbis"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseVorbis"> + <h6>Core Idea:</h6> <p>Uses Modified Discrete Cosine Transform (MDCT), vector quantization, and codebook-based entropy encoding.</p> + <h6>Use Cases:</h6> <p>Open-source software, indie games, some streaming (historically Spotify).</p> + <h6>Strengths:</h6> <p>Good quality, royalty-free and open.</p> + <h6>Weaknesses:</h6> <p>Less efficient than Opus/modern AAC at very low bitrates. 
Hardware support less widespread.</p> + <h6>Parameters:</h6> <p>Quality level (q -1.0 to 10.0), average bitrate.</p> + <h6>File Extensions:</h6> <p><code>.ogg</code>, <code>.oga</code></p> + </div> + </div> + </div> + <!-- Psychophysical Principles Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="lossy"> + <div class="card-body"> + <h5><i class="bi bi-ear-fill"></i> Psychovisual/Psychoacoustic Principles</h5> + <div class="card-content-wrapper"> + <p class="summary">How lossy codecs exploit human perception limits (auditory/frequency masking, luminance vs. chrominance sensitivity).</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapsePsychophysical" aria-expanded="false" aria-controls="collapsePsychophysical"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapsePsychophysical"> + <h6>Psychoacoustics (Audio)</h6> + <p>Lossy audio codecs (MP3, AAC, Opus) exploit auditory masking:</p> + <ul> + <li><strong>Frequency Masking:</strong> Louder sounds make quieter sounds at nearby frequencies inaudible.</li> + <li><strong>Temporal Masking:</strong> A loud sound masks quieter sounds immediately before (pre-masking) or after (post-masking) it.</li> + </ul> + <p>Codecs discard or heavily quantize information in masked regions.</p> + <h6>Psychovisuals (Image/Video)</h6> + <p>Lossy image/video codecs (JPEG, H.264) exploit Human Visual System (HVS) characteristics:</p> + <ul> + <li><strong>Luminance vs. Chrominance Sensitivity:</strong> Humans are more sensitive to brightness (luminance) than color (chrominance). <span class="term" data-bs-toggle="tooltip" title="Reduces color information (e.g., 4:2:0) as humans are less sensitive to color changes than brightness changes.">Chroma subsampling</span> reduces color info.</li> + <li><strong>Frequency Sensitivity:</strong> Less sensitive to high-frequency details. 
Transform coding allows selective quantization.</li> + <li><strong>Contrast Masking:</strong> Visual patterns can mask noise within those regions.</li> + </ul> + </div> + </div> + </div> + </div> <!-- /.row --> + </div> <!-- /.schema-container#lossy --> + + <!-- IV. Practical Considerations & Application --> + <div class="schema-container" data-category="practical" id="section-practical"> + <h2 class="section-title"><i class="bi bi-wrench-adjustable-circle-fill"></i> IV. Practical Considerations</h2> + <div class="row"> + <!-- Choosing Algorithm Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="practical"> + <div class="card-body"> + <h5><i class="bi bi-signpost-split-fill"></i> Choosing the Right Algorithm</h5> + <div class="card-content-wrapper"> + <p class="summary">Factors to consider: data type, loss tolerance, ratio, speed, resources, licensing, ecosystem.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseChoosingAlgo" aria-expanded="false" aria-controls="collapseChoosingAlgo"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseChoosingAlgo"> + <h6>Key Questions:</h6> + <ol> + <li>What data type? (Text, image, audio, video, binary)</li> + <li>Is loss acceptable? (Lossless vs. Lossy)</li> + <li>Primary goal? (Ratio, speed, quality, low cost)</li> + <li>Resource constraints? 
(CPU, RAM)</li> + <li>Target platform/ecosystem support?</li> + <li>Licensing/royalty concerns?</li> + <li>Energy consumption / battery life needs?</li> + <li>Standards compliance / interoperability needs?</li> + </ol> + <div class="placeholder-diagram">Flowchart: Decision Tree for Algorithm Selection</div> + <h6>General Guidelines:</h6> + <ul> + <li><strong>Text/Code:</strong> Zstd, Brotli, Gzip.</li> + <li><strong>Archives:</strong> Zstd (high levels), 7-Zip (LZMA2), XZ.</li> + <li><strong>Web Photos:</strong> JPEG, WebP, AVIF, HEIC (where supported).</li> + <!-- ... more guidelines ... --> + </ul> + </div> + </div> + </div> + <!-- Tools & Libraries Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="practical"> + <div class="card-body"> + <h5><i class="bi bi-gear-wide-connected"></i> Tools, Libraries & Software</h5> + <div class="card-content-wrapper"> + <p class="summary">Common archivers (gzip, 7-Zip), libraries (zlib, FFmpeg), and software implementing these algorithms.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseToolsLibs" aria-expanded="false" aria-controls="collapseToolsLibs"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseToolsLibs"> + <h6>Command-Line Archivers:</h6> + <ul> + <li><code>gzip</code>, <code>zip</code>, <code>7-Zip (7z)</code>, <code>tar</code> (with compressors)</li> + <li><code>brotli</code>, <code>zstd</code>, <code>bzip2</code>, <code>xz</code></li> + </ul> + <h6>Libraries for Developers:</h6> + <ul> + <li><strong>zlib:</strong> (C library for Deflate)</li> + <li><strong>libjpeg-turbo:</strong> (JPEG C library)</li> + <li><strong>FFmpeg:</strong> (Audio/video codecs library & tool)</li> + <!-- ... more libraries ... 
--> + </ul> + <h6>Applications:</h6> + <p>Image Editors (GIMP, Photoshop, Pixelmator), Video Editors (DaVinci, Premiere, Final Cut Pro), Audio Editors (Audacity, Logic Pro).</p> + </div> + </div> + </div> + <!-- Application Domains Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="practical"> + <div class="card-body"> + <h5><i class="bi bi-hdd-stack-fill"></i> Application Domains</h5> + <div class="card-content-wrapper"> + <p class="summary">Unique compression needs in databases, network traffic, medical imaging, genomics, archives, scientific data.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseAppDomains" aria-expanded="false" aria-controls="collapseAppDomains"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseAppDomains"> + <ul> + <li><strong>Databases:</strong> Columnar compression, delta encoding.</li> + <li><strong>Network Traffic:</strong> HTTP compression (Gzip, Brotli), real-time (Opus).</li> + <li><strong>Medical Imaging (DICOM):</strong> Lossless (JPEG-LS, RLE) or visually lossless (JPEG 2000).</li> + <li><strong>Genomics Data (FASTQ, CRAM):</strong> Specialized algorithms.</li> + <!-- ... more domains ... 
--> + </ul> + </div> + </div> + </div> + <!-- Emerging Trends Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="practical"> + <div class="card-body"> + <h5><i class="bi bi-rocket-launch-fill"></i> Emerging Trends</h5> + <div class="card-content-wrapper"> + <p class="summary">AI/Neural Network-based compression, Versatile Video Coding (VVC/H.266), hardware acceleration, semantic compression, privacy-aware compression.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapseEmergingTrends" aria-expanded="false" aria-controls="collapseEmergingTrends"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseEmergingTrends"> + <ul> + <li><strong>AI/Neural Network-Based Compression:</strong> Promising for images/video/audio, but often computationally expensive.</li> + <li><strong>Versatile Video Coding (VVC/H.266):</strong> Latest joint ITU-T/MPEG video standard, ~30-50% bitrate reduction over HEVC at similar quality.</li> + <li><strong>Specialized Hardware Acceleration:</strong> For newer codecs (AV1, VVC).</li> + <li><strong>Focus on Semantic Compression:</strong> Compressing based on data <em>meaning</em>.</li> + <li><strong>Compression for Privacy:</strong> Emerging techniques.</li> + </ul> + </div> + </div> + </div> + <!-- Pre/Post Processing Card --> + <div class="col-lg-4 col-md-6 mb-4"> + <div class="info-card" data-category="practical"> + <div class="card-body"> + <h5><i class="bi bi-funnel-fill"></i> Pre/Post-processing</h5> + <div class="card-content-wrapper"> + <p class="summary">Steps taken before compression (e.g., normalization, BWT) or after decompression (e.g., deblocking filters) to improve results.</p> + <button class="btn details-toggle" type="button" data-bs-toggle="collapse" data-bs-target="#collapsePrePostProcessing" aria-expanded="false" aria-controls="collapsePrePostProcessing"> + Details <i class="bi bi-chevron-down"></i> + </button> + </div> +
</div> + <div class="collapse collapse-content" id="collapsePrePostProcessing"> + <h6>Pre-processing Examples:</h6> + <ul> + <li>Normalization, noise removal (for lossy), data transformation (like BWT), reordering data fields.</li> + </ul> + <h6>Post-processing Examples:</h6> + <ul> + <li>Deblocking filters (common in video codecs), deringing filters, error concealment.</li> + </ul> + </div> + </div> + </div> + </div> <!-- /.row --> + </div> <!-- /.schema-container#practical --> + + <!-- V. Standards Bodies --> + <div class="schema-container" data-category="standards" id="section-standards"> + <h2 class="section-title"><i class="bi bi-building-fill"></i> V. Standards Bodies</h2> + <div class="row"> + <div class="col-12 mb-4"> <!-- Full width for overview --> + <div class="info-card" data-category="standards"> + <div class="card-body"> + <h5 class="text-center"><i class="bi bi-award-fill"></i> Key Organizations</h5> + <div class="card-content-wrapper"> + <p class="summary text-center">Several organizations play crucial roles in developing and standardizing compression algorithms, ensuring interoperability and advancing the field. Key players include MPEG, ITU-T, IETF, AOMedia, ISO/IEC, and W3C.</p> + <button class="btn details-toggle mx-auto" type="button" data-bs-toggle="collapse" data-bs-target="#collapseStandardsBodies" aria-expanded="false" aria-controls="collapseStandardsBodies"> + Explore Details <i class="bi bi-chevron-down"></i> + </button> + </div> + </div> + <div class="collapse collapse-content" id="collapseStandardsBodies"> + <ul> + <li><strong>MPEG (Moving Picture Experts Group):</strong> Develops standards for audio and video (e.g., MPEG-1, MPEG-2, H.264, H.265, VVC, MP3, AAC).
Part of ISO/IEC.</li> + <li><strong>ITU-T (International Telecommunication Union - Telecommunication Standardization Sector):</strong> Develops video coding standards, often jointly with MPEG (e.g., H.26x series).</li> + <li><strong>IETF (Internet Engineering Task Force):</strong> Develops standards for internet protocols, including codecs for real-time communication (e.g., Opus, RFC 6716); Deflate, Brotli, and Zstandard are also published as RFCs.</li> + <li><strong>AOMedia (Alliance for Open Media):</strong> Consortium developing royalty-free video codecs like AV1.</li> + <li><strong>ISO (International Organization for Standardization) & IEC (International Electrotechnical Commission):</strong> General standards bodies that publish the work of the MPEG and JPEG committees.</li> + <li><strong>W3C (World Wide Web Consortium):</strong> Standardizes web technologies, including formats like PNG and font compression (WOFF/WOFF2). WebP is maintained by Google rather than the W3C.</li> + </ul> + </div> + </div> + </div> + </div> + </div> + </main> - <footer> - <p>Data Compression Cheatsheet. Content is for informational purposes. - <script>document.write("Last updated: " + new Date().toLocaleDateString());</script> + <footer class="container text-center"> + <p class="mb-1"> + Data Compression Cheatsheet © <span id="currentYear"></span>. + Content is for informational purposes.
</p>
<p class="mb-2" style="font-size: 0.85em;">
  Last Updated: <span id="lastUpdatedDate"></span>
</p>
<div>
  <a href="https://en.wikipedia.org/wiki/Data_compression" target="_blank" rel="noopener noreferrer" class="mx-2" data-bs-toggle="tooltip" title="Learn more about Data Compression on Wikipedia">
    <i class="bi bi-wikipedia" aria-hidden="true"></i> Wikipedia: Data Compression
  </a>
  <a href="https://www.itu.int/en/ITU-T/publications/Pages/default.aspx" target="_blank" rel="noopener noreferrer" class="mx-2" data-bs-toggle="tooltip" title="Explore ITU-T Publications">
    <i class="bi bi-book-half" aria-hidden="true"></i> ITU-T Publications
  </a>
</div>
</footer>

<!-- NOTE(review): the package version in this URL was lost to e-mail obfuscation
     ("[email protected]") in the scraped source. 5.3.3 is assumed here because the script
     below relies on the per-button CSS variables introduced in Bootstrap 5.2.
     Confirm it matches the Bootstrap stylesheet linked in the head. -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
<script>
  /*
   * Page behaviour, wired up once the DOM is parsed:
   *   1. Bootstrap tooltips for every [data-bs-toggle="tooltip"] element.
   *   2. Footer year and "Last Updated" date.
   *   3. Chevron rotation on each "Details" toggle as its panel opens/closes.
   *   4. Category filter buttons plus free-text search over the cards.
   */
  document.addEventListener("DOMContentLoaded", () => {
    // ---- 1. Tooltips --------------------------------------------------------
    document.querySelectorAll('[data-bs-toggle="tooltip"]').forEach((el) => {
      new bootstrap.Tooltip(el);
    });

    // ---- 2. Footer dates ----------------------------------------------------
    const now = new Date();
    document.getElementById("currentYear").textContent = now.getFullYear();

    // "Last Updated" should reflect the document, not the moment of viewing.
    // document.lastModified comes from the Last-Modified response header; when
    // the server sends none, browsers report the current time, which keeps the
    // previous behaviour as the fallback.
    const modified = new Date(document.lastModified);
    const lastUpdated = Number.isNaN(modified.getTime()) ? now : modified;
    document.getElementById("lastUpdatedDate").textContent =
      lastUpdated.toLocaleDateString('en-US', { year: 'numeric', month: 'long', day: 'numeric' });

    // ---- 3. Collapse toggle chevrons -----------------------------------------
    document.querySelectorAll(".collapse").forEach((panel) => {
      if (!panel.id) return;
      const toggle = document.querySelector(`.details-toggle[data-bs-target="#${panel.id}"]`);
      // Only the chevron icon is rotated; toggles without one are left alone.
      const chevron = toggle ? toggle.querySelector(".bi-chevron-down") : null;
      if (!chevron) return;

      const setChevron = (isOpen) => {
        chevron.style.transform = isOpen ? "rotate(180deg)" : "rotate(0deg)";
      };
      setChevron(panel.classList.contains("show"));
      // Use the "start of transition" events so the icon turns with the animation.
      panel.addEventListener("show.bs.collapse", () => setChevron(true));
      panel.addEventListener("hide.bs.collapse", () => setChevron(false));
    });

    // ---- 4. Filter and search ------------------------------------------------
    const searchBox = document.getElementById('search-box');
    const categoryFiltersContainer = document.getElementById('category-filters');
    const mainContainer = document.getElementById('main-container');
    const noResultsDiv = document.getElementById('no-results');

    if (!searchBox || !categoryFiltersContainer || !mainContainer || !noResultsDiv) {
      // Without these elements there is nothing to filter; say so instead of
      // failing with an opaque TypeError further down.
      console.warn('Filter/search UI not initialised: a required element is missing.');
      return;
    }

    const allSchemaContainers = Array.from(mainContainer.querySelectorAll('.schema-container'));

    const categories = [
      { id: 'all', name: 'All Categories', color: '#0D6EFD' }, // Bootstrap primary for "All"
      { id: 'table', name: 'Quick Table', color: 'var(--color-table)' },
      { id: 'foundations', name: 'Foundations', color: 'var(--color-foundations)' },
      { id: 'lossless', name: 'Lossless', color: 'var(--color-lossless)' },
      { id: 'lossy', name: 'Lossy', color: 'var(--color-lossy)' },
      { id: 'practical', name: 'Practical Tips', color: 'var(--color-practical)' },
      { id: 'standards', name: 'Standards Bodies', color: 'var(--color-standards)' },
    ];

    let currentFilter = 'all';

    /**
     * Convert a colour to the "r,g,b" triplet Bootstrap expects in
     * --bs-btn-focus-shadow-rgb. Accepts #rgb, #rrggbb, or var(--name) that
     * resolves to one of those on the root element; anything else yields "0,0,0".
     */
    const toRgbTriplet = (color) => {
      let value = color.trim();
      const cssVar = /^var\(\s*(--[^,)\s]+)/.exec(value);
      if (cssVar) {
        value = getComputedStyle(document.documentElement).getPropertyValue(cssVar[1]).trim();
      }
      const hex = /^#([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(value);
      if (!hex) return '0,0,0';
      const digits = hex[1].length === 3 ? hex[1].replace(/./g, '$&$&') : hex[1];
      return [0, 2, 4].map((i) => parseInt(digits.slice(i, i + 2), 16)).join(',');
    };

    /**
     * Put one filter button into its active or inactive look.
     * This is the single place that styles filter buttons, so the state set at
     * creation and the state set after a click can never drift apart.
     */
    const paintFilterButton = (button, isActive) => {
      const category = categories.find((c) => c.id === button.dataset.filterCategory);
      if (!category) return;

      button.classList.toggle('active', isActive);
      button.setAttribute('aria-pressed', String(isActive));

      if (category.id === 'all') {
        // "All" uses stock Bootstrap classes: solid primary when active,
        // outline primary otherwise (never both, and never outline-secondary).
        button.classList.remove('btn-outline-secondary');
        button.classList.toggle('btn-primary', isActive);
        button.classList.toggle('btn-outline-primary', !isActive);
        return;
      }

      button.classList.toggle('btn-outline-secondary', !isActive);

      const { style } = button;
      style.setProperty('--bs-btn-color', isActive ? '#fff' : category.color);
      style.setProperty('--bs-btn-border-color', category.color);
      if (isActive) {
        style.setProperty('--bs-btn-bg', category.color);
      } else {
        // Clearing the fill is essential: leaving it behind would render the
        // category-coloured label on a category-coloured background.
        style.removeProperty('--bs-btn-bg');
      }
      style.setProperty('--bs-btn-hover-color', '#fff');
      style.setProperty('--bs-btn-hover-bg', category.color);
      style.setProperty('--bs-btn-hover-border-color', category.color);
      style.setProperty('--bs-btn-active-color', '#fff');
      style.setProperty('--bs-btn-active-bg', category.color);
      style.setProperty('--bs-btn-active-border-color', category.color);
      style.setProperty('--bs-btn-focus-shadow-rgb', toRgbTriplet(category.color));
    };

    // Build one button per category.
    categories.forEach((category) => {
      const button = document.createElement('button');
      button.type = 'button';
      button.classList.add('btn', 'filter-btn');
      button.textContent = category.name;
      button.dataset.filterCategory = category.id;
      paintFilterButton(button, category.id === currentFilter);
      categoryFiltersContainer.appendChild(button);
    });

    /**
     * Show or hide cards and sections according to the active category and the
     * search box, then toggle the "no results" notice.
     * A section's category comes from the section's own data-category.
     */
    function applyFilters() {
      const searchTerm = searchBox.value.toLowerCase().trim();
      const matchesSearch = (text) => searchTerm === '' || text.toLowerCase().includes(searchTerm);
      let itemsFound = 0;

      allSchemaContainers.forEach((section) => {
        const matchesFilter = currentFilter === 'all' || section.dataset.category === currentFilter;
        const cards = section.querySelectorAll('.info-card');

        if (cards.length === 0) {
          // Card-less sections (e.g. the quick reference table) are matched as a whole.
          const visible = matchesFilter && matchesSearch(section.textContent);
          section.classList.toggle('hidden', !visible);
          if (visible) itemsFound++;
          return;
        }

        let visibleCards = 0;
        cards.forEach((card) => {
          // Join with spaces so a term cannot match across two separate fields.
          const haystack = ['h5', 'p.summary', '.collapse-content']
            .map((selector) => card.querySelector(selector)?.textContent || '')
            .join(' ');
          const visible = matchesFilter && matchesSearch(haystack);
          // Hide the grid column so no gap is left; fall back to the card
          // itself if it is not wrapped in a column.
          const wrapper = card.closest('.col-lg-4, .col-md-6, .col-12') || card;
          wrapper.classList.toggle('hidden', !visible);
          if (visible) visibleCards++;
        });

        section.classList.toggle('hidden', visibleCards === 0);
        itemsFound += visibleCards;
      });

      noResultsDiv.classList.toggle('hidden', itemsFound > 0);
    }

    searchBox.addEventListener('input', applyFilters);

    // One delegated listener handles every filter button.
    categoryFiltersContainer.addEventListener('click', (event) => {
      const clicked = event.target.closest('.filter-btn');
      if (!clicked || !categoryFiltersContainer.contains(clicked)) return;

      currentFilter = clicked.dataset.filterCategory;
      categoryFiltersContainer.querySelectorAll('.filter-btn').forEach((btn) => {
        paintFilterButton(btn, btn === clicked);
      });
      applyFilters();
    });

    applyFilters(); // Initial render
  });
</script>
</body>
</html>