From 4c51c36a5574bbfb2cb91daa24b6fed2583852ce Mon Sep 17 00:00:00 2001 From: Charliechen114514 <725610365@qq.com> Date: Sun, 14 Jun 2026 08:50:06 +0800 Subject: [PATCH 1/2] =?UTF-8?q?-=20=E9=87=8D=E5=86=99=E4=B8=AD=E6=96=87?= =?UTF-8?q?=E5=AD=A6=E4=B9=A0=E8=B7=AF=E7=BA=BF=E5=9B=BE(documents/roadmap?= =?UTF-8?q?/index.md):=E5=8E=BB=E6=8E=89=E6=98=93=E5=A4=B1=E6=95=88?= =?UTF-8?q?=E7=9A=84=E7=AF=87=E6=95=B0=E5=A0=86=E7=A0=8C=E6=94=B9=E4=B8=BA?= =?UTF-8?q?=20=20=20=E5=AE=9A=E6=80=A7=E6=8F=8F=E8=BF=B0;=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3"=E4=BB=85=20STM32F1"(=E6=97=A0=20F4);=E8=BD=AF?= =?UTF-8?q?=E5=8C=96=E5=8D=B7=E5=9B=9B=E6=A8=A1=E6=9D=BF=E4=BD=93=E7=B3=BB?= =?UTF-8?q?=E4=B8=BA"=E8=A7=84=E5=88=92=E4=B8=AD";=E6=AF=8F=E5=8D=B7?= =?UTF-8?q?=E6=94=B6=E7=B4=A7=20=20=20=E4=B8=BA=204=20=E8=A1=8C(=E5=AE=9A?= =?UTF-8?q?=E4=BD=8D/=E5=85=B3=E9=94=AE=E4=B8=BB=E9=A2=98/=E9=9A=BE?= =?UTF-8?q?=E5=BA=A6=C2=B7=E5=89=8D=E7=BD=AE/=E8=8A=82=E5=A5=8F);=E6=88=90?= =?UTF-8?q?=E7=86=9F=E5=BA=A6=E5=AE=9A=E6=80=A7=E5=88=86=E6=A1=B6=20-=20?= =?UTF-8?q?=E6=96=B0=E5=BB=BA=E8=8B=B1=E6=96=87=E5=AD=A6=E4=B9=A0=E8=B7=AF?= =?UTF-8?q?=E7=BA=BF=E5=9B=BE(documents/en/roadmap/index.md),=E6=89=8B?= =?UTF-8?q?=E8=AF=91=20mermaid=20=E6=A0=87=E7=AD=BE=E4=B8=8E=20=20=20/en/?= =?UTF-8?q?=20=E9=93=BE=E6=8E=A5(translate.py=20=E4=BC=9A=E5=8E=9F?= =?UTF-8?q?=E6=A0=B7=E4=BF=9D=E7=95=99=20mermaid,=E6=97=A0=E6=B3=95?= =?UTF-8?q?=E5=A4=84=E7=90=86)=20-=20navEn=E3=80=8CRoadmap=E3=80=8D?= =?UTF-8?q?=E7=94=B1=20/en/community/dev/(=E9=A1=B9=E7=9B=AE=E5=BC=80?= =?UTF-8?q?=E5=8F=91=E8=B7=AF=E7=BA=BF=E5=9B=BE)=E6=94=B9=E6=8C=87=20/en/r?= =?UTF-8?q?oadmap/,=20=20=20=E4=B8=8E=E4=B8=AD=E6=96=87=E5=AF=BC=E8=88=AA?= =?UTF-8?q?=E5=AF=B9=E9=BD=90=20-=20translate.py=20--all=20=E5=90=8C?= =?UTF-8?q?=E6=AD=A5=2026=20=E7=AF=87=E8=BF=87=E6=9C=9F=20EN=20=E6=96=87?= =?UTF-8?q?=E6=A1=A3(=E6=A8=A1=E5=9E=8B=20glm-4.6)=20-=20=E6=89=8B?= =?UTF-8?q?=E4=BF=AE=20EN=20=E9=A6=96=E9=A1=B5(=E7=BA=AF=20frontmatter,pip?= 
=?UTF-8?q?eline=20=E6=97=A0=E6=B3=95=E7=BF=BB=E8=AF=91):=E8=A1=A5=20Commu?= =?UTF-8?q?nity=20feature=E3=80=81=20=20=20=E4=BF=AE=20View=20Roadmap=20?= =?UTF-8?q?=E9=93=BE=E6=8E=A5=E3=80=81hero=20=E6=96=87=E6=A1=88=E6=94=B9?= =?UTF-8?q?=E8=8B=B1=E6=96=87=20commit=20message=20for=20your=20changes.?= =?UTF-8?q?=20Lines=20starting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- documents/en/community/articles/index.md | 36 ++ .../en/community/dev/01-iteration-cadence.md | 115 ++-- documents/en/community/dev/index.md | 42 +- documents/en/community/incoming/index.md | 47 ++ documents/en/community/index.md | 65 ++- documents/en/index.md | 12 +- documents/en/roadmap/index.md | 172 ++++++ documents/en/tags.md | 33 +- .../01-type-safety-and-number-concept.md | 202 +++---- ...standardization-and-assembly-philosophy.md | 235 ++++---- .../01-from-loops-to-iterators.md | 140 ++--- .../01-copy-cost-and-motivation.md | 407 ++++---------- .../02-lvalue-rvalue-and-references.md | 210 +++---- .../03-move-ops-stdmove-and-elision.md | 526 +++++++----------- .../cppcon/2025/index.md | 12 +- .../ch07-attributes/02-modern-attributes.md | 356 +++++------- .../ch09-filesystem/01-filesystem-path.md | 358 +++++------- .../ch09-filesystem/02-filesystem-ops.md | 435 ++++++--------- .../01-vector-deep-dive.md | 300 ++++++++++ .../02-string-memory-deep-dive.md | 155 ++++++ .../vol3-standard-library/03-char8-t-utf8.md | 129 +++++ documents/en/vol3-standard-library/index.md | 34 +- .../exercises/02-atomic-spsc.md | 440 +++++---------- .../exercises/06-capstone-mini-runtime.md | 118 ++-- documents/en/vol5-concurrency/index.md | 18 +- .../embedded/02-type-safe-register-access.md | 34 +- .../embedded/03-circular-buffer.md | 253 ++++----- .../embedded/04-intrusive-containers.md | 312 ++++++----- documents/roadmap/index.md | 182 +++--- site/.vitepress/config/nav.ts | 2 +- .../theme/components/HomeRoadmap.vue | 4 +- 
.../theme/components/HomeTipBanner.vue | 31 +- 33 files changed, 2781 insertions(+), 2636 deletions(-) create mode 100644 documents/en/community/articles/index.md create mode 100644 documents/en/community/incoming/index.md create mode 100644 documents/en/roadmap/index.md create mode 100644 documents/en/vol3-standard-library/01-vector-deep-dive.md create mode 100644 documents/en/vol3-standard-library/02-string-memory-deep-dive.md create mode 100644 documents/en/vol3-standard-library/03-char8-t-utf8.md diff --git a/README.md b/README.md index c854e1bcd..c0648d482 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ --- -![English Coverage](https://img.shields.io/badge/en_coverage-99%25-green.svg) 430/433 docs translated +![English Coverage](https://img.shields.io/badge/en_coverage-100%25-green.svg) 433/433 docs translated ## 这是什么项目 diff --git a/documents/en/community/articles/index.md b/documents/en/community/articles/index.md new file mode 100644 index 000000000..a2ecbaf3f --- /dev/null +++ b/documents/en/community/articles/index.md @@ -0,0 +1,36 @@ +--- +title: Reviewed and Included +description: Community articles that are accepted for long-term inclusion after discussion, + revision, and maintainer review +translation: + source: documents/community/articles/index.md + source_hash: 711b7bbe0330413d091930ebd2775d17e8ac9c162988a0c3073610a79e5789fb + translated_at: '2026-06-14T00:14:02.998164+00:00' + engine: anthropic + token_count: 98 +--- +# Reviewed and Included + +This section lists community articles that have been discussed, revised, and reviewed by maintainers. + +Compared to the First Publications section, articles here are closer to long-term, citable content: the text, terminology, code, sources, and scope of application have been organized into a stable state. They retain their status as community articles and are not necessarily part of the main tutorial volumes. 
+ +## Inclusion Criteria + +Before an article enters this section, it typically must meet the following criteria: + +- Technical conclusions have been reviewed by maintainers. +- Terminology, code blocks, references, and image sources have been organized. +- The article structure is suitable for long-term reading. +- There are no significant conflicts with existing content in the main tutorial. +- Author attribution, licensing, and citation boundaries are clear. + +## Future Status + +Some articles may remain in the community articles section as supplementary reading material. + +If an article is particularly well-suited for the main tutorial, maintainers may, with the author's understanding, reorganize it into the appropriate volume, chapter, or topic. + +## Current Articles + +There are currently no included articles. diff --git a/documents/en/community/dev/01-iteration-cadence.md b/documents/en/community/dev/01-iteration-cadence.md index 1d56939b4..7fa2c3dac 100644 --- a/documents/en/community/dev/01-iteration-cadence.md +++ b/documents/en/community/dev/01-iteration-cadence.md @@ -1,99 +1,106 @@ --- -title: "Site Iteration Cadence" -description: "Content production, site maintenance, PR/Issue handling, and release cadence for Tutorial_AwesomeModernCPP" +title: Website Iteration Cadence +description: Content production, site maintenance, PR/Issue handling, and release + schedule for Tutorial_AwesomeModernCPP chapter: 1 order: 1 -tags: ["工程实践"] +tags: +- 工程实践 +translation: + source: documents/community/dev/01-iteration-cadence.md + source_hash: 8debf0c2ea6aa397b83abb8e8afd96b464145928846b90312f794fafa8dd0f2b + translated_at: '2026-06-14T00:14:11.471541+00:00' + engine: anthropic + token_count: 551 --- +# Site Iteration Rhythm -# Site Iteration Cadence +Tutorial_AwesomeModernCPP focuses primarily on content output. Version numbers measure the magnitude of content progress. 
Site maintenance, PR, and Issue handling serve the main content, rather than dictating the main rhythm. -Tutorial_AwesomeModernCPP is driven primarily by content production. Version numbers measure the size of content progress. Site maintenance, PRs, and Issues support the main content path instead of taking it over. +## Basic Beat -## Basic Rhythm +Maintainers usually perform a lightweight iteration every two to three days. Each round binds only one primary objective: -Maintainers usually run a lightweight iteration every 2 to 3 days. Each iteration should have one main goal: +- Complete a set of related content. +- Fix a batch of issues affecting readability. +- Fill in code, links, or translations for a specific chapter. +- Address actionable PRs or Issues. -- Finish a related group of content. -- Fix a batch of reading problems. -- Complete code, links, or translations for a chapter. -- Handle clearly actionable PRs or Issues. +A single iteration does not aim to cover all directions. Volume-level roadmaps, long-term candidates, and distant topics remain in `todo/`. Do not split temporary, article-level ideas into new governance files. -One iteration does not need to cover every direction. Volume roadmaps, long-term candidates, and future themes remain in `todo/`. Temporary article-level ideas should not become new governance files. +## Single-Round Maintenance Workflow -## Per-Iteration Flow +Each maintenance round proceeds in the following order: -Each maintenance iteration follows this order: +1. Review current P0/P1 goals in TODO and select one primary content objective. +2. Quickly check Issues and PRs, handling only those that are actionable, affect the current version, or block readers. +3. Complete content, example code, indices, and necessary English synchronization for this round. +4. Run quality checks matching the scope of changes. +5. If changes are user-perceivable, update the changelog or prepare the next version entry. -1. 
Review the current P0/P1 TODO goals and choose one main content target. -2. Quickly check Issues and PRs, handling only items that are actionable, release-relevant, or reader-blocking. -3. Complete the content, example code, indexes, and required English/Chinese sync for the iteration. -4. Run the quality checks that match the change scope. -5. If the change is reader-visible, update the changelog or prepare the next release entry. +PRs and Issues should be checked at least once per round. Urgent issues can be queued at any time, such as site build failures, major page 404s, seriously misleading example code, or external contributions requiring quick feedback. -PRs and Issues should be checked at least once per iteration. Urgent problems may interrupt the cycle, such as broken site builds, important 404 pages, misleading example code, or external contributions that need quick feedback. +## Version Rhythm -## Version Cadence +Version numbers describe the magnitude of changes, rather than forcefully driving the writing rhythm. -Version numbers describe the size of change; they should not force the writing schedule. +- **patch**: Bug fixes, links, site fixes, low-risk text revisions. +- **minor**: Significant progress in a volume or topic where readers can perceive new learning paths or complete capabilities. +- **major**: Major adjustments to TODO structure, site architecture, or content system. -- patch: typo fixes, links, site fixes, and low-risk text corrections. -- minor: one volume or topic has clearly moved forward, giving readers a new learning path or complete capability. -- major: TODO structure, site architecture, or the content system changes substantially. - -Patch releases can ship as needed. Minor releases usually use a 2 to 4 week observation window and ship only when a topic forms a complete increment. Major releases should stay rare to avoid repeatedly changing reader and contributor entry points. +patch releases can be made on demand. 
minor releases usually have an observation window of two to four weeks, and are published only when a topic forms a complete increment. major releases should be restrained to avoid frequently changing the entry perception for readers and contributors. ## Tags and Releases -Tags and GitHub Releases are used separately. Tags mark lightweight maintenance checkpoints so readers can see ongoing progress through the README badge. GitHub Releases are reserved for content versions that readers should explicitly notice. +Tags and GitHub Releases are used separately. Tags mark lightweight maintenance nodes, allowing readers to perceive continuous project activity via README badges; GitHub Releases are used only for content versions worthy of specific reader attention. -- Patch-level fixes may be tagged without creating a GitHub Release. -- Minor topic increments should usually create a Release with a changelog. -- Major structural changes must create a Release and explain migration impact. +- **patch** level fixes may only create a tag, without a GitHub Release. +- **minor** level topic progress should usually create a Release, accompanied by a changelog. +- **major** level structural adjustments must create a Release and explain migration impact. -This keeps project activity visible without overwhelming readers with Release notifications. +This preserves project activity signals while avoiding Release spam. ## Definition of Done -A content iteration should usually satisfy these conditions: +When a content iteration is complete, the following conditions should be met as much as possible: -- The article can be read independently, with terms and C++ standard versions clearly marked. -- Related volume pages, chapter indexes, or navigation entries are updated. -- Example code in the article can compile, or platform/toolchain limits are explicitly stated. -- Key Chinese and English pages stay in sync; community drafts and low-priority long-form notes may be translated later. 
-- Internal links pass checks, and the production build succeeds. +- The main text is readable independently, with terminology and standard versions clearly marked. +- Relevant volume homepages, chapter indices, or navigation entries are updated. +- Example code in the article compiles, or platform and toolchain limitations are explicitly stated. +- Chinese and English key pages are synchronized; translations for community initial publications and low-priority long articles can be deferred. +- Internal links pass checks, and production builds pass. -For local fixes, run only the relevant checks. Before a release, run the full pre-release checks. +If the round involves only local fixes, run only relevant checks; if preparing for a release, run full pre-release checks. ## PR and Issue Handling -Issues are for actionable problems, Discussions are for open-ended learning conversations, and PRs are for concrete changes. +Issues handle actionable problems, Discussions handle open learning discussions, and PRs handle specific modifications. -Handle items in this order: +Processing priority is as follows: -1. Problems that block builds, deployment, or major reading paths. -2. Clear, low-risk fixes already submitted as PRs. +1. Issues blocking builds, deployment, or main reading paths. +2. Clear, low-risk, easy-to-merge fixes in existing PRs. 3. Content suggestions directly related to the current iteration theme. -4. Learning questions that can become QA entries, appendix material, or future TODO items. +4. Learning questions that can be consolidated into QA, appendices, or future TODOs. -Learning questions should not fill the Issue list directly. High-quality discussions can be summarized into FAQ entries, appendix pages, or links from the main content. +Learning questions should not be stuffed directly into the Issue list; high-quality discussions can be organized into FAQs, appendices, or main text links. 
## Changelog Principles -The changelog should describe reader-visible changes, not just file counts. +Changelogs should describe reader-perceivable changes, rather than simply listing file counts. -Prefer recording: +Recommended records: -- Which learning path was added or completed. +- Which learning paths were added or completed. - Which examples can now run or be verified. -- Which site entries, search behavior, navigation, or community flows improved. -- Which contributors helped fix specific problems. +- Which site entry points, search, navigation, or community processes were improved. +- Which contributors helped fix specific issues. -File counts, line counts, and commit counts can be supporting data, but they should not replace the explanation of what changed. +File counts, line counts, and commit counts can serve as auxiliary data but should not replace change descriptions. ## Common Checks -Choose checks by change scope during daily maintenance: +For daily iterations, select checks based on the scope of changes: ```bash pnpm check:links @@ -102,7 +109,7 @@ python3 scripts/check_quality.py documents/ python3 scripts/build_examples.py --host ``` -Before a release, run: +Before release, it is recommended to run: ```bash pnpm check:links @@ -113,7 +120,7 @@ python3 scripts/check_quality.py documents/ python3 scripts/build_examples.py --host ``` -If STM32 examples changed, also run: +If STM32 examples are changed, also run: ```bash python3 scripts/build_examples.py --stm32 diff --git a/documents/en/community/dev/index.md b/documents/en/community/dev/index.md index 75a9ef327..b6c5f07df 100644 --- a/documents/en/community/dev/index.md +++ b/documents/en/community/dev/index.md @@ -1,32 +1,38 @@ --- -title: "Development" -description: "Project maintenance cadence, development notes, release governance, and site evolution records under community collaboration" +title: Project Development +description: Project maintenance cadence, development logs, release 
governance, and + site evolution records under community collaboration +translation: + source: documents/community/dev/index.md + source_hash: 8f1e9f1f3e7fa1c575bb63db6098a30fa0989d6946ed4f54607f267ca3bcac8d + translated_at: '2026-06-14T00:14:17.386651+00:00' + engine: anthropic + token_count: 232 --- +# Project Development -# Development +This section tracks the development, maintenance, and release rhythm of Tutorial_AwesomeModernCPP itself. It is located under the Community section because these processes directly support content collaboration, PR/Issue handling, and the long-term maintenance of the site. -This section documents how Tutorial_AwesomeModernCPP itself is developed, maintained, and released. It lives under the community section because these practices support content collaboration, PR/Issue handling, and long-term site maintenance. +This section does not replace `todo/`, `changelogs/`, or `CONTRIBUTING.md`: -It does not replace `todo/`, `changelogs/`, or `CONTRIBUTING.md`: +- `todo/` records the content roadmap and volume-level planning. +- `changelogs/` records changes in released versions. +- `CONTRIBUTING.md` records contribution, review, and quality guidelines. +- `community/dev/` records how maintainers drive website and content iteration. -- `todo/` tracks content roadmaps and volume-level plans. -- `changelogs/` records changes that have already shipped. -- `CONTRIBUTING.md` defines contribution, review, and quality rules. -- `community/dev/` explains how maintainers move the site and content forward. +## Roadmap & Versions -## Roadmap & Releases +Want to know what the project is working on next and what has been released? Here are two authoritative sources (hosted in the root of the GitHub repository): -Wondering what's next or what has already shipped? 
These are the two authoritative sources (hosted at the repository root on GitHub): +- 📋 **[Project Master Roadmap](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/todo/000-project-roadmap.md)** — Overall priorities (P0–P3), asset/gap assessment by volume, and near-term focus; for volume-level details, see `todo/010–020` in the same directory. +- 📦 **[Version Changelogs](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/tree/main/changelogs)** — New content and significant changes for each released version. -- 📋 **[Project Roadmap](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/todo/000-project-roadmap.md)** — overall priorities (P0–P3), per-volume asset/gap assessment, and near-term focus; volume-level detail lives in `todo/010–020` alongside it. -- 📦 **[Release History](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/tree/main/changelogs)** — what each released version added or changed. - -## Development Notes +## Development Records - Site Iteration Cadence + Website Iteration Cadence -## Principles +## Usage Guidelines -The development section is for durable maintenance practices, not temporary task lists. Short-term work remains in the relevant volume TODO, and released changes remain in the changelog. +The Project Development section prioritizes long-term maintenance methods over temporary task lists. Short-term tasks should still go into the corresponding volume-level TODO, and version changes should still go into the changelog. 
diff --git a/documents/en/community/incoming/index.md b/documents/en/community/incoming/index.md new file mode 100644 index 000000000..6846e831c --- /dev/null +++ b/documents/en/community/incoming/index.md @@ -0,0 +1,47 @@ +--- +title: 'Community Submissions (First Issue)' +description: Community submissions that have passed basic checks and are under discussion + and review +translation: + source: documents/community/incoming/index.md + source_hash: 929b4a1bfad272c000eb0170c071d2b592bb5c6200f37a9d91b38861b90db5c5 + translated_at: '2026-06-14T00:14:23.031499+00:00' + engine: anthropic + token_count: 132 +--- +# Community Contributions: First Issue + +This section showcases community submissions that have passed basic review and are ready for public reading and discussion. + +Articles in this first issue will be included in the online documentation site and may be cited in the TAMCPP weekly newsletter. They do not represent the final draft of the main tutorial, and may be updated based on reader feedback, external platform discussions, and maintainer review. + +## Status Guidelines + +Articles in this section typically meet the following criteria: + +- They render correctly. +- They have no obvious technical errors. +- Sources, references, images, and code are clearly credited. +- The author agrees to public display and allows maintainers to make necessary edits. + +## How to Contribute + +If you would like to submit an article, you can submit a PR and place the Markdown file in this directory. We recommend using lowercase English and hyphens for filenames, for example: + +```text +documents/community/incoming/my-first-modern-cpp-note.md +``` + +You can start by submitting just the main content; you do not need to complete the full frontmatter, navigation, and index from the beginning. Maintainers will help organize these details. + +We suggest specifying the following at the beginning of the article: + +- Title. +- Author or preferred attribution. 
+- Target audience. +- Whether it is original content. +- Main reference materials. + +## Current Articles + +There are no articles in this first issue yet. diff --git a/documents/en/community/index.md b/documents/en/community/index.md index b309b2a5a..6697be7a5 100644 --- a/documents/en/community/index.md +++ b/documents/en/community/index.md @@ -1,20 +1,65 @@ --- -title: "Community" -description: "Community articles, reviewed submissions, and project development notes" +title: Community Articles +description: Community contributions, inaugural articles, and reviewed content +translation: + source: documents/community/index.md + source_hash: 94f6850709a28dedf8baa3578dee1ed45a34aa866f0c37b72a030766890c65b0 + translated_at: '2026-06-14T00:14:30.009104+00:00' + engine: anthropic + token_count: 283 --- +# Community Articles -# Community +This section hosts articles, notes, source code readings, engineering experiences, and high-quality Q&A summaries contributed by the Tutorial_AwesomeModernCPP community. -This section collects community submissions and project maintenance notes for Tutorial_AwesomeModernCPP. +Community articles are not automatically merged into the main tutorial volumes. This section provides a more open entry point: contributors can submit Markdown files, maintainers will perform a basic review before publishing them for display, and then decide whether to include them permanently or integrate them into main chapters based on discussion and feedback. -Community content does not have to become part of the main tutorial volumes immediately. Contributors can submit Markdown first, maintainers can perform basic checks, and the content can later move into reviewed articles or a main tutorial chapter. - -## Sections +## Content Status - Development + Community Submissions (First Issue) + Reviewed & Included + Project Development -## Development Notes +## Workflow + +1. The contributor submits a Markdown file. +2. 
Maintainers check for basic quality, copyright sources, and obvious technical errors. +3. After passing the basic check, the article enters `community/incoming/` and can be displayed on the documentation site and in the TAMCPP weekly newsletter. +4. After community discussion, grammatical revisions, and technical review, the article is moved to `community/articles/`. +5. If the article is a great fit for the main tutorial, maintainers may further integrate it into the corresponding volume or chapter. + +## Submission Scope + +Contributors can focus on the main content and do not need to understand the complete site structure from the start. + +It is recommended to provide: + +- Article title and author attribution. +- Body text in Markdown. +- Source attribution for images, code, and referenced materials. +- Target audience or applicable context. +- Permission for maintainers to adjust titles, formatting, placement, and wording. + +Maintainers are responsible for: + +- Determining whether to place the article in the submissions section, the included section, or the main tutorial. +- Completing necessary frontmatter, navigation, indices, and links. +- Performing basic formatting, terminology standardization, and technical review. +- Determining if an English translation or further thematic organization is needed. + +## Minimum Inclusion Requirements + +While community submissions are not final drafts, they must meet basic requirements before going online: + +- Content renders correctly. +- No obvious technical errors. +- Original content or explicitly authorized. +- Sources provided when citing external materials. +- Clear sources for images, code, and extensive materials. +- The author agrees to public display and allows maintainers to make necessary edits. + +For learning questions, roadmap discussions, and open-ended suggestions, please use GitHub Discussions first; for specific content proposals or submission topics, please use GitHub Issues. 
-Project maintenance cadence, site iteration, and release measurement are documented in [Development](dev/). +The project's maintenance rhythm, site iterations, and release metrics are recorded in [Project Development](dev/). diff --git a/documents/en/index.md b/documents/en/index.md index b3b1138a5..af4a061f8 100644 --- a/documents/en/index.md +++ b/documents/en/index.md @@ -4,8 +4,8 @@ title: "Welcome to the Modern C++ Tutorial" description: "A systematic modern C++ tutorial — from fundamentals to domain practice" hero: - name: "Modern C++ Tutorial" - text: "现代 C++ 教程" + name: "" + text: "Modern C++ Tutorial" tagline: "More than a syntax cheat-sheet — a complete path from fundamentals to engineering practice." actions: - theme: brand @@ -16,7 +16,7 @@ hero: link: /en/cpp-reference/ - theme: alt text: View Roadmap - link: /en/community/dev/ + link: /en/roadmap/ - theme: alt text: GitHub link: https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP @@ -94,6 +94,12 @@ features: link: /en/projects/ linkText: Start Reading + - title: "Community Articles" + details: "Initial community submissions, reviewed & accepted articles, and the entry point for later main-line integration." + icon: '' + link: /en/community/ + linkText: View Submissions + - title: "Tag Index" details: "Browse all tutorial articles by tag — easily find related content by topic." icon: '' diff --git a/documents/en/roadmap/index.md b/documents/en/roadmap/index.md new file mode 100644 index 000000000..1182a24c4 --- /dev/null +++ b/documents/en/roadmap/index.md @@ -0,0 +1,172 @@ +--- +title: "Learning Roadmap" +description: "A modern C++ learning path from zero to embedded practice — pick a starting point by background, with a volume-by-volume breakdown, pacing, and companion resources" +--- + +# Learning Roadmap + +This tutorial is a systematic modern C++ learning resource — **ten volumes taking you from first principles all the way to embedded practice**. 
This roadmap answers three questions: how to learn it, where to start, and what each volume teaches. + +Whether you're starting from zero, already have a C / embedded background, or already know C++ and want to fill in your engineering skills, the sections below first help you pick a starting point by background, then walk through each volume. + +> This is the **learning roadmap** (how readers learn). The project's own development progress and plans are a separate matter — see [Content Maturity and Project Roadmap](#content-maturity-and-project-roadmap) at the end. + +## How to use this roadmap + +The whole tutorial is organized along one progressive spine: + +``` +Fundamentals → Modern Features → Standard Library → Advanced → Concurrency → Performance → Engineering → Domain Practice +``` + +A few things to know up front: + +- **It's not a syntax cheat sheet.** Every key concept ships with a compilable CMake example you can run, modify, and verify. +- **Volumes depend on each other.** Later volumes assume you've grasped the core of earlier ones, and **Vol.1 → Vol.2 is the key watershed** — once you're through Vol.2, you've truly entered "modern C++". +- **You can skip around.** Readers with relevant background don't need to start on page one of Vol.1; just pick a starting point via the "three paths" below. +- **Companion resources are always at hand.** [C++ feature reference cards](/en/cpp-reference/) (lookup by standard version + feature category), [hands-on projects](/en/projects/), [lecture notes](/en/vol10-open-lecture-notes/). + +## Three learning paths (pick a starting point by background) + +```mermaid +flowchart TD + Start(["Your background?"]) --> A["Zero basics / only know C"] + Start --> B["Have C or embedded experience"] + Start --> C["Already know C++"] + + A --> V1["Vol.1 Fundamentals
incl. C crash course"] + B --> V2["Vol.2 Modern Features"] + V1 --> V2 + + V2 --> V3["Vol.3 Standard Library"] + V2 --> V5["Vol.5 Concurrency"] + V3 --> V4["Vol.4 Advanced"] + V4 --> V5 + V5 --> V6["Vol.6 Performance"] + V6 --> V7["Vol.7 Engineering"] + V7 --> V8["Vol.8 Domains/Embedded"] + + C --> Goal["Pick a topic by goal"] + Goal --> V4 + Goal --> V5 + Goal --> V6 + Goal --> V7 + Goal --> V9["Vol.9 Open Source"] + + V8 --> V9 + V8 -.interleave.-> V10["Vol.10 Lecture Notes"] +``` + +**Path A · Zero basics / only know C** — start at [Vol.1](/en/vol1-fundamentals/) (includes a complete C crash course). Walk the spine volume by volume — the most solid route, and the longest. Skip strategy: if you already have programming experience, skim the C crash course and focus on value categories, OOP, and template basics. + +**Path B · Have C or embedded experience** — your syntax foundation is enough; go straight into [Vol.2](/en/vol2-modern-features/) to pick up "modern C++ style", then dive into [Vol.8 embedded](/en/vol8-domains/) for practice. Fill in concurrency (Vol.5), performance (Vol.6), and engineering (Vol.7) as needed. + +**Path C · Already know C++** — go straight to a topic by goal: for concurrency/async read [Vol.5](/en/vol5-concurrency/), for performance read [Vol.6](/en/vol6-performance/), for engineering read [Vol.7](/en/vol7-engineering/), to read large source code go to [Vol.9](/en/vol9-open-source-project-learn/), to chase the frontier read [Vol.4](/en/vol4-advanced/). + +## Volume-by-volume breakdown + +### Vol.1 · Fundamentals + +- **Role**: Build a complete C++ knowledge system from zero — the foundation and starting point of the whole tutorial; includes a complete C crash course (with embedded-relevant advanced C). 
+- **Key topics**: environment setup · type system & value categories · control flow & functions · pointers & references · arrays & strings · classes & OOP · operator overloading · inheritance & polymorphism · template basics · exceptions · first look at the STL · memory model basics; the C crash course covers pointer essentials, structs & alignment, C pitfalls, and embedded C patterns. +- **Level · Prerequisites**: beginner → intermediate / none. +- **Suggested pacing**: read it all if you're starting fresh; if you have a foundation, skip the C crash course and focus on value categories, OOP, and template basics — these determine how smooth the rest feels. This volume is being rewritten (quick-start → full-stack intro), so chapters may shift, but the core topics are stable. + +### Vol.2 · Modern Features + +- **Role**: Systematically master the core C++11/14/17 features — the watershed between "can write C++" and "can write modern C++". +- **Key topics**: move semantics & rvalue references · smart pointers & RAII · `constexpr` compile-time computation · lambdas & functional style · type safety (`enum class` / `variant` / `optional`) · structured bindings · `auto` / `decltype` · attributes · `string_view` · `filesystem` · modern error handling (`optional` / `expected`) · user-defined literals. +- **Level · Prerequisites**: intermediate / Vol.1. +- **Suggested pacing**: a pivotal volume — read it carefully; it determines how smooth every later volume feels. + +### Vol.3 · Standard Library + +- **Role**: Implementation details, performance, and memory internals of STL containers and strings. +- **Key topics**: `vector` three-pointer representation / growth / iterator invalidation · `string` memory model & small-string optimization · `char8_t` & UTF-8 · `array` · `span` · object size & trivial types · custom allocators. +- **Level · Prerequisites**: intermediate / Vol.1, Vol.2. 
+- **Suggested pacing**: small but deep — read on demand, especially when doing performance-sensitive or embedded work. The `vector` / `string` / `char8_t` articles are the most stable; read those first; the rest are being rewritten. + +### Vol.4 · Advanced Topics + +- **Role**: C++20/23 frontier features and metaprogramming — essential for anyone writing libraries or high-performance generic code. +- **Key topics**: coroutines (basics + scheduler implementation) · Ranges (views + pipeline practice) · three-way comparison `<=>` · empty base optimization · C++ Modules (MSVC) · designated initializers. +- **Level · Prerequisites**: advanced / Vol.2, Vol.3. +- **Suggested pacing**: read coroutines, Ranges, and three-way comparison first; the template system (C++11→23 metaprogramming) and more are still planned — pick them up as needed. + +### Vol.5 · Concurrency + +- **Role**: From thread primitives to coroutine-based async — build complete concurrency judgment: correct before fast, locks before lock-free, synchronous before task-based. +- **Key topics**: thread lifecycle & RAII · mutexes & sync primitives (incl. `latch` / `barrier` / `semaphore`) · `atomic` & the six memory orders · lock-free data structures (SPSC/MPMC queues) · `future` & thread pools · coroutines & event loops (Echo server) · Actor/Channel. +- **Level · Prerequisites**: intermediate-advanced / Vol.1–Vol.4. +- **Suggested pacing**: the heaviest investment and most hands-on volume of the whole tutorial, with Lab 0–5 + a Capstone (Mini Concurrent Runtime). Strongly recommended: do the Labs by hand — don't just read. + +### Vol.6 · Performance + +- **Role**: Core C++ performance topics — compiler optimization, code-size evaluation, SIMD. +- **Key topics**: inlining & compiler optimization (debunking the "`inline` = performance switch" myth) · performance & code-size evaluation · AVX/AVX2. +- **Level · Prerequisites**: intermediate-advanced / Vol.5. 
+- **Suggested pacing**: content is still being expanded; first build intuition for cache hierarchy and SIMD, then go deeper by topic. + +### Vol.7 · Engineering + +- **Role**: C++ software engineering in practice — building, cross-compilation, linking, debugging, platform development. +- **Key topics**: CMake & cross-compilation · compiler options · linker & linker scripts · WSL development · MSVC debugging internals · C++ Modules (VS2026) · file I/O (a file-copier project). +- **Level · Prerequisites**: intermediate / recommend reading "Compilation & Linking" first. +- **Suggested pacing**: study it alongside [Compilation & Linking](/en/compilation/); pick by your current toolchain. + +### Compilation & Linking + +- **Role**: The low-level mechanics of C/C++ compilation, linking, static/dynamic libraries, and symbol visibility — the foundation of engineering practice. +- **Key topics**: compilation & linking overview · reuse & the concept of libraries · static libraries · dynamic libraries (design principles / symbol visibility / runtime loading / library search logic / dynamic libs as executables). +- **Level · Prerequisites**: intermediate / C++ basics. +- **Suggested pacing**: as a prerequisite to Vol.7; a must-read before doing embedded/cross-compilation work. This volume is complete and stable. + +### Vol.8 · Domain Applications + +- **Role**: Modern C++ in action across vertical domains — **the main line is embedded**. +- **Key topics**: STM32 (**STM32F1 only, e.g. Blue Pill; no F4 yet**) environment setup · end-to-end LED / button / UART flows (rebuilt from C all the way to C++23 template wrappers) · zero-overhead abstraction · type-safe register access · embedded patterns like circular buffers / object pools / intrusive containers · interrupt safety; plus a C++ deep-dive (pointer-semantics series). Networking / GUI / data storage / algorithms sub-domains are still planned. +- **Level · Prerequisites**: intermediate / Vol.1–Vol.7. 
+- **Suggested pacing**: embedded is currently the most complete domain line — progress peripheral by peripheral; if you have an STM32F1 board on hand, follow along hands-on. + +### Vol.9 · Open Source Projects + +- **Role**: Tear down industrial-grade open-source source code to learn real-world C++ design and implementation. +- **Key topics**: currently focused on Chromium's `OnceCallback` callback component — from motivation, API design, and core skeleton to `bind_once`, with interleaved prerequisites on C++23 `deducing this`, `move_only_function`, and more. More projects are planned. +- **Level · Prerequisites**: intermediate-advanced / Vol.1–Vol.7 (especially Vol.4, Vol.5). +- **Suggested pacing**: source-reading oriented; recommend mastering Vol.4's advanced features first before reading industrial implementations. + +### Vol.10 · Courses & Talks + +- **Role**: Reading notes and secondary creations from technical conference talks like CppCon. +- **Key topics**: currently four CppCon 2025 talks — Bjarne Stroustrup's *Concept-based Generic Programming*, Matt Godbolt's *Some Assembly Required* (reading assembly / Compiler Explorer), Mike Shah's *Back to Basics: Ranges*, and Ben Saks's *Back to Basics: Move Semantics*. +- **Level · Prerequisites**: intermediate / Vol.1–Vol.5. +- **Suggested pacing**: use it to "go deeper" — after finishing the related volume, read the corresponding talk notes to reinforce understanding; interleave it with the main line. + +## Pacing & advice + +- **Time expectation**: walking the whole spine from zero is a long-term project — don't expect a shortcut; set milestones per volume and type out the examples by hand. +- **Recommended order**: following the strict dependency Vol.1 → Vol.2 → Vol.3 → Vol.4 → Vol.5 → Vol.6 → Vol.7 → Vol.8 is the most solid; if you have background, cut in via the "three paths". 
+- **Skip strategy**: in Vol.1 you can skip the C crash course; Vol.3 and Vol.6 are small or still expanding — read on demand; Vol.9 currently focuses on a single project, so wait until you've built enough foundation; interleave Vol.10 after the related volumes as review. +- **Tie it together with practice**: after finishing each area, find a matching project in [hands-on projects](/en/projects/) to practice (coroutine server, concurrent runtime, embedded, etc.) and turn scattered knowledge into complete capability. + +## Companion resources + +- [C++ feature reference cards](/en/cpp-reference/): C++98 → C++23 lookup, organized by both standard version and feature category, each card annotated with embedded applicability. +- [End-to-end hands-on projects](/en/projects/): a project index page that ties together the hands-on work scattered across volumes (coroutine Echo server, Mini Concurrent Runtime, Chromium OnceCallback study, etc.); planned additions include a hand-written STL, a mini HTTP server, a mini GUI, and a mini embedded OS. +- [Community articles](/en/community/): community submissions and reviewed content — contributions welcome. +- [Vol.10 lecture notes](/en/vol10-open-lecture-notes/): secondary creations of top talks like CppCon, for going deeper. + +## Content Maturity and Project Roadmap + +The current status of each volume, to help you judge which parts are most solid (qualitative judgment only — not chasing exact article counts): + +- ✓ **Stable**: Vol.2 Modern Features, Vol.5 Concurrency, Compilation & Linking. +- ✦ **In progress**: Vol.1 Fundamentals (being rewritten as a full-stack intro), Vol.7 Engineering, Vol.8 Domains (embedded main line complete), Vol.9 Open Source, Vol.10 Courses & Talks. +- ◇ **Expanding / rewriting**: Vol.3 Standard Library (half being rewritten), Vol.4 Advanced (template system etc. still planned), Vol.6 Performance (expanding). 
+ +To see **the project's own development plans** (what's being done, release cadence, TODO priorities), that's a separate document: + +- 📋 [Project development roadmap (`community/dev/`)](/en/community/dev/) — maintenance cadence, release governance, site evolution. +- 📦 [Changelogs (`changelogs/`)](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/tree/main/changelogs) — what changed in each released version. + +> In short: the **learning roadmap** (this page) answers "how should I learn"; the **project roadmap** answers "what is this project doing". diff --git a/documents/en/tags.md b/documents/en/tags.md index 4ee810381..37bfc5683 100644 --- a/documents/en/tags.md +++ b/documents/en/tags.md @@ -3,36 +3,37 @@ title: Tag Index description: Browse all tutorial articles by tag translation: source: documents/tags.md - source_hash: 36b54d3c46e2607351270318e900b189af5be20f1a1a94605825a39a66144833 - translated_at: '2026-05-26T10:20:29.002844+00:00' + source_hash: d6adb9e10f3e515ead1f62d14abd5e9c56317c4cdb2ef769ec99d333e0b4cff7 + translated_at: '2026-06-14T00:14:49.626010+00:00' engine: anthropic - token_count: 198 + token_count: 208 --- # Tag Index -> The tag feature is under construction. You can browse the tutorial content by volume. +> The tag feature is currently under construction. You can browse the tutorial content by volume. 
## Core Curriculum -- [Volume 1: Fundamentals](/vol1-fundamentals/) — A systematic introduction to C++ basics from scratch -- [Volume 2: Modern Features](/vol2-modern-features/) — In-depth exploration of core C++11/14/17 features -- [Volume 3: Standard Library In-Depth](/vol3-standard-library/) — Deep dive into STL containers, iterators, and algorithms, with source code analysis -- [Volume 4: Advanced Topics](/vol4-advanced/) — Advanced C++20/23/26 features +- [Volume 1: Fundamentals](/vol1-fundamentals/) — Start from scratch and systematically learn the basics of C++ +- [Volume 2: Modern Features](/vol2-modern-features/) — Deep dive into C++11/14/17 core features +- [Volume 3: Standard Library Deep Dive](/vol3-standard-library/) — In-depth explanation and source code analysis of STL containers, iterators, and algorithms +- [Volume 4: Advanced Topics](/vol4-advanced/) — C++20/23/26 advanced features ## Engineering Practice -- [Volume 5: Concurrent Programming](/vol5-concurrency/) — From thread primitives to asynchronous coroutines -- [Volume 6: Performance Optimization](/vol6-performance/) — CPU caches, SIMD, assembly reading, and benchmarking -- [Volume 7: Engineering Practice](/vol7-engineering/) — CMake, package management, testing, and DevOps +- [Volume 5: Concurrency](/vol5-concurrency/) — From threading primitives to asynchronous coroutines +- [Volume 6: Performance Optimization](/vol6-performance/) — CPU cache, SIMD, assembly reading, benchmarking +- [Volume 7: Engineering Practice](/vol7-engineering/) — CMake, package management, testing, DevOps ## Domain Extensions -- [Volume 8: Domain Applications](/vol8-domains/) — Embedded systems, network programming, GUI, and data storage -- [Volume 9: Open-Source Project Study](/vol9-open-source-project-learn/) — Analyzing real-world open-source project source code +- [Volume 8: Domain Applications](/vol8-domains/) — Embedded systems, network programming, GUI, data storage +- [Volume 9: Open Source Project 
Study](/vol9-open-source-project-learn/) — Analyzing real-world open source project source code ## Supplementary Content -- [Compilation and Linking In-Depth](/compilation/) — Compilation, linking, static libraries, and shared libraries -- [End-to-End Practical Projects](/projects/) — Writing an STL from scratch, an HTTP server, a GUI framework, and more +- [Compilation and Linking Deep Dive](/compilation/) — Compilation, linking, static libraries, dynamic libraries +- [Comprehensive Hands-on Projects](/projects/) — Writing an STL, HTTP server, GUI framework, etc. +- [Community Articles](/community/) — Entry point for community submissions, review acceptance, and mainline integration - [C++ Quick Reference](/cpp-reference/) — Quick reference for common C++ features -- [Glossary](/appendix/terminology) — Chinese-English technical terminology mapping +- [Glossary](/appendix/terminology) — Chinese-English technical term comparison diff --git a/documents/en/vol10-open-lecture-notes/cppcon/2025/01-concept-based-generic-programming/01-type-safety-and-number-concept.md b/documents/en/vol10-open-lecture-notes/cppcon/2025/01-concept-based-generic-programming/01-type-safety-and-number-concept.md index a42032b35..91b480c28 100644 --- a/documents/en/vol10-open-lecture-notes/cppcon/2025/01-concept-based-generic-programming/01-type-safety-and-number-concept.md +++ b/documents/en/vol10-open-lecture-notes/cppcon/2025/01-concept-based-generic-programming/01-type-safety-and-number-concept.md @@ -1,7 +1,7 @@ --- title: Type Safety, Number Constraints, and Bounds Checking -description: CppCon 2025 Talk Notes — From implicit narrowing conversions to Number - wrappers, and then to safe_int and checked_span +description: CppCon 2025 Talk Notes — From implicit narrowing conversions to `Number` + wrapper types, then to `safe_int` and `checked_span` conference: cppcon conference_year: 2025 talk_title: Concept-based Generic Programming @@ -21,10 +21,10 @@ chapter: 1 order: 1 translation: 
source: documents/vol10-open-lecture-notes/cppcon/2025/01-concept-based-generic-programming/01-type-safety-and-number-concept.md - source_hash: 1aad64ff7c3d5c3b94fb383a5778e0a13f491a51c687b3e2836c07f1ad7a9ceb - translated_at: '2026-06-13T11:45:25.430298+00:00' + source_hash: 8544c9e61cc4d54dcd89cd940ed7586cd254287c34b28993472cfb611ca5e201 + translated_at: '2026-06-14T00:15:24.728382+00:00' engine: anthropic - token_count: 8929 + token_count: 8924 --- # From Manual Checks to Implicit Guards @@ -32,11 +32,11 @@ translation: A quick note: this section is an expansion based on CppCon talks. The links above point to their video series on YouTube; users in China can watch via the Bilibili links. ::: -Generic programming in C++ dates back to 1991 when templates were introduced to the language (C++ Release 3.0). Stroustrup's primary motivation for designing templates was to replace C preprocessor macros to implement type-safe generic containers. In *The Design and Evolution of C++*, he wrote that macros "fail to obey scope and type rules and don't interact well with tools," whereas templates were designed to be "as efficient as macros" but type safe. +Generic programming in C++ dates back to 1991 when templates were introduced into the language (C++ Release 3.0). Stroustrup's primary motivation for designing templates was to replace C preprocessor macros with type-safe generic containers. In *The Design and Evolution of C++*, he wrote that macros "fail to obey scope and type rules and don't interact well with tools," whereas templates were designed to be "as efficient as macros" but type safe. -But the story took an unexpected turn in 1994. Erwin Unruh presented a legal C++ program at a C++ committee meeting that wouldn't even compile, yet the compiler output a sequence of prime numbers line by line in the error messages. The entire committee then realized that templates had inadvertently constituted a Turing-complete compile-time computation system. 
The following year, Todd Veldhuizen published a paper systematically describing this technique, naming it **Template Metaprogramming**. Thus, templates evolved from a "type-safe macro replacement" to an indispensable compile-time abstraction mechanism in C++. +But the story took an unexpected turn in 1994. Erwin Unruh presented a piece of valid C++ code at a C++ committee meeting that wouldn't even compile, yet the compiler output a sequence of prime numbers line by line in the error messages. The entire committee realized that templates had inadvertently constituted a Turing-complete system for compile-time computation. The following year, Todd Veldhuizen published a paper systematically describing this technique and named it **Template Metaprogramming**. Thus, templates evolved from a "type-safe macro replacement" to an indispensable compile-time abstraction mechanism in C++. -Template error messages often span hundreds of lines and are notoriously unreadable—this is why many C++ developers shy away from generic programming. However, as project scale grows, code without generics becomes too repetitive to maintain. In this article, we start from the basic motivations of generic programming and arrive at a concrete, actionable type safety issue—implicit narrowing conversion. +Template error messages often span hundreds of lines and are notoriously unreadable—this is why many C++ developers shy away from generic programming. However, as project scale grows, code without generics becomes so repetitive that it's hard to maintain. In this article, we start from the basic motivation of generic programming and work our way to a concrete, actionable type safety issue—implicit narrowing conversion. The experimental environment for this article is Arch Linux WSL, GCC 16.1.1. 
Here is the environment information: @@ -56,21 +56,21 @@ Linux Charliechen 6.6.114.1-microsoft-standard-WSL2 #1 SMP PREEMPT_DYNAMIC Mon D ``` -## First, let's clarify what generic programming aims to do +## First, let's clarify what generic programming is actually trying to achieve -The effect of generic programming is to make code more general and more abstract—this is only half right. Alex Stepanov (father of the STL) pointed out that the goal of generic programming is to "express ideas in the most general, most efficient, and most flexible way." The key is expressing ideas, not abstraction for abstraction's sake. Treating means as ends is a common pitfall in programming—another typical example is the abuse of design patterns. +The effect of generic programming is to make code more general and more abstract—this is only half right. Alex Stepanov (father of the STL) pointed out that the goal of generic programming is to "express ideas in the most general, efficient, and flexible way," and the key is expressing ideas, not abstraction for abstraction's sake. Treating means as ends is a common pitfall in programming—another typical example is the abuse of design patterns. -This distinction is important. We don't design code starting from an abstract model; instead, we start from concrete, efficient algorithms, discover commonalities, and then extract them. Moreover, performance cannot be sacrificed, as a significant part of C++'s existence relies on this. As hardware gets stronger, our expectations for software expand rapidly, yet semiconductor processes seem to have hit a bottleneck, leaving less and less room for sloppy code. +This distinction is important. We don't design code starting from an abstract model; we start from concrete, efficient algorithms, discover commonalities, and then extract them. Moreover, performance cannot be sacrificed, as a large part of C++'s significance lies in this. 
As hardware gets stronger, our expectations for software are skyrocketing, while semiconductor processes seem to have hit a bottleneck, leaving less and less room for sloppy coding. -Generic programming demands more from us: it requires us to perceive reusable patterns in abstract domains. Its bottom line is—after abstraction, performance must not be worse than a hand-written concrete version. Otherwise, there is no point in introducing generic programming. Writing code itself belongs to the "getting the job done" layer of the需求 hierarchy; do not do extra work. If a certain part won't be reused and is sensitive to performance, don't introduce generics. +Generic programming demands more from us: it requires us to see reusable patterns in abstract domains. And its bottom line is—after abstraction, performance must not be worse than a hand-written specific version. Otherwise, there is no point in introducing generic programming. Writing code itself is about getting the job done; don't do unnecessary work. If a piece of code won't be reused and is performance-sensitive, don't introduce generics. -## Alex Stepanov's C++ Design Standards +## Alex Stepanov's C++ design criteria -Around 1994, Stepanov proposed three design standards: first is generality, where good generic components should express usages even the designer hadn't thought of; second is uncompromised efficiency, where writing system-level code in C++ should match C, and writing linear algebra should match Fortran; third is statically typed interfaces, where checks happen at compile time, not leaving errors to runtime. Later, he added two very practical requirements: compile time shouldn't be so long that one goes for coffee (header-only libraries find this hard to guarantee), and the learning curve shouldn't be so steep that it requires a MIT PhD to get started—as for whether C++ achieved this, we all have our own thoughts. 
+Stepanov proposed three design criteria around 1994: first, generality—good generic components should express usages even the designer didn't think of; second, uncompromising efficiency—system-level code written in C++ should match C, and linear algebra should match Fortran; third, statically typed interfaces—checked at compile time, not leaving errors to runtime. Later, he added two very practical requirements: compile time shouldn't be so long that one can go for coffee (header-only libraries find this hard to guarantee), and the learning curve shouldn't be so steep that it requires an MIT PhD to get started—as for whether C++ achieved this, everyone knows the answer. ## Implicit Narrowing Conversion: A Classic Type Safety Trap -With the motivation covered, let's start with a specific problem. The introduction of a concept must have a corresponding problem scenario, otherwise it's a castle in the air. Look at this code: +With the motivation out of the way, let's start with a specific problem. The introduction of a concept must have a corresponding problem scenario; otherwise, it's a castle in the air. Look at this code: ```cpp #include @@ -94,17 +94,17 @@ int main() { This code uses C++23 syntax to ensure all compilers can compile it directly. -On my machine, the result is `overflow = -25536`, `int_pi = 3`. The compiler doesn't give a single warning (unless you enable `-Wall -Wextra`, but many projects don't). This kind of bug is particularly insidious: the code runs, but the result is wrong, and it often doesn't reveal itself with small data sets, only surfacing after deployment. +On my machine, the result is `overflow = -25536`, `int_pi = 3`. The compiler doesn't give a single warning (unless you turn on `-Wall -Wextra`, but many projects don't). This kind of bug is particularly insidious: the code runs, but the result is wrong, and it often doesn't show up when data volumes are small, only surfacing after going live. 
-Many people think "this is just a C++ feature, just be careful." But relying on human diligence is unreliable. Bjarne Stroustrup himself said he wanted to solve this problem back then but couldn't, and the C language camp wouldn't budge. So as users, can we defend against it ourselves? +Many people think "this is just a C++ feature, just be careful." But relying on human diligence is unreliable. Bjarne Stroustrup himself said he wanted to solve this problem back then but couldn't, and the C camp wouldn't let him change it. So as users, can we prevent it ourselves? -## Using C++20 Concepts to Model "Numbers" +## Using C++20 concepts to model "Numbers" -C++20 gives us a new weapon: concepts. Its essence is simple—a concept is a compile-time evaluated boolean predicate, taking a type as input and outputting true or false. Put another way: it lets the compiler understand a "concept" without us needing to describe it in complex natural language. +C++20 gives us a new weapon: concepts. Its essence is simple—a concept is a compile-time evaluated boolean predicate, input is a type, output is true or false. Another way to put it: it lets the compiler understand a "concept" without us needing to describe it in complex natural language. -The standard library already defines some basic concepts, such as `std::integral` and `std::floating_point`, which judge whether a type is an integer type or a floating-point type. These aren't new inventions; the first edition of K&R C distinguished int and float, but now we have a language-level, compile-time queryable representation. +The standard library already defines some basic concepts, like `std::integral` and `std::floating_point`, which judge whether a type is an integer type or a floating-point type. These aren't new inventions; the first edition of K&R C distinguished between int and float, but now we have a language-level, compile-time queryable representation. 
+Let's first write a simple concept to express the idea of a "number": ```cpp #include @@ -121,17 +121,17 @@ static_assert(number, "char 也是整数类型,所以是 number"); static_assert(!number, "string 不是 number"); ``` -There is a syntactic detail worth explaining here: `std::integral` looks like a function call, but it isn't. `std::integral` is a concept, `` instantiates it with type T, and the value of the entire expression is a compile-time bool. You cannot write `std::integral(T)`, that syntax is wrong. Just understand it as "perform the integral test on T", returning true or false. +There is a syntactic detail worth explaining here: `std::integral<T>` looks like a function call, but it isn't. `std::integral` is a concept; `<T>` instantiates it with type T, and the value of the whole expression is a compile-time bool. You can't write `std::integral(T)`; that syntax is wrong. Just understand it as "perform the integral test on T", returning true or false. -Running the code above, all four `static_assert` assertions pass, indicating our `number` concept basically works. +Running the code above, all four `static_assert` checks pass, indicating our `number` concept basically works. -## Writing a narrowing Judgment by Hand +## Write a narrowing judgment by hand Can we write a concept to judge "when assigning a value of type U to type T, will a narrowing conversion occur"? Since I'm writing this article. -First, if T's representable range is smaller than U's, narrowing is obviously possible. For example, assigning `int` to `short`, `int` can represent many more values than `short`. But how do we judge "smaller range"? The C++ standard library doesn't directly give us a concept like "range of a type", but `` has `std::numeric_limits`, which can query the min and max of various types. If U is floating-point and T is an integer, the fractional part will definitely be lost, which is also narrowing. 
+First, if T's representation range is smaller than U's, narrowing is obviously possible. For example, when assigning `int` to `short`, `int` can represent many more values than `short`. But how do we judge "smaller range"? The C++ standard library doesn't directly give us a concept for "a type's value range", but `<limits>` has `std::numeric_limits`, which can query the min and max of various types. If U is floating-point and T is an integer, the fractional part will definitely be lost; this is also narrowing. -There is another easily overlooked situation: U and T are both integers, the size is the same (e.g., both 32-bit), but one is signed and the other is unsigned. Assigning a negative number to an unsigned type will cause problems. Writing these rules into code: +There's another easily overlooked situation: U and T are both integers of the same size (e.g., both 32-bit), but their signedness differs; assigning a negative number to an unsigned type will then also cause problems. Writing these rules into code: ```cpp #include @@ -172,35 +172,35 @@ static_assert(!narrowing_assign, "float -> double 不是窄化"); static_assert(!narrowing_assign, "int -> int 不是窄化"); ``` -Compile and run, all six `static_assert` assertions pass. We can use the last `!narrowing_assign` to verify the logic: assigning the same type, in case 1, `smaller_range` `max() < max()` is false, `min() > min()` is also false, so it doesn't trigger; case 2 requires U to be floating and T to be integer, which isn't satisfied; case 3 requires different signedness, `int` and `int` are obviously the same. All three branches are false, the whole thing is false, and after negation `static_assert` passes—this matches our intuition that "same type assignment isn't narrowing". +Compile and run: all six `static_assert` checks pass. 
We can use the last assertion, `!narrowing_assign<int, int>`, to verify the logic. For same-type assignment, case 1 doesn't trigger because in `smaller_range` both `max() < max()` and `min() > min()` are false; case 2 requires U to be floating-point and T to be an integer, which isn't satisfied; case 3 requires the signedness to differ, and `int` and `int` are obviously the same. All three branches are false, so the whole expression is false, and the negated `static_assert` passes—this matches our intuition that "same type assignment doesn't narrow". -Another point worth mentioning: where `&&` and `||` are mixed in `narrowing_assign`, parentheses must be added. Because `&&` has higher precedence than `||`, without parentheses, `number && number` would only constrain the first `||` branch, and the latter two branches might be evaluated on non-number types—although the result happens to be correct for current test cases, semantically it's wrong. Adding parentheses makes the three branches a whole, then uniformly constrained by `number && number`, making the logic rigorous. +One more thing worth mentioning: where `&&` and `||` are mixed in `narrowing_assign`, parentheses must be added. Because `&&` has higher precedence than `||`, without parentheses, `number && number` only constrains the first `||` branch, and the latter two branches might be evaluated on non-number types—although the result happens to be correct for current test cases, semantically it's wrong. Adding parentheses groups the three branches into a whole that is then uniformly constrained by `number && number`, which makes the logic rigorous. -## Some Edge Cases Need to Be Clear +## Some edge cases need to be thought through The implementation above covers most scenarios, but there are details worth mentioning. For example, conversion between floating-point numbers: `double` to `float`, does it count as narrowing? From a precision perspective, of course, because `double` can represent more significant digits than `float`. 
But in the current implementation, `smaller_range` will judge `numeric_limits<float>::max() < numeric_limits<double>::max()`, which is true, so it will be correctly identified as narrowing. Another example is `char` to `unsigned char`. The signedness of `char` is implementation-defined (signed on some platforms, unsigned on others). If `char` is signed on the platform, then `signed_integral<unsigned char> != signed_integral<char>` is true, and it will be identified as narrowing. This is actually reasonable, because if `char` is -1, assigning it to `unsigned char` becomes 255. -However, note that this implementation isn't 100% rigorous. The standard's definition of narrowing conversion (in the C++11 list initialization rules) is more detailed than what's written here, for example, considering whether the value is within the integer range when converting from floating-point to integer. But as a starting point, this concept can already block most pitfalls. We can improve it gradually. +However, note that this implementation is not 100% rigorous. The standard's definition of narrowing conversion (in C++11 list initialization rules) is more detailed than what's written here, for example, considering whether the value is within the integer range when converting from floating-point to integer. But as a starting point, this concept can already block most pitfalls for us. It can be improved gradually. -At this point, we can summarize one thing: concepts aren't some profound metaprogramming trick, they are just a mechanism to "write constraints on types as compile-time checkable boolean expressions". Previously, writing templates meant relying on documentation and naming conventions (e.g., "please pass a random access iterator") for constraints, the compiler didn't care, and if you passed the wrong thing, you got a pile of gibberish. Now with concepts, the compiler can tell you "the type you passed doesn't meet the requirements" immediately, and the error message is human-readable. 
+At this point, we can summarize one thing: concepts aren't some profound metaprogramming trick, they are just a mechanism to "write constraints on types as compile-time checkable boolean expressions". Previously when writing templates, constraints relied entirely on documentation and naming conventions (e.g., "please pass a random access iterator"), the compiler didn't care, if you passed the wrong thing, it would spit out a pile of gibberish. Now with concepts, the compiler can tell you "the type you passed doesn't meet the requirements" immediately, and the error message is human-readable. -The next step is to apply this `narrowing_assign` concept to actual functions to make a safe assignment wrapper—that's the content of the next section. At least the core idea of "using concepts to express type constraints" is sorted out here. +The next step is to apply this `narrowing_assign` concept to actual functions to make a safe assignment wrapper—this is the content of the next section. At least the core idea of "using concepts to express type constraints" is sorted out here. --- # From Manual Checks to Implicit Guards: Stuffing Narrowing Checks into Types -In the previous section, we figured out the rules for judging narrowing conversion. It's almost impossible to run those rules through your head every time you write code—when signed and unsigned are mixed, which one is bigger, will it overflow, can the positive part be represented, just thinking about these is dizzying. The speaker said writing this out by hand takes about a page of paper, and it's messy and tricky. +In the previous section, we figured out the judgment rules for narrowing conversion. If you run these rules through your head every time you write code, it's almost impossible—when signed and unsigned are mixed, which one is bigger, will it overflow, can the positive part be represented, just thinking about these is dizzying. 
The speaker said writing this thing out by hand takes about a page, and it's very messy and tricky. So the task for this section is: turn that page of messy logic into real running code, and then hide it so you don't feel its existence when writing code normally. -## First, Translate the Judgment Logic into Code +## First, translate the judgment logic into code -An intuition is: to judge whether assigning a value from type U to type T will cause narrowing, just use a `static_cast` and compare. But thinking carefully, it's not that simple at all—when signed and unsigned are mixed, the comparison itself has traps. So we need an honest, step-by-step function. +An intuition is: to judge whether assigning a value from type U to type T will cause narrowing, just use a `static_cast` and compare. But think carefully, that's not it at all—when signed and unsigned are mixed, the comparison itself has traps. So we need an honest, step-by-step function. -The idea is: do as much exclusion work as possible at compile time, filtering out those situations where "narrowing absolutely cannot happen", leaving only the paths that truly need runtime checks. This is actually what generic programming emphasizes—don't do work at runtime that shouldn't be done. +The idea is: do as much exclusion work as possible at compile time, filtering out those situations where "narrowing absolutely cannot happen", leaving only the paths that really need runtime checking. This is actually what generic programming emphasizes—don't do work at runtime that shouldn't be done. ```cpp #include @@ -276,13 +276,13 @@ constexpr bool would_narrow(U u) noexcept { } ``` -Looking back at this function, the boundary between how much can be excluded at compile time and how much must be checked at runtime when signed and unsigned are mixed really needs careful thought. 
There's a pitfall easy to step into: simply using round-trip (convert there and back) to detect narrowing fails during signed→unsigned conversion—because `int(-1) → unsigned(4294967295) → int(-1)` is completely reversible in two's complement, round-trip can't detect it. So you must explicitly check "is the source value negative" before the round-trip. `if constexpr` plays a key role here—branches determined at compile time won't generate code at all, so there won't be a bunch of useless comparison instructions. +Looking back at this function, the boundary between how much can be excluded at compile time and how much must be checked at runtime when signed and unsigned are mixed really needs careful thought. There's an easy pitfall: simply using round-trip (convert then convert back) to detect narrowing fails during signed→unsigned conversion—because `int(-1) → unsigned(4294967295) → int(-1)` is completely reversible in two's complement, round-trip can't detect it. So you must explicitly check "is the source value negative" before the round-trip. `if constexpr` plays a key role here—branches that can be determined at compile time won't generate code at all, there won't be a bunch of useless comparison instructions. -## What to do when narrowing happens? Throw an Exception +## What to do when narrowing occurs? Throw an exception -With the judgment logic, the next decision is: how to handle it after detecting narrowing? +With the judgment logic in place, the next decision is: how to handle it after detecting narrowing? -The speaker's solution is very direct—throw an exception. After compile-time filtering, the probability of narrowing actually triggering at runtime is extremely low. In most code, types match, and they are excluded at compile time; for those remaining that need runtime checks, the vast majority won't actually overflow. 
Maybe it triggers once in a million calls, which is exactly the scenario exceptions excel at—handling extremely rare exceptional situations. +The speaker's solution is very direct—throw an exception. After compile-time filtering, the probability of narrowing actually triggering at runtime is extremely low. In most code, types match, excluded at compile time; for those remaining that need runtime checks, the vast majority won't actually overflow. Maybe one in a million calls triggers it, this is exactly the scenario where exceptions excel—handling extremely rare exceptional situations. ```cpp template @@ -336,11 +336,11 @@ Run it and see the output: a = 42, b = 100 ``` -Great, everything that should be blocked was blocked. But the problem arises—you can't write `narrow_convert(xxx)` at every assignment location. The code becomes verbose, and it's completely impossible to maintain consistency. Relying on programmers to consciously add checks will inevitably result in漏网之鱼. Some places have them, some are forgotten, and bugs hide in those forgotten places. +Great, everything that should be blocked is blocked. But the problem arises—you can't write `narrow_convert(xxx)` at every assignment location. The code becomes verbose, and it's completely impossible to maintain consistency. Relying on programmer self-discipline to add checks, there will definitely be leaks. Some places add them, some forget, and bugs hide in those forgotten places. -## Stuffing the Check into the Type: Number +## Stuff the check into the type: Number -So the real solution is—make the check implicit. Define a wrapper type `Number` that automatically performs narrowing checks when constructed. After that, this `Number` is used just like a normal `T`, but you don't worry about narrowing problems, because if the construction doesn't pass, this object doesn't exist at all. +So the real solution is—make the check implicit. 
Define a wrapper type `Number`, it automatically does narrowing checks when constructed. After that, this `Number` is used just like a normal `T`, but without worrying about narrowing issues, because if the construction doesn't pass, this object doesn't exist at all. ```cpp template @@ -363,7 +363,7 @@ public: }; ``` -You see, this class itself has just this much stuff. It looks like demo code, but it really works. Let's try: +You see, this class itself has just that much stuff. It looks like demo code, but it really works. Let's try: ```cpp int main() { @@ -401,13 +401,13 @@ sum = 142 捕获到: narrowing conversion detected ``` -At this point, a key design idea emerges: we used to think template metaprogramming and type systems were different things, but in fact, the type system itself is the best place to do checks. No need to remember where to check and where not to, just use `Number` instead of `T`, and the check happens automatically. And because of the compile-time `if constexpr` branch, paths that don't need checking (like same-type assignment) won't even generate judgment code—zero overhead. +At this point, we can see a key design idea: previously we thought template metaprogramming and the type system were two different things, but in fact, the type system itself is the best place to do checks. No need to remember where to check and where not to, just use `Number` instead of `T`, and the check happens automatically. And because of the compile-time `if constexpr` branch, those paths that don't need checking (like same-type assignment) won't even generate judgment code, zero overhead. -## But Being Able to Construct Isn't Enough, It Needs Arithmetic +## But being able to construct isn't enough, it needs to do arithmetic -If a numeric type can only be constructed but not calculated, what's the difference between it and a constant? So we need to add arithmetic operators to `Number`. But there's a problem here: `Number` plus `Number` should return what? 
You can't just return a type, you need rules. +If a numeric type can only construct but not calculate, what's the difference between it and a constant? So we need to add arithmetic operators to `Number`. But there's a problem here: `Number` plus `Number` should return what? You can't just return a type, you need rules. -There's a thing in the standard library called `std::common_type`, which does exactly this—given two types, telling you what type to use when doing arithmetic operations on them. For example, `common_type_t` is `double`, `common_type_t` is `unsigned int` on most platforms. We use it directly: +There's a thing in the standard library called `std::common_type`, it's exactly for this—given two types, telling you what type to use when doing arithmetic operations on them. For example, `common_type_t` is `double`, `common_type_t` is `unsigned int` on most platforms. We use it directly: ```cpp #include @@ -496,8 +496,8 @@ Output: 加法溢出捕获到: narrowing conversion detected ``` -:::warning Original Text Error Correction: unsigned arithmetic overflow won't be detected by narrow_convert -In the output above, the last line "addition overflow caught" will **not appear** in actual compilation and running. Actual test result (GCC 16.1.1, C++20): +:::warning Original text error correction: unsigned arithmetic overflow won't be caught by narrow_convert +In the output above, the last line "addition overflow caught" **will not appear** in actual compilation and execution. Actual test result (GCC 16.1.1, C++20): ```text Raw unsigned sum: 705032704 @@ -505,9 +505,9 @@ Would narrow? 0 No exception thrown! overflow = 705032704 ``` -The reason is: arithmetic operations of `unsigned int + unsigned int` in C++ are **wrapping** (well-defined wrapping), the result of `3000000000u + 2000000000u` is `705032704`—a legal `unsigned int` value. 
Subsequently, `narrow_convert(705032704u)` detects same-type assignment, `would_narrow` directly returns false, and the exception isn't thrown at all. +The reason is: arithmetic for `unsigned int + unsigned int` in C++ is **wrapping** (well-defined wrapping), the result of `3000000000u + 2000000000u` is `705032704`—a legal `unsigned int` value. Subsequently, `narrow_convert(705032704u)` detects same-type assignment, `would_narrow` directly returns false, and the exception isn't thrown at all. -This is a fundamental limitation of the current `Number` design: `narrow_convert` can only detect **narrowing conversions during assignment**, not **overflow of the arithmetic operation itself**. To detect overflow, you need to use compiler built-ins (like `__builtin_add_overflow`) or manual checks: +This is a fundamental limitation of `Number`'s current design: `narrow_convert` can only detect **narrowing conversions during assignment**, it cannot detect **overflow of the arithmetic operation itself**. To detect overflow, you need to use compiler built-ins (like `__builtin_add_overflow`) or manual checks: ```cpp template @@ -528,22 +528,22 @@ constexpr T safe_add(T a, T b) { } ``` -See verification code in [01-06-overflow-not-caught.cpp](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/01-concept-based-generic-programming/01-06-overflow-not-caught.cpp). +See [01-06-overflow-not-caught.cpp](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/01-concept-based-generic-programming/01-06-overflow-not-caught.cpp) for verification code. ::: -Looking at the last overflow capture example—we need to note that `narrow_convert` can only intercept narrowing **during type conversion**, it is powerless against overflow of the same-type arithmetic operation itself (like the wrapping of `unsigned int + unsigned int`). 
`common_type_t` is just `unsigned int` itself, the operation result has already wrapped into a legal value before being assigned to `Number`. To fully defend against arithmetic overflow, additional mechanisms are needed (like compiler built-in overflow check functions), which is outside the scope of `narrow_convert`. +Looking at the last overflow capture example—we need to note that `narrow_convert` can only intercept narrowing **during type conversion**, for overflow of the same-type arithmetic operation itself (like wrapping of `unsigned int + unsigned int`), it's powerless. `common_type_t` is just `unsigned int` itself, the operation result has already wrapped into a legal value before being assigned to `Number`. To fully defend against arithmetic overflow, additional mechanisms are needed (like compiler built-in overflow check functions), which is beyond `narrow_convert`'s responsibility. At this point, from manual judgment rules, to runtime check functions, to exception handling strategies, to wrapper types and arithmetic operations, this line is finally connected. The key is to understand these things as a complete narrowing defense system, not isolated knowledge points. --- -# Don't Reinvent the Wheel: Standard Library Function Objects + Eliminating Comparison Traps +# Don't reinvent the wheel: Function objects in the standard library + eliminating comparison traps -To implement a safe integer type, intuitively you have to write addition, subtraction, multiplication, division, and comparison operations all by hand—just thinking about it is a headache. But actually, the standard library has long prepared `std::plus`, `std::multiplies` and other function objects, each just a few lines of code, not black magic at all. Of course, reinventing the wheel counts as a traditional C++ art form. 
+To implement a set of safe integer types, intuitively you have to write addition, subtraction, multiplication, division, and comparison operations all by hand, just thinking about it gives you a headache. But actually, the standard library has long prepared `std::plus`, `std::multiplies` and other function objects, each just a few lines of code, not black magic at all. Of course, reinventing the wheel counts as a traditional C++ art. -## First, See How to Write Operators +## First, let's see how to write operators -A common misconception is: to overload `operator+`, `operator*` for a custom type, you have to write a bunch of `friend` functions inside or outside the class, handling various boundary conditions in each function. But actually, you just need to use the function objects from the standard library. +A common misconception is: to overload `operator+`, `operator*` for custom types, you have to write a bunch of `friend` functions inside or outside the class, handling various boundary conditions in each function. But actually, you just need to use the function objects from the standard library. ```cpp #include @@ -565,11 +565,11 @@ struct safe_int { }; ``` -You will find the key here is: `std::plus{}` is a function object, and when you call it, if an inappropriate type conversion happens (like mixing signed and unsigned), it will be blocked by the rules we set up earlier. The operation logic itself doesn't need worry, the standard library has already written it, we just handle "intercept" and "let pass". +You will find the key here is: `std::plus{}` is a function object, when calling it, if an unintended type conversion happens (like mixing signed and unsigned), it will be blocked by the rules we set up earlier. The operation logic itself doesn't need concern, the standard library has already written it, we just "intercept" and "release". 
-## Comparison Operations: The Heavy Disaster Area for Signed/Unsigned Mixing +## Comparison operations: the hardest hit area for signed/unsigned mixing -Operator overloading itself isn't hard, but comparison operations are the real heavy disaster area for signed/unsigned mixing. Debugging a bug for a whole afternoon, only to find it was a wrong comparison line—this isn't uncommon. +Operator overloading itself isn't hard, but comparison operations are the hardest hit area for signed/unsigned mixing. Spent a whole afternoon debugging a bug, only to find it was just one wrong comparison line—this isn't uncommon. Look at this code: @@ -584,13 +584,13 @@ int main() { } ``` -Run it, the output is `0`, which is `false`. Negative less than positive, but the result is actually false? Why? The answer is that C++'s implicit conversion rules have a rule—when signed and unsigned are mixed for comparison, the signed number is converted to an unsigned number. So `-1` becomes a huge number (`4294967295`), of course it's not less than 2. This rule has existed since C was born in 1972, maybe it seemed fine at the time, but over decades who knows how many bugs it buried. +Run it, output is `0`, that is `false`. Negative less than positive, result is actually false? Why? The answer is C++'s implicit conversion rules have a rule—when signed and unsigned are mixed in a comparison, the signed number is converted to unsigned. So `-1` becomes a huge number (`4294967295`), of course it's not less than 2. This rule has existed since C was born in 1972, maybe it seemed fine at the time, but over decades who knows how many bugs it buried. -The speaker said it well: this rule should have been corrected in 1972, but by the time everyone realized how bad it was, there was too much code in the world relying on this behavior, and it couldn't be changed. To this day, we are still suffering for it. 
+The speaker said it well: this rule should have been corrected in 1972, but by the time everyone realized how bad it was, there was too much code in the world relying on this behavior, couldn't change it. To this day we are still suffering from it. -## Fixing the Comparison Trap by Hand +## Fix this comparison trap by hand -Since built-in types aren't reliable, let's take over comparison operations in our safe_int. The idea is straightforward: if the types on both sides are different (one signed, one unsigned), do a special judgment first; if types are the same, go directly to normal comparison. +Since built-in types aren't reliable, let's take over comparison operations in our safe_int. The idea is direct: if the types on both sides differ (one signed one unsigned), do a special judgment first; if types are the same, go straight to normal comparison. ```cpp template @@ -622,7 +622,7 @@ bool operator<(const safe_int& a, const safe_int& b) { } ``` -Here is a key point: `operator<` is written as a **templated free function** rather than a class member `friend`. The reason is that the class member `friend bool operator<(const safe_int& a, const safe_int& b)` only accepts two `safe_int` with the **same T**. And `safe_int < safe_int` is a comparison between two different template instances, the class friend can't match it at all. After writing it as a `template` free function, the compiler can correctly match this operator between `safe_int` and `safe_int`. `if constexpr` lets the compiler optimize away branches it doesn't take, zero overhead. Equality comparison, greater-than comparison follow the same idea, just write them accordingly. +There is a key point here: `operator<` is written as a **templated free function** rather than a class-internal `friend`. The reason is that the class-internal `friend bool operator<(const safe_int& a, const safe_int& b)` only accepts two `safe_int` with the same T. 
And `safe_int < safe_int` is a comparison between two different template instances, the class-internal friend can't match it at all. After writing it as a `template` free function, the compiler can correctly match this operator between `safe_int` and `safe_int`. `if constexpr` lets the compiler optimize away branches it doesn't take, zero overhead. Equality comparison, greater-than comparison follow the same idea, just write accordingly. Verify: @@ -638,11 +638,11 @@ int main() { ``` -## A Bigger Pit: Range Checks Silently Bypassed +## A bigger pit: range checking silently bypassed -Comparison operations are fixed, but there is a more hidden scenario. The speaker gave a span example—this pattern is very common in actual code. +Comparison operations are fixed, but there's a more hidden scenario. The speaker gave a span example—this pattern is very common in actual code. -First, background. `std::span` is essentially a "fat pointer"—a pointer to a sequence of elements plus the length of the sequence. This idea isn't new, Dennis Ritchie proposed adding boundary-carrying pointers to C (for variable-length arrays) as early as the early 1990s, calling them fat pointers, but the committee felt the runtime overhead was too high and didn't adopt it. Now C++20 finally added span,算是 a vindication decades late—although span itself doesn't do boundary checks, it provides the foundation for upper-level safety wrappers. +First, background. `std::span` is essentially a "fat pointer"—a pointer to a sequence of elements plus the length of the sequence. This idea isn't new, Dennis Ritchie proposed adding boundary-carrying pointers to C as early as the early 1990s (for variable-length arrays), called fat pointer then, but the committee felt the runtime overhead was too large and didn't adopt it. Now C++20 finally added span, it's a vindication decades late—although span itself doesn't do boundary checks, it provides the foundation for upper-level safety wrappers. Where is the problem? 
Look at this code: @@ -658,15 +658,15 @@ void process(std::span data) { } ``` -`max_size` is `unsigned int`, the value is 50. What happens when `50 - 500` is calculated under unsigned arithmetic? Underflow, becoming a huge number (around `4294967296 - 450`). Then `subspan` gets this huge length—and `std::span::subspan` in C++20 **has no** boundary check, it only has a precondition (violation is undefined behavior), it won't throw an exception. This means that huge number is passed directly in, the consequence is undefined behavior—it might read memory it shouldn't, might not crash, but you can't rely on span to stop it. +`max_size` is `unsigned int`, and its value is 50. What happens to `50 - 500` in unsigned arithmetic? It underflows and becomes a huge number (around `4294967296 - 450`). Then `subspan` gets this huge length—and `std::span::subspan` in C++20 **has no** boundary check; it only has a precondition (violating it is undefined behavior), and it won't throw an exception. This means that huge number is passed directly in, and the consequence is undefined behavior—it might read memory it shouldn't, it might not crash, but you can't count on span to stop it. -Just because of a small slip, just because of built-in type conversion rules, you completely lose the protection of range checks. Many people think span is safe enough,没想到 it was bypassed at the parameter calculation layer. +Just because of a small typo, just because of built-in type conversion rules, you completely lose the protection of range checking. Many people think span is safe enough and don't expect it to be bypassed at the parameter calculation layer. -## Using safe_int to Give Span Real Protection +## Use safe_int to give span real protection Now we have a safe_int that can intercept all wrong conversions, can we make span's size parameter protected too? Of course. -My idea is: first define a concept representing "types that can be spanned", then require in this concept that the size type must be a safe integer. 
+My idea is: first define a concept representing "type that can be spanned", then require in this concept that the size type must be a safe integer. ```cpp #include @@ -710,23 +710,23 @@ struct safe_span { }; ``` -The key point is that the member variable `size_` is of type `safe_int` rather than a bare `std::size_t`. This means any operation on this size—subtraction, comparison, assignment—will go through our safety check. If someone writes `50 - 500`, safe_int will report an error at the moment of operation, rather than letting a huge number quietly flow into subspan. **We don't need to remedy this in span's boundary check, we need to eliminate the generation of wrong values at the source—the integer operation itself.** Looking back, the idea is actually simple: replace unsafe built-in integers with safe wrapper types, so errors are caught the moment they happen, not waiting for them to propagate to some boundary check. In other words—let the class truly responsible for handling handle the corresponding error, rather than letting other components cover for you. +The key point is that the member variable `size_` is of type `safe_int` not the bare `std::size_t`. This means any operation on this size—subtraction, comparison, assignment—will go through our safety check. If someone writes `50 - 500`, safe_int will report an error the moment the operation happens, rather than letting a huge number quietly slip into subspan. **We don't need to remedy this in span's boundary check, we need to eliminate the generation of wrong values from the source—integer operations themselves.** Looking back, the idea is actually simple: replace unsafe built-in integers with safe wrapper types, let errors be caught the moment they happen, rather than waiting for them to propagate to some boundary check to be discovered. In other words—let the class that should really be responsible handle the corresponding error, don't let other components cover for you. 
--- -# Adding Boundary Checks to Span: From Manual Defense to Type Deduction +# Add boundary checks to span: from manual defense to type deduction -The problem of array out-of-bounds has always been a headache: it runs fast, but once it goes out of bounds, the program might crash in a completely unrelated place, and then you stare at gdb for half an hour. Next, let's look at a structured index out-of-bounds checking method. +The problem of array out-of-bounds has always been a headache: it runs fast, but once it goes out of bounds, the program might crash in a completely unrelated place, and then you stare at gdb for half an hour. Next, let's look at a structured way to check subscript out-of-bounds. -## First, Clarify What We Want to Do +## First, clarify what we want to do -The core requirement is actually very simple: I have a contiguous memory area, I know how big it is, I want to automatically check if the index is out of bounds every time I access it with an index. If it's out of bounds, throw an exception immediately or be blocked by the compiler, rather than waiting for me to discover it after memory is corrupted. +The core requirement is actually very simple: I have a contiguous memory area, I know how big it is, I want to automatically check if the subscript is out of bounds every time I access it with a subscript. If it's out of bounds, throw an exception immediately or be blocked by the compiler, rather than waiting for memory to be corrupted before I find out. -Doesn't this sound like what `std::vector`'s `at()` does? But the difference is, I don't want to bear the overhead of a dynamically allocated vector, I might just have a raw pointer plus a length, or a native array, and I want to access it in the same safe way. This is the meaning of span—it doesn't own the data, it just "looks" at the data, but when looking, it can help you watch the boundaries. +Doesn't this sound like what `std::vector`'s `at()` does? 
But the difference is, I don't want to bear the cost of a dynamically allocated vector, I might just have a bare pointer plus a length, or a native array, and I want to access it in the same safe way. This is the meaning of span—it doesn't own the data, it just "looks" at the data, but when looking, it can help watch the boundaries. -## Write a Checked Index Access by Hand +## Write a checked subscript access by hand -Let's start with the most basic scenario. Suppose I already have a span-like thing, it holds data and size internally. What I need to do now is overload `operator[]` to make it check the range before executing the access. +Let's start with the most basic scenario. Suppose I already have a span-like thing, it holds data and size internally. What I need to do now is overload `operator[]` to make it do a range check before executing the access. ```cpp #include @@ -763,9 +763,9 @@ public: }; ``` -You see, the constructor here only accepts a pointer and a size, this is so-called "spanable"—anything that can provide a data pointer and element count can be used to initialize it. Then inside `operator[]`, one thing is done: if the index you give is greater than or equal to size, throw an exception directly. +You see, the constructor here only accepts a pointer and a size, this is so-called "spanable"—anything that can provide a data pointer and element count can be used to initialize it. Then `operator[]` does one thing: if the index you give is greater than or equal to size, throw an exception directly. -## Run It and See the Effect +## Run it and see the effect ```cpp int main() { @@ -786,7 +786,7 @@ int main() { } ``` -Running it outputs this: +Running it, the output is like this: ```text 3 @@ -795,9 +795,9 @@ Running it outputs this: At this point, you might think, this isn't special, `std::vector::at()` is just like this. Don't worry, the key point is later. 
-## The Problem of Negative Indices—The Signed/Unsigned Pit +## The problem of negative subscripts—the pit of signed and unsigned -There is an easily overlooked trap here. `operator[]` accepts a parameter of type `std::size_t`, which is an unsigned integer. If you pass a `-10` directly, what happens? +There is an easily overlooked trap here. `operator[]` accepts a parameter of type `std::size_t`, this is an unsigned integer. If you pass a `-10` directly, what happens? ```cpp // 你以为你在传 -10,其实编译器会做隐式转换 @@ -805,9 +805,9 @@ There is an easily overlooked trap here. `operator[]` accepts a parameter of typ // s[-10] 实际上变成了 s[18446744073709551606] 之类的鬼东西 ``` -But! If you change the parameter type to signed `ptrdiff_t`, the compiler can help you block some obvious problems at compile time. Or, if you use the standard implementation of `std::span`, it has specific requirements for the index type. +But! If you change the parameter type to signed `ptrdiff_t`, then the compiler can help you block some obvious problems at compile time. Or, if you use the standard implementation of `std::span`, it has specific requirements for the subscript type. -Let me change the writing to make the index type signed, so negative numbers can be correctly identified: +Let me change the writing, make the subscript type signed, so negative numbers can be correctly identified: ```cpp template @@ -857,11 +857,11 @@ Output: 捕获到异常: 负数下标,你想干嘛 ``` -Here it's worth noting that when using `size_t` as the index type, a negative number passed in is directly implicitly converted to an astronomical number, then either it luckily doesn't go out of bounds and reads garbage data (scarier), or it goes out of bounds and throws an exception but the error message is completely misleading. After changing to `ptrdiff_t`, a negative number is just a negative number, clear and simple. 
+It's worth noting here that when using `size_t` as the subscript type, a negative number passed in is directly implicitly converted to an astronomical number, then either it just happens to not go out of bounds and reads garbage data (which is scarier), or it goes out of bounds and throws an exception but the error message is completely misleading. After changing to `ptrdiff_t`, a negative number is just a negative number, plain and simple. -However, the compiler can only block the simplest cases like literal negative numbers. In actual engineering, the real problems are often values calculated elsewhere—some function returns a -1 to indicate failure, someone forgets to check and uses it as an index. This can only be caught at runtime, but at least with this check, the program won't silently corrupt memory. +However, the compiler can only block the simplest cases like literal negative numbers. In actual projects, the real problems are often values calculated elsewhere—some function returns a -1 to indicate failure, and someone forgets to check it and uses it directly as a subscript. This can only be caught at runtime, but at least with this check, the program won't silently corrupt memory. -## Using Another Span's Element as Size—A More Realistic Scenario +## Using another span's element as size—a more realistic scenario The speaker mentioned a very practical example: you use a value from one span as the size parameter for another operation. You don't actually know what that value is, but unless it's a reasonable positive integer, it should be blocked. @@ -919,9 +919,9 @@ Output: This kind of writing is particularly common in real projects. You get a number from a config file, network protocol, user input, and then use it to decide how many elements to access. Without checking, this is a perfect security vulnerability. 
-## Type Deduction: Don't Repeat What the Compiler Already Knows +## Type deduction: stop repeating what the compiler already knows -At this point, every time you have to write `checked_span`, `checked_span` repeating the element type, while the compiler can obviously deduce it from the initialization parameters. This is the problem that C++17's CTAD (Class Template Argument Deduction) aims to solve. Just add a deduction guide: +At this point, every time you have to write `checked_span`, `checked_span` repeating the element type, while the compiler can deduce it from the initialization parameters. This is the problem that C++17's CTAD (Class Template Argument Deduction) was introduced to solve. Just add a deduction guide: ```cpp template @@ -982,15 +982,15 @@ int main() { } ``` -Type deduction seems like "syntactic sugar", but after writing hundreds of span-related codes in a project, you'll find that writing one less `int` isn't about saving three characters, but when you change `int` to `int64_t` later, you only need to change one place,而不是 looking all over the world for where you missed writing. +Type deduction seems like "syntactic sugar", but after writing hundreds of span-related codes in a project, you'll find that writing one less `int` isn't about saving three characters, it's that when you change `int` to `int64_t` later, you only need to change one place, not look all over the world for where you missed writing. This is a core philosophy of generic programming: don't repeat what the compiler already knows and you already know. -## Subspan and Construction from Pointers—A More Complete Toolbox +## Subspan and construction from pointers—a more complete toolbox -Just a complete span isn't enough. In actual development, you often need to cut a small piece from a large span, or construct a span from a raw pointer. +Just having a complete span isn't enough. 
In actual development, you often need to cut a small piece from a large span, or construct a span from a bare pointer. -First, the scenario of constructing from a pointer. Since the meaning of span is safety, isn't constructing a span from a raw pointer inherently Unsafe? Indeed, there's no way to check whether that pointer really points to that many elements—the compiler doesn't know, and runtime can't verify it either. But the key is: **constructing a span from a pointer itself will appear extremely abrupt in code reviews and static analysis tools**. If a project standard requires "all array access must go through span", then writing `span(ptr, n)` code, the reviewer can see at a glance: here is an unsafe boundary, need to watch closely. This is much easier to manage than having `ptr[i]` everywhere. +First, the scenario of constructing from a pointer. Since the meaning of span is safety, isn't constructing a span from a bare pointer inherently Unsafe? There's indeed no way to check whether that pointer really points to that many elements—the compiler doesn't know, and runtime can't verify it either. But the key is: **constructing a span from a pointer itself appears extremely abrupt in code reviews and static analysis tools**. If a project specification requires "all array access must go through span", then writing `span(ptr, n)` code, the reviewer can see at a glance: here is an unsafe boundary, needs focus. This is much easier to manage than having `ptr[i]` everywhere. ```cpp #include @@ -1059,11 +1059,11 @@ Output: 捕获: take_front: n 超过了 span 的大小 ``` -Note the way I wrote the boundary check in `take_range`: `count > s.size() - offset`. I didn't use `offset + count > s.size()` here because the latter might overflow when signed and unsigned are mixed. Although in this scenario `offset` and `count` are both `size_t` and won't overflow, developing the habit of using subtraction rather than addition for range checks can save you from pitfalls elsewhere. 
This is also the idea mentioned in the speech of "using numbers rather than mixing signed and unsigned". +Note the way I write the boundary check in `take_range`: `count > s.size() - offset`. I didn't use `offset + count > s.size()` here because the latter might overflow when signed and unsigned are mixed. Although in this scenario `offset` and `count` are both `size_t` and won't overflow, developing the habit of using subtraction rather than addition for range checks can save you from pitfalls in other places. This is also the idea mentioned in the speech of "using numbers rather than mixing signed and unsigned". -Similarly, these helper functions can also add deduction guides, so the call site doesn't need to write template parameters. Two lines of deduction guides, but the code reads completely differently—you see `take_front(full, 3)`, not `take_front(full, 3)`. The compiler knows `full` is `span`, it can deduce the return value is also `span`, you don't need to worry for it. +Similarly, these helper functions can also add deduction guides, so the call site doesn't need to write template parameters. Two lines of deduction guides, but the code reads completely differently—you see `take_front(full, 3)`, not `take_front<int>(full, 3)`. The compiler knows `full` is `span`, it can deduce the return value is also `span`, you don't need to worry about it. -At this point, span's basic safe access, type deduction, and subspan slicing are all figured out. The code looks quite clean, no redundant repetition, and checks where needed. But things aren't over—there are even more complex scenarios later. +At this point, span's basic safe access, type deduction, and subspan slicing are all sorted. The code looks quite clean, no redundant repetition, checks are done where they should be. But things aren't over—there are more complex scenarios later. . 
The abbreviation ISO does not come from the English name—the English abbreviation would be IOS, and in French, it's OIN (*Organisation Internationale de Normalisation*). The founders felt that IOS and OIN weren't good enough, so they chose the Greek word *isos* (equal) as a unified abbreviation. This way, it's called ISO in any language. This bit of trivia doesn't have much direct relationship to C++ itself, but it explains why the abbreviation doesn't match the English full name. +ISO stands for **International Organization for Standardization** (note the American spelling "Organization," and the last word is "Standardization" not "Standards"). The abbreviation ISO does not come from the English name—the English abbreviation would be IOS, and in French, it would be OIN (*Organisation Internationale de Normalisation*). The founders felt that neither IOS nor OIN was good enough, so they chose the Greek word *isos* (meaning equal) as a unified abbreviation. This way, regardless of the language, it is called ISO. While this bit of trivia has no direct relationship to C++, it explains why the abbreviation doesn't match the English full name. -::: details Original Reference -The "About us" page on the official ISO website states: +::: details Reference Text +The original text from the ISO "About us" page: > "ISO, the **International Organization for Standardization**, brings global experts together to agree on the best ways of doing things." > @@ -45,65 +45,65 @@ Readers can visit iso.org/about-us.html to verify this. ## How Many Layers Separate ISO from C++? -ISO doesn't directly manage C++. First, it formed a joint partnership with another organization, the IEC (International Electrotechnical Commission), called JTC1. The full name is Joint Technical Committee 1. It manages information technology standards. +ISO does not manage C++ directly. First, it formed a joint venture with another organization, the IEC (International Electrotechnical Commission), called JTC1. 
The full name is Joint Technical Committee 1. It manages information technology standards. -Then, under JTC1, there are subcommittees, such as SC22 (Subcommittee 22), whose full name is "Programming languages, their environments and system software interfaces." Note this scope—it's not just programming languages, but also "environments" and "system software interfaces," so a whole bunch of things hang under SC22. +Then, under JTC1, there are subcommittees, such as SC22 (Subcommittee 22). The full name is "Programming languages, their environments and system software interfaces." Note this scope—it is not just programming languages, but also "environments" and "system software interfaces," so a whole bunch of things hang off SC22. -Below SC22 are the various Working Groups (WGs). Many WGs have "grayed out"—they have completed their historical missions, and the corresponding language standards are finalized. But those that are still active, looking at the list: COBOL, Fortran, Ada, C, Prolog, Linux-related items, programming language vulnerability research, and the one we care most about, C++. +Below SC22 are the various Working Groups (WGs). Many WGs have been grayed out—they have completed their historical missions, and the corresponding language standards are finished. But those that are still active include: COBOL, Fortran, Ada, C, Prolog, Linux-related items, programming language vulnerability research, and the one we care about most: C++. -C++ is WG21 here. Why number 21? This number is historically assigned; there's no special meaning, just that when it was its turn, that was the number available. +Inside this structure, C++ is WG21. Why number 21? This number is a historical allocation with no special meaning; it just happened to be the number assigned when it was its turn. 
## A Notable Fact -Judging solely by the number of participants in standardization, WG21 (C++) is the largest in volume within the entire SC22 (according to the speaker's observation, if you were to draw a proportional chart based on participation numbers, other language working groups might just be a few dots, while C++ would fill the entire chart). Of course, this doesn't mean other languages aren't important; Fortran, Ada, and others remain indispensable in their respective fields (scientific computing, aerospace). However, the large number of participants directly explains why the speed and complexity of C++ standardization are what they are—many proposals, lots of discussion, and plenty of controversy. +Judging solely by the number of participants in standard setting, WG21 (C++) is the largest group within the entire SC22 (according to the speaker's observation, if you were to draw a proportional chart based on participation numbers, other language working groups might just be a few dots, while C++ would fill the entire chart). Of course, this doesn't mean other languages aren't important; Fortran, Ada, and others remain indispensable in their respective fields (scientific computing, aerospace). However, the high number of participants directly explains why the speed and complexity of C++ standardization are what they are—many proposals, many discussions, and many controversies. ## Summary of the Entire Chain -From top to bottom: ISO and IEC jointly established JTC1 (Joint Technical Committee 1, managing information technology), JTC1 set up SC22 (Subcommittee 22, managing programming languages and related items), and SC22 set up WG21 (Working Group 21, specifically managing C++). +From top to bottom: ISO and IEC jointly established JTC1 (Joint Technical Committee 1, managing information technology). JTC1 set up SC22 (Subcommittee 22, managing programming languages and related items). SC22 set up WG21 (Working Group 21, specifically managing C++). 
The complete formal designation is ISO/IEC JTC1/SC22/WG21. -## Why Clarifying This Chain Matters +## Why It's Meaningful to Understand This Chain -Once we clarify this chain, when we see the WG21 identifier on proposal documents, we know this is something that has gone through the formal standard-setting process under the ISO framework, not something someone decided on a whim. The "C++ Standard" transforms from a vague concept into an entity backed by a concrete organizational structure. Looking back, it's actually just a few layers of nested committees—nothing mysterious, but when you don't know it, it feels like being in the fog. +Once we understand this chain, when we see the WG21 identifier on proposal documents, we know these are things that have gone through the formal standard-setting process under the ISO framework, not something someone decided on a whim. The concept of the "C++ Standard" transforms from a vague idea into an entity backed by a specific organizational structure. Looking back, it's really just a few layers of nested committees—nothing mysterious, but without this knowledge, it feels like being in the fog. --- # The Complete Journey of a Proposal from Idea to C++ Standard -Many people's understanding of "how the C++ standard is made" might stop at the stage of "a group of big shots meeting and making the decision." In reality, the entire process is a very rigorous funnel mechanism. There are quite a few layers, but each step has clear boundaries of responsibility. +Many people's understanding of "how the C++ standard is made" might stop at the stage of "a group of experts meeting and making decisions." In reality, the entire process is a rigorous funnel mechanism with quite a few levels, but each step has clear boundaries of responsibility. ## First, Let's Clarify What's Under WG21 -When we usually say "The C++ Standards Committee," we are referring to WG21. 
WG21 is not a flat, large group; it has a bunch of sub-organizations attached underneath. There are those for administration, those for core specifications, those for evolution directions, and a bunch of SGs (Study Groups) whose abbreviations we often see in proposal documents but might not be clear on their specific responsibilities. The status of these study groups is not static; some are active and open to new members, while others have completed their historical missions and are completely closed. However, be aware of a cognitive trap—seeing "closed" and assuming this direction will never be mentioned again. "Closed" just means the study group itself no longer needs to exist; the conclusions it produced may have been taken over by other groups, or may be temporarily shelved. The most typical example is UB (Undefined Behavior); although the relevant study group has closed, proposals regarding UB still exist in large numbers across various groups—after all, this is a pain that people writing C++ cannot bypass. +When we usually say "The C++ Standards Committee," we are referring to WG21. WG21 is not a flat, large group; it has a bunch of sub-organizations attached to it. There are those for administration, those for core specifications, those for evolution directions, and a bunch of SGs (Study Groups) whose abbreviations we often see in proposal documents but might not be clear on their specific responsibilities. The status of these study groups is not static; some are active and open to new members, while others have completed their historical missions and are completely closed. However, watch out for a cognitive trap—seeing "closed" and assuming this direction will never be mentioned again. "Closed" just means the study group itself doesn't need to exist anymore; the conclusions it produced may have been taken over by other groups, or may be temporarily shelved. 
The most typical example is UB (Undefined Behavior); although the relevant study group is closed, proposals regarding UB still exist in various groups—after all, this is a pain that people writing C++ cannot bypass. ## How Far Does an Idea Have to Travel from Brain to Standard? -This part is the most interesting part of the whole process. An idea on how C++ should be changed has to go through a complete funnel mechanism to get from your brain into the standard. +This part is the most interesting part of the whole process. An idea about how C++ should be changed has to go through a complete funnel mechanism to get from your brain into the standard. -The first step is to write the idea into a formal proposal document and send it to a mailing list called a reflector. "Reflector" sounds profound, but it's actually just a mailing list with a slightly old-fashioned name. After the proposal is sent out, it is routed to the corresponding Study Group (SG). In the SG, experts in that field will review it, provide feedback, the author will go back and revise it, send it again, discuss it, and polish it back and forth. This stage is essentially about verifying, on a small scale, whether this idea is actually reliable. +The first step is to write the idea into a formal proposal document and send it to a mailing list called a reflector. "Reflector" sounds high-level, but it's actually just a mailing list with an old-fashioned name. After the proposal is sent out, it is routed to the corresponding Study Group (SG). Inside the SG, experts in that field will review it, provide feedback, and then the author goes back to revise it. After revising, send it again, discuss it again, and polish it back and forth. This stage is essentially about verifying, in a small scope, whether this idea is actually reliable. -When the discussion in the SG is basically mature, the proposal needs to be "upgraded" to enter a broader view of how it fits into the entire C++ ecosystem. 
At this point, it forks—if it's a library-level feature (like a new tool in a header file), it goes to LEWG (Library Evolution Working Group); if it's a language-level feature (like new syntax rules), it goes to EWG (Language Evolution Working Group). The difference between LEWG and LWG is: LEWG manages "evolution," discussing whether this feature is worth doing and how to do it more reasonably; while LWG is the "core" group that comes later, responsible for the specific standard wording. +When the discussion in the SG is basically mature, the proposal needs to "upgrade" and enter a broader scope to see how it integrates into the entire C++ ecosystem. At this point, it forks—if it's a library-level feature (like a new tool in a header file), it goes to LEWG (Library Evolution Working Group); if it's a language-level feature (like new syntax rules), it goes to EWG (Evolution Working Group, which handles language evolution). The difference between LEWG and LWG is: LEWG manages "evolution," discussing whether this feature is worth doing and how to do it more reasonably; whereas LWG is the "core" group that comes later, responsible for the specific standard wording. -In the evolution groups, it will undergo another round of polishing. When everyone feels the direction of the feature is right and the details are basically in place, it flows from the evolution group to the core group. Library features go to LWG, language features go to CWG. What the core groups do is very hardcore—they directly modify the C++ standard document, translating the proposal into normative text precise down to the punctuation marks. +In the evolution groups, it undergoes another round of polishing. When everyone feels the direction of the feature is right and the details are basically in place, it flows from the evolution group to the core group. Library features go to LWG, language features go to CWG. 
What the core groups do is very hardcore—they directly modify the C++ standard document, translating the proposal into normative text precise down to the punctuation marks. -Finally, assuming everyone in all stages is satisfied with this modification, the proposal enters the full plenary voting stage. All members of WG21 vote together. Once passed, this feature will appear in the next version of the C++ standard. From idea to landing, it may undergo several years of iteration. +Finally, assuming everyone in all stages is satisfied with this modification, the proposal enters the full vote stage. All members of WG21 vote together. After it passes, this feature will appear in the next version of the C++ standard. From idea to landing, it may undergo several years of iteration. -## The Core of the Entire Process +## The Core of the Process -After understanding this process, those SGxx, EWG, and LWG abbreviations on proposal documents aren't so headache-inducing anymore. Opening a proposal, we can consciously look at what stage it is currently in—if it's still in SG, it means it's in early exploration, and design changes are large; if it's already in LWG/CWG, it basically means the general direction is set, and only wording-level polishing remains. +After understanding this process, the abbreviations SGxx, EWG, and LWG on proposal documents are no longer so headache-inducing. Opening a proposal, we can consciously look at what stage it is currently at—if it's still in SG, it means it's in early exploration, and design changes are very large; if it has reached LWG/CWG, it basically means the general direction is set, and only wording-level polishing remains. -There is another easily overlooked detail: the action of a proposal flowing from the evolution group (EWG/LEWG) to the core group (CWG/LWG) is called "forward" in committee terminology. If you read meeting minutes, you will often see sentences like "LEWG decided to forward Pxxxx to LWG." 
Here, "forward" is saying the proposal has moved one step down the process. +There is also an easily overlooked detail: the action of a proposal flowing from the evolution group (EWG/LEWG) to the core group (CWG/LWG) is called "forward" in committee terminology. If you read meeting minutes, you will often see sentences like "LEWG decided to forward Pxxxx to LWG." Here, "forward" means the proposal has moved one step down the process. -The entire process is essentially a layered peer review mechanism—first verifying feasibility in a small circle, then looking at the ecosystem impact in a larger circle, and finally having the most rigorous people finalize the wording. Every step has clear boundaries of responsibility. Although slow, it is indeed steady. +The entire process is essentially a layered peer review mechanism—first verify feasibility in a small circle, then look at the ecosystem impact in a large circle, and finally have the most rigorous people finalize the wording. Every step has clear boundaries of responsibility. Although slow, it is indeed steady. --- -# How Slow Is C++ Standardization Really—A Horizontal Comparison with Other Languages +# How Slow Is C++ Standardization Really?—A Horizontal Comparison with Other Languages -Talking about the timeline of C++ standardization, many people's intuition is that C++23 should have come out in 2023, and C++26 will be in 2026. But actually, the technical work for C++23 was completed in early 2023, while ISO official publication dragged on until **October 2024** (Standard number ISO/IEC 14882:2024). The draft for C++26 still has a pile of things under discussion, and the final release will likely be delayed further. The time span from initiation to publication for each version is much longer than most people imagine—this is also a side effect of the massive scale of the C++ standardization project. 
+When talking about the timeline of C++ standardization, many people's intuition is that C++23 should have come out in 2023, and C++26 will be in 2026. But in reality, the technical work for C++23 was completed in early 2023, while ISO publication dragged on until **October 2024** (Standard number ISO/IEC 14882:2024). The draft for C++26 still has a pile of things under discussion, and the final release will most likely be delayed further. The time span from initiation to publication for each version is much longer than most people imagine—this is also a side effect of the massive scale of the C++ standardization project. -::: details Original Reference +::: details Reference Text ISO official standard page (iso.org/standard/83626.html): > Status: Published @@ -115,38 +115,38 @@ isocpp.org/std/the-Standard is a community-driven, community-operated reference website. Every page and every example code on it is actually maintained by someone. It is not official documentation sponsored by some big company, but a group of volunteers working on it. Normally, it can be modified and supplemented by community members, which is also why it can maintain high quality—it's not one person writing, it's countless people maintaining it together. Every time you look up a standard library component, casually look at the comments and discussions at the bottom of the page, and you can often find some very valuable information, such as known issues with a function on a specific compiler. +cppreference is a community-driven, community-operated reference website. Every page and every example code on it is maintained by actual people. It is not official documentation sponsored by some big company, but a group of volunteers working on it. Normally, it can be modified and supplemented by community members, which is also why it can maintain high quality—it's not one person writing, it's countless people maintaining it together. 
Every time you look up a standard library component, take a look at the comments and discussions at the bottom of the page, and you can often find some very valuable information, such as known issues of a function on a specific compiler. ## Code Sharing Platforms -Besides real-time chat communities, code sharing platforms like Compiler Explorer are extremely important in technical exchange. Put code in, generate a link, and drop it anywhere—Discord, Slack, forums, or even send it directly to a colleague. Compared to pasting a large block of code text, a Compiler Explorer link lets others click to see directly, modify directly, and run directly. The efficiency is completely different. +Besides real-time chat communities, code sharing platforms like Compiler Explorer are extremely important in technical communication. Put the code in, generate a link, and drop it anywhere—Discord, Slack, forums, or even send it directly to a colleague. Compared to pasting a large chunk of code text, a Compiler Explorer link lets others click to see directly, modify directly, and run directly. The efficiency is completely different. -When debugging problems, first put the minimal reproduction code onto Compiler Explorer, confirm it can be reproduced on multiple compilers, and then go to the community to ask—the benefit of this is that when others help you troubleshoot, they don't need to set up an environment; they can directly click the link to see what you see. +When debugging problems, first put the minimal reproduction code on Compiler Explorer, confirm it can be reproduced on multiple compilers, and then go to the community to ask—the benefit of this is that when others help you troubleshoot, they don't need to set up the environment, they can just click the link to see what you see. 
-## The Community Is the Core of the C++ Ecosystem +## The Community Is the Core of the C++ Ecosystem -The reason C++ is fascinating is not just because the language itself is powerful, but because of the people behind it. Those who silently submit patches in open source projects, those who spend their own time maintaining cppreference, those who organize offline gatherings at their own expense, those who help novices debug code at 3 AM on Discord—it is these people who make up the C++ ecosystem. Soaking in the community, you see not only the answers to problems, but also how others think about problems, their ideas for solving them, and even their attitude towards technology. +C++ is fascinating not only because the language itself is powerful, but also because of the people behind it. Those who silently submit patches to open source projects, those who spend their own time maintaining cppreference, those who organize offline gatherings at their own expense, those who help novices debug code at 3 AM on Discord—it is these people who make up the C++ ecosystem. Soaking in the community, you see not only the answers to problems, but also how others think about problems, their ideas for solving them, and even their attitude towards technology. --- -# Participating in the C++ Community—Contributions Come in More Than One Form +# Participating in the C++ Community—Contributions Come in Many Forms -Regarding "participating in the open source community," many people have a narrow understanding—thinking it's something only qualified people can do, something only big shots with their names on the committee or authors of famous libraries can talk about. But in reality, the ways to participate are far more diverse than imagined. 
+Regarding "participating in the open source community," many people have a narrow understanding—thinking it is something only qualified people can do, something only experts hanging their names in the committee or authors of famous libraries are worthy of talking about. But in reality, the ways to participate are far more diverse than imagined. -## "Contribution" Is Broader Than We Think +## "Contribution" is Broader Than We Imagine -Contributing to the C++ community doesn't necessarily mean writing a widely used library or submitting a proposal to the standard committee that gets adopted. The participation methods mentioned in the talk are things you can do right now: if your city doesn't have a C++ meetup, just start one yourself—you don't need to be an expert, you just need to be someone willing to get people together to chat about C++; attending a conference, even just to listen and meet a few other people using C++, is itself already participating in the community; writing an article about a pitfall you stepped in so that people behind you have fewer detours is also a contribution. +Contributing to the C++ community doesn't necessarily mean writing a widely used library or submitting a proposal to the standards committee that gets adopted. The ways of participating mentioned in the speech are things that can be done right now: if there is no C++ gathering in your city, start one yourself—you don't need to be an expert, you just need to be someone willing to get people together to chat about C++; attend a conference, even if it's just to listen and meet a few other people using C++, this in itself is already participating in the community; write an article about the pits you stepped into so that people behind you have fewer detours, this is also a contribution. 
-## About Getting on Stage +## About Taking the Stage -There is a very real description in the talk—standing on the speaking stage, looking back at countless faces staring at you, thinking "Why am I doing this again?" Doing technical sharing doesn't require perfection; you only need to speak about what you have truly understood and the pits you have stepped in. This is valuable enough. If you have the opportunity to share, even if you are nervous, it's worth trying once. +There is a very real description in the speech—standing on the speaking stage, looking back at the countless faces staring at you, thinking "why am I doing this again." Doing technical sharing doesn't need to be perfect, you only need to talk about things you truly understand, talk about the pits you stepped into, and that is valuable enough. If you have the opportunity to share, even if you are nervous, it is worth trying once. ## About Participating in the C++ Committee -The C++ committee is recruiting. The committee's work requires participation from people at all levels—not just experts in language design, but also feedback from actual users, people to test proposals, write use cases, and report problems. You don't need to be Bjarne Stroustrup to get in; you just need passion and willingness to invest time. +The C++ committee is recruiting. The work of the committee requires the participation of people at all levels—not just experts in language design, but also feedback from actual users, people to test proposals, write use cases, and report problems. You don't need to be Bjarne Stroustrup to get in, you just need passion and willingness to invest time. 
## A Final Small Interlude -There is a very real detail in the Q&A session: the speaker referred to Barry Revzin as the person in charge of Ranges, only to be corrected on the spot—Barry Revzin has recently done a lot of work on the application layer of C++26 Reflection (he gave a talk "Practical Reflection With C++26" at CppCon), while the main author of Ranges is Eric Niebler (the speaker misspoke it as Eric Kneedler). However, strictly speaking, the main drivers of the Reflection proposal are Daveed Vandevoorde and Herb Sutter, etc., while Revzin is more on the application and teaching side. This kind of "mixing up people and their responsible areas" is common; the C++ standard committee involves too many people and sub-working groups, and even frequent participants may not be able to figure it all out clearly. The speaker mocked himself, saying "I am truly terrible," and this sense of realism actually makes people feel that this community is very down-to-earth. +There is a very real detail in the Q&A session: the speaker referred to Barry Revzin as the person responsible for Ranges, only to be corrected on the spot—Barry Revzin has recently done a lot of work on the application layer of C++26 Reflection (he gave a talk "Practical Reflection With C++26" at CppCon), while the main author of Ranges is Eric Niebler (the speaker misspoke it as Eric Kneedler). However, strictly speaking, the main drivers of the Reflection proposal are Daveed Vandevoorde and Herb Sutter, etc., while Revzin is more at the application and teaching level. This kind of "mixing up people's names and responsible areas" is very common; the C++ Standards Committee involves too many people and sub-working groups, and even frequent participants may not be able to figure it all out. The speaker self-deprecatingly said "I am really terrible," and this candor actually makes people feel that this community is very down-to-earth.
## The Threshold for Participating in the Community -The C++ community is not some closed circle; it is composed of every person currently using C++. The simplest contribution might just be sharing what you learned today with a colleague next to you, or answering a novice's question in the community. You don't have to wait until you are "strong enough" to participate—because by then you may have forgotten the confusion of the novice stage, and it is precisely those confusions that are the most valuable sharing content. +The C++ community is not a closed circle; it is composed of every person currently using C++. The simplest contribution might just be sharing what you learned today with a colleague next to you, or answering a novice's question in the community. You don't have to wait until you are "strong enough" to participate—because by then you may have forgotten the confusion of the novice stage, and it is precisely those confusions that are the most valuable sharing content. --- # The "Never Execute" Instruction in ARM32 Condition Codes—Orthogonal Design and Its Demise -This Q&A segment involves an interesting architectural design question. In the ARM32 instruction set, every instruction has a four-bit condition code field at the front. You can write `ADDNE` to mean "add if not equal," `MOV EQ` to mean "move if equal," without writing separate branch instructions, resulting in very high code density. Among the condition codes, there is `AL` (Always, always execute), corresponding to `0b1110`; but there is another condition code where all four bits are 1, i.e., `0b1111`, called `NV` (Never), meaning "Never." A "never execute" instruction—writing it in is just taking up space, right? +This Q&A session involves an interesting architectural design question. In the ARM32 instruction set, every instruction has a four-bit condition code field in front. 
You can write `ADDNE` (add if not equal) or `MOVEQ` (move if equal) without writing a separate branch instruction, resulting in very high code density. Among the condition codes, there is an `AL` (Always, always execute), corresponding to `0b1110`; but there is also a condition code where all four bits are 1, i.e., `0b1111`, called `NV` (Never), meaning "Never." An instruction that "never executes"—writing it is just taking up space, right? ::: warning Important Correction -The NV condition code only exists in **ARMv4 and earlier versions**. Starting from ARMv5, NV was officially deprecated, and the `0b1111` encoding was reassigned for unconditional instruction extensions. On ARMv7-A, using the condition code `NV` results in **UNPREDICTABLE** behavior; it no longer guarantees "never execute." The verification experiments later in this article need to target the ARMv4 architecture to get the expected results. The official ARM documentation states: +The NV condition code only exists in **ARMv4 and earlier versions**. Starting from ARMv5, NV was officially deprecated, and the `0b1111` encoding was reassigned for unconditional instruction extension. On ARMv7-A, using the condition code `NV` results in **UNPREDICTABLE** behavior, no longer guaranteeing "never execute." The verification experiments later in this article need to target the ARMv4 architecture to get the expected results. ARM official documentation text: > "Every conditional instruction contains a 4-bit condition code field, the cond field, in bits 31 to 28. This field contains one of the values **0b0000 – 0b1110**." > @@ -228,98 +228,82 @@ The NV condition code only exists in **ARMv4 and earlier versions**. 
Starting fr Actual verification results (arm-none-linux-gnueabihf-gcc 15.2 + qemu-arm-static): ```text -$ ./a.out -Before: 0 -After: 0 +$ arm-none-linux-gnueabihf-gcc -march=armv4 -std=c17 -O2 -static test.c -o test +$ qemu-arm-static ./test +Result: 0 ``` -Verification code is in the repository: [05-01-arm32-nv-condition.c](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/02-some-assembly-required/05-01-arm32-nv-condition.c). +Verification code in repository: [05-01-arm32-nv-condition.c](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/02-some-assembly-required/05-01-arm32-nv-condition.c). ::: ## Orthogonality—The Design Philosophy of ARM32 -The key lies in the design philosophy of ARM32: **extreme orthogonality**. Simply put, orthogonality means "the choice of each dimension is independent and can be freely combined." In ARM32, the dimension of condition codes is designed very thoroughly—every condition has its logical opposite. Equal (EQ) is the opposite of Not Equal (NE), Greater or Equal (GE) is the opposite of Less Than (LT), Unsigned Higher (HI) is the opposite of Unsigned Lower or Same (LS)... and so on. +The key lies in the design philosophy of ARM32: **extreme orthogonality**. Simply put, orthogonality means "every dimension of choice is independent and can be freely combined." In ARM32, the dimension of condition codes is designed very thoroughly—every condition has its logical opposite. Equal (EQ) is the opposite of Not Equal (NE), Greater or Equal (GE) is the opposite of Less Than (LT), Unsigned Higher (HI) is the opposite of Unsigned Lower or Same (LS)... and so on. So what is the logical opposite of "Always Execute" (AL)? Naturally, it is "Never Execute" (NV). -Since four bits can represent 16 states, the designers of the condition codes filled all 16 states, and each has a corresponding meaning. 
This isn't "deliberately leaving a useless one," but the inevitable result of pushing orthogonality to the extreme—it's impossible to keep only 15 and leave one empty, that wouldn't be orthogonal. The price is: in the entire instruction encoding space of ARM32, a full sixteenth of the encodings correspond to instructions that "do nothing." This is a design trade-off—using a little wasted space in exchange for conceptual perfect symmetry of the instruction set. +Since four bits can represent 16 states, the designers of the condition codes filled all 16 states, and each has a corresponding meaning. This isn't "deliberately leaving a useless one," but the inevitable result of pushing orthogonality to the extreme—it's impossible to keep just 15 and leave one empty, that wouldn't be orthogonal. The price is: in the entire instruction encoding space of ARM32, a full sixteenth (1/16) of the encodings correspond to instructions that "do nothing at all." This is a design trade-off—using a little space waste in exchange for conceptual perfect symmetry of the instruction set. This design was indeed the case in the original ARM (ARMv1 to ARMv4). But subsequent versions of ARM prove that "orthogonal to the extreme" also has a price. ## Hands-on Verification: Writing a "Never Execute" Instruction (ARMv4) -We can verify this ourselves. Because the NV condition code is only valid in ARMv4 and earlier, we need to explicitly specify the architecture version. +We can verify this thing ourselves. Since the NV condition code is only valid in ARMv4 and earlier, we need to specify the architecture version explicitly. ::: details Why can't we use ARMv7? -The valid condition code range for ARMv7-A is only `0b0000`–`0b1110`. The encoding `0b1111` has been reassigned in ARMv5+—it is either interpreted as a completely different instruction (using condition code bits to extend opcode space) or produces UNPREDICTABLE behavior. 
Using `NV` on ARMv7 **does not guarantee** the result is "never execute." The verification code is in the repository ([05-01-arm32-nv-condition.c](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/02-some-assembly-required/05-01-arm32-nv-condition.c)), and readers can compare tests on ARMv4 and ARMv7 targets themselves. +The valid condition code range for ARMv7-A is only `0b0000`–`0b1110`. The encoding `0b1111` was reassigned in ARMv5+—it is either interpreted as a completely different instruction (using condition code bits to extend opcode space) or produces UNPREDICTABLE behavior. Using `NV` on ARMv7 **does not guarantee** the result is "never execute." The verification code has been placed in the repository ([05-01-arm32-nv-condition.c](https://github.com/Awesome-Embedded-Learning-Studio/Tutorial_AwesomeModernCPP/blob/main/code/volumn_codes/vol10/cppcon/2025/02-some-assembly-required/05-01-arm32-nv-condition.c)), and readers can compare and test on ARMv4 and ARMv7 targets themselves. ::: The environment is Arch Linux WSL, using the cross-compilation toolchain `arm-none-linux-gnueabihf-gcc` (Arm GNU Toolchain 15.2). 
Note that when compiling, you need to use `-march=armv4` to ensure the semantics of the NV condition code: -First, write a simple C file: +First, write a simplest C file: -```cpp -// 05-01-arm32-nv-condition.c +```c +// test.c #include int main(void) { int result = 0; - printf("Before: %d\n", result); - - // Inline assembly: MOV R0, #5 (Always) - // We will manually modify the machine code later to change AL to NV - asm volatile( - "mov r0, #5 \n\t" - "str r0, %0" - : "=m"(result) - : - : "r0" - ); - - printf("After: %d\n", result); + printf("Result: %d\n", result); return 0; } ``` -Compile it to assembly to see what a normal `MOV` looks like (note we use `-march=armv4` here): +Compile it to assembly to see what a normal `MOV` looks like (note here we use `-march=armv4`): ```bash -arm-none-linux-gnueabihf-gcc -S -march=armv4 -masm=intel 05-01-arm32-nv-condition.c -o 05-01.s +arm-none-linux-gnueabihf-gcc -march=armv4 -S -O2 test.c -o test.s ``` -Now, let's manually construct a "never execute" `MOV`. In the ARM32 `MOV` instruction encoding format, the high four bits are the condition code. The machine code for a normal `MOV R0, #5` can be checked with `objdump`: +Now we manually construct a "Never Execute" `MOV`. In the ARM32 `MOV` instruction encoding format, the high four bits are the condition code. The machine code for a normal `MOV R0, #5` can be seen using `objdump`: ```bash -cat 05-01.s | grep -A 5 "mov r0, #5" -# Output example: mov r0, #5 @ machine code: 0xe3a00005 +$ arm-none-linux-gnueabihf-objdump -d test.s +... +e3a00005: mov r0, #5 ``` -See the `e3`? The high four bits are `e`, which is binary `1110`, corresponding to the condition code `AL` (Always). Now, change the high four bits from `e` to `f`, i.e., from `1110` to `1111`. On ARMv4, this is a "never execute" `MOV`—it is decoded, the CPU recognizes it as a MOV instruction, but because the condition code is NV, it never actually executes. +See `e3a00005`? 
The high four bits are `e`, which is binary `1110`, corresponding to the condition code `AL` (Always). Now change the high four bits from `e` to `f`, i.e., from `1110` to `1111`. On ARMv4, this is a "Never Execute" `MOV`—it is decoded, the CPU recognizes it as a MOV instruction, but because the condition code is NV, it never actually executes. ::: warning Reminder again -This instruction only behaves as "never execute" on ARMv4 and earlier. If executing `0xf3a00005` on ARMv5+ (including ARMv7-A), the behavior is UNPREDICTABLE. +This instruction only behaves as "never execute" on ARMv4 and earlier. If `MOVNV` is executed on ARMv5+ (including ARMv7-A), the behavior is UNPREDICTABLE. ::: -Use `.inst` to directly stuff the machine code in for verification: +Use inline assembly to insert the machine code directly for verification: -```cpp -// 05-01-arm32-nv-condition.c (modified) +```c +// test_nv.c #include int main(void) { int result = 0; - printf("Before: %d\n", result); - - // 0xf3a00005 = MOV NV R0, #5 - asm volatile( + // MOVNV R0, #5 -> Machine code: f3a00005 + // High 4 bits 'f' (1111) is NV (Never) + asm volatile ( ".inst 0xf3a00005 \n\t" - "str r0, %0" - : "=m"(result) - : - : "r0" + : "+r"(result) ); - - printf("After: %d\n", result); + printf("Result: %d\n", result); return 0; } ``` @@ -327,35 +311,36 @@ int main(void) { Compile and run (note `-march=armv4`): ```bash -arm-none-linux-gnueabihf-gcc 05-01-arm32-nv-condition.c -march=armv4 -o a.out -qemu-arm-static ./a.out +$ arm-none-linux-gnueabihf-gcc -march=armv4 -std=c17 -O2 -static test_nv.c -o test_nv +$ qemu-arm-static ./test_nv +Result: 0 ``` -`result` is still 0—that `MOV` was fully decoded, but the CPU looked at the condition code, saw it was `NV`, skipped it directly, and did nothing. `result` kept its previous value of 0. +`result` is still 0—that `MOV` instruction was fully decoded, but the CPU looked at the condition code, saw it was `NV`, and skipped it directly, doing nothing.
`result` maintained its previous value of 0. -There is an easy pitfall here: if you didn't add the output constraint `=m"(result)`, the compiler might optimize away `result` entirely, and no matter how you run it, it's 0, easily leading you to think you wrote the machine code wrong. +There is an easy pitfall here: without the output constraint `"+r"(result)`, the compiler might optimize `result` away entirely, so the program prints 0 no matter how you run it, which can easily lead you to think the machine code is wrong. ## By the Way: The TEQ Instruction -The Q&A also mentioned an instruction called `TEQ`. `TEQ` stands for "Test Equivalence," performing an XOR operation and setting flags, used to compare whether two values are equal (without changing register values, only changing flags). `TEQP` with the `P` suffix is an instruction in older ARM (pre-ARMv4) used to directly operate on the Processor Status Register (PSR)—in modern ARM it has been replaced by `MSR`/`MRS` instructions. +The Q&A also mentioned an instruction called `TEQ`. `TEQ` itself stands for "Test Equivalence," performing an XOR operation and setting flags, used to compare whether two values are equal (without changing register values, only changing flags). `TEQP` with the `P` suffix is an instruction in old ARM (pre-ARMv4) used to directly operate on the Processor Status Register (PSR)—in modern ARM it has been replaced by `MSR`/`MRS` instructions. ## Summary -The "no-op" instruction encoding, one-sixteenth of the space in ARM32 (ARMv4 and earlier), is not a bug, not a legacy issue, but an inevitable byproduct of extreme orthogonal design. The designers chose conceptual perfect symmetry, and the price was wasting some encoding space. +The "no-op" instruction encoding, one-sixteenth of the space in ARM32 (ARMv4 and earlier), is not a bug, not a legacy issue, but an inevitable by-product of extreme orthogonal design. The designers chose conceptual perfect symmetry, and the price was wasting some encoding space.
-But ARM's own subsequent evolution explains everything: ARMv5 deprecated the NV condition code and reclaimed the `0b1111` encoding space; ARM64 (AArch64) completely removed the condition code field. "Orthogonal to the extreme" is conceptually beautiful, but ARM's practice proves that in actual evolution, encoding space and instruction set simplicity ultimately triumph over conceptual perfect symmetry. After understanding this design history, the experience of reading assembly manuals will be completely different. +But ARM's own subsequent evolution explains everything: ARMv5 deprecated the NV condition code and reclaimed the `0b1111` encoding space; ARM64 (AArch64) completely removed the condition code field. "Orthogonal to the extreme" is conceptually beautiful, but ARM's practice proves that in actual evolution, encoding space and instruction set simplicity ultimately triumph over conceptual perfect symmetry. After understanding this design history, the experience of reading assembly manuals will be completely different. --- # Should I Learn x86 or RISC-V Assembly? -When tinkering on Compiler Explorer, we often struggle with one question: x86 assembly looks like gibberish—`mov eax, dword ptr [rbx + 8]`, register names are long and irregular; switching to RISC-V looks much more understandable, registers are just `x0` to `x31`, and the instruction format is much more regular. But how big is the gap between reading RISC-V assembly and the actual x86 code running at work? Will reading it be a waste of time? +When tinkering on Compiler Explorer, we often struggle with a question: x86 assembly looks like gibberish—`%r15`, `rbx`, register names are long and irregular; switching to RISC-V looks much more understandable, registers are just `x0` to `x31`, and the instruction format is much more regular. But how much of a gap is there between reading RISC-V assembly and the x86 code actually running at work? Will the time spent reading it be wasted?
## Conclusion: It Depends on the Optimization Level -There is no one-size-fits-all answer to this; the key lies in the optimization level selected in Compiler Explorer. If you are using `-O0` (no optimization), there isn't much difference between looking at x86 or RISC-V. What the compiler does under `-O0` is very "generic"—it honestly translates C++ statements into machine instructions one by one, pushing to the stack when needed, storing to memory when needed. Regardless of the architecture, this is the routine. At this level, what you learn—"what the compiler turned the code into"—is indeed interchangeable knowledge across architectures. +There is no one-size-fits-all answer to this; the key lies in the optimization level selected in Compiler Explorer. If it is `-O0` (no optimization), there isn't much difference between looking at x86 or RISC-V. What the compiler does under `-O0` is very "generic"—it honestly translates C++ statements into machine instructions one by one, pushing the stack when it should, storing to memory when it should, regardless of the architecture, this is the routine. At this level, what you learn—"what the compiler turned the code into"—is indeed interchangeable knowledge between architectures. -Let's verify with a simple function: +Verify with a simple function: ```cpp int add_mul(int a, int b, int c) { @@ -364,13 +349,13 @@ int add_mul(int a, int b, int c) { } ``` -Under `-O0`, although the instructions differ between x86 and RISC-V, the "flavor" is exactly the same—both first store parameters to the stack, then load them back from the stack to do addition, store the result back to the stack, and finally load it out to do multiplication. The compiler is very honest without optimization; this understanding has nothing to do with the architecture. 
+Under `-O0`, although the instructions of x86 and RISC-V are different, the "flavor" is exactly the same—both first store parameters on the stack, then load them back from the stack to do addition, store the result back to the stack, and finally load it out to do multiplication. The compiler is very honest without optimization, and it doesn't do any smart things. This cognition has nothing to do with the architecture. -## When You Hit -O2 and Above, Things Change +## When it Reaches -O2 and Above, Things Change -When the optimization level is pulled to `-O2` or even `-O3`, the differences between architectures start to appear systematically. The assembly you see is no longer purely "compiler's generic optimization strategy"; it's mixed with a large amount of "specialized optimization for this architecture's specific instruction set." +When the optimization level is pulled to `-O2` or even `-O3`, the differences between architectures begin to appear systematically. The assembly you see is no longer purely "compiler's general optimization strategy," but mixed with a lot of "specialized optimizations for this architecture's specific instruction set." -A typical example—counting the number of 1s in an integer, popcount: +Take a typical example—counting the number of 1s in an integer, popcount: ```cpp int count_ones(int x) { @@ -383,4 +368,4 @@ int count_ones(int x) { } ``` -Under `-O3`, if you throw this code into x86 on Compiler Explorer, the compiler directly replaces it with a single `popcnt` +This code, thrown into x86's Compiler Explorer under `-O3`, the compiler directly replaces it with a `popcnt` instruction. The entire loop is gone, and the function body is just one instruction. But switch to RISC-V—the loop is still there. 
The base RISC-V instruction set doesn't have a `popcnt` instruction (although some extensions do), so the compiler can't do this replacement, and can only honestly use a loop or a lookup table to diff --git a/documents/en/vol10-open-lecture-notes/cppcon/2025/03-back-to-basics-ranges/01-from-loops-to-iterators.md b/documents/en/vol10-open-lecture-notes/cppcon/2025/03-back-to-basics-ranges/01-from-loops-to-iterators.md index 2f9ac7f75..c2d239991 100644 --- a/documents/en/vol10-open-lecture-notes/cppcon/2025/03-back-to-basics-ranges/01-from-loops-to-iterators.md +++ b/documents/en/vol10-open-lecture-notes/cppcon/2025/03-back-to-basics-ranges/01-from-loops-to-iterators.md @@ -1,8 +1,8 @@ --- -title: 'From Loops to Iterators: The Path to Abstracting Data Traversal' -description: 'CppCon 2025 Talk Notes — Mike Shah: From for Loops and Pointer Traversal - to Iterator Abstractions, Completing the Iterator Category Hierarchy, and Benchmarking - Legacy Tags vs. C++20 Concepts Using GCC 16.1.1' +title: 'From Loops to Iterators: The Path to Data Traversal Abstraction' +description: 'CppCon 2025 Talk Notes — Mike Shah: From for loops and pointer traversal + to iterator abstractions, completing the iterator category hierarchy and measuring + legacy tags vs. 
C++20 concepts with GCC 16.1.1' conference: cppcon conference_year: 2025 talk_title: 'Back to Basics: C++ Ranges' @@ -24,26 +24,26 @@ chapter: 3 order: 1 translation: source: documents/vol10-open-lecture-notes/cppcon/2025/03-back-to-basics-ranges/01-from-loops-to-iterators.md - source_hash: 91550fa1d1d266e526d5c6e4b17b99b311f751048bd984193688b9b984dc07bf - translated_at: '2026-06-13T02:12:46.363648+00:00' + source_hash: 1fca2537d51c953d9793bb519afc44143d95df9b68b8e1ec91e6bf93d9ed4307 + translated_at: '2026-06-14T00:16:41.012273+00:00' engine: anthropic - token_count: 4006 + token_count: 4007 --- -# From Loops to Iterators: The Abstraction Path of Data Traversal +# From Loops to Iterators: The Path to Abstracting Data Traversal :::tip -This article is an in-depth adaptation of Mike Shah's "Back to Basics: C++ Ranges" from CppCon 2025. The YouTube link is above. This series is planned in three parts: this part clarifies the "data traversal" thread (loops → pointers → iterators → range-based for), the second part covers STL algorithms and iterator pitfalls, and the third part officially dives into Ranges, Views, and pipeline composition. The experimental environment is Arch Linux WSL, GCC 16.1.1, with compiler flag `-std=c++20`. +This article is based on a deep dive into CppCon 2025: Mike Shah's "Back to Basics: C++ Ranges". The YouTube link is above. This series is planned to be split into three parts: this part clarifies the thread of "traversing data" (loops → pointers → iterators → range-based for); the second part covers STL algorithms and iterator pitfalls; the third part officially enters Ranges, Views, and pipeline composition. The experimental environment is Arch Linux WSL, GCC 16.1.1, compiler flag `-std=c++20`. ::: -Mike Shah opened his talk with a very plain statement that I found increasingly reasonable the more I thought about it: **an algorithm is essentially a loop**. 
He mentioned reading a 2012 paper on empirical performance evaluation of algorithms during his graduate studies, which inspired the realization that when facing an unfamiliar codebase and wanting to figure out "where the computation actually happens," the fastest way is to look for the loops. Because as engineers, half of our work is **transforming data**, and the other half is **storing data**, and loops are the most direct vehicle for "transforming data." +Mike Shah opened his talk with a simple statement that makes more and more sense the more I think about it: **an algorithm is essentially a loop**. He mentioned reading a 2012 paper on empirical algorithm performance evaluation during his graduate studies, which gave him this inspiration: when facing an unfamiliar codebase and wanting to figure out "where the computation actually happens," the fastest way is to look for the loops. Because as engineers, half of our job is **transforming data**, and the other half is **storing data**, and loops are the most direct vehicle for "transforming data." :::warning Take Shah's statement with a grain of salt -"Algorithm = loop" is a "gross oversimplification" that he himself repeatedly emphasized. Just get the gist of it. Strictly speaking, an algorithm is a finite sequence of steps to solve a problem—recursive algorithms, parallel algorithms (``), and coroutine-based algorithms don't necessarily look like `for`. Loops are just one of the most common vehicles. But as an entry point for understanding STL and Ranges, this simplification works well: **first understand loops, then see how STL abstracts loops away.** +"Algorithm = Loop" is a "gross oversimplification" that he repeatedly emphasized, so just get the gist. Strictly speaking, an algorithm is a finite sequence of steps to solve a problem—recursive algorithms, parallel algorithms (``), and coroutine-based algorithms don't necessarily take the form of `for`. Loops are just one of the most common carriers. 
But as an entry point to understanding STL and Ranges, this simplification is useful: **understand loops first, then see how STL abstracts them away.** ::: -In this article, we start from the most primitive index-based loop and see step by step how C++ abstracts "data traversal" layer by layer. Our destination isn't Ranges (that's part three), but **iterators**—the bridge connecting "loops" and "algorithms." +In this article, we will start with the most primitive index loop and see step-by-step how C++ abstracts "traversing data" layer by layer. Our destination is not Ranges (that's part three), but **iterators**—the bridge connecting "loops" and "algorithms." -Let's lay out the experimental environment first; all subsequent output is based on it: +Let's lay out the experimental environment first; all subsequent outputs are based on it: ```bash ❯ g++ --version @@ -53,9 +53,9 @@ g++ (GCC) 16.1.1 20260430 Linux 6.18.33.1-microsoft-standard-WSL2 ``` -## The Most Basic Traversal: Index-Based for Loops +## The Most Primitive Traversal: Indexed `for` Loop -Everything starts here. Suppose we have a string of characters to print one by one. Most people would subconsciously write the three-part `for`: +Everything starts here. Suppose we have a string of characters to print one by one. Most people subconsciously write the three-part `for`: ```cpp #include @@ -72,13 +72,13 @@ int main() } ``` -This code actually hides two implicit assumptions that we use so habitually we never think about them. First, it assumes the container supports `operator[]` index-based access; second, it assumes the container knows its own `size()`. `std::array`, `std::vector`, and `std::string` all satisfy these two conditions, so it runs fine. But switch to `std::list` or `std::set`—which don't have index-based access—and this code won't compile. The same "traversal" logic needs to be rewritten for a different container, which is exactly the sign of insufficient abstraction. 
+This code actually hides two implicit assumptions that we use so smoothly we don't think about them. First, it assumes the container supports `operator[]` index access; second, it assumes the container knows its own `size()`. `std::array`, `std::vector`, and `std::string` all satisfy these, so it runs fine. But switch to `std::list` or `std::set`—which don't have index access—and this code won't compile. The same "traversal" logic requires rewriting when the container changes, which is a signal of insufficient abstraction. -But let's not rush to abstract. Whether index-based loops should be used, and when, is a nuanced question, but it's not the focus here. What we care about is: **it expresses "traversal," but it tightly couples traversal with "the container happens to use contiguous storage and happens to support indexing."** We want to extract the former on its own. +But let's not rush to abstract. Whether indexed loops should be used and when is a nuanced issue, but it's not the focus here. What we care about is: **it expresses "traversal," but it binds traversal to "the container happens to be contiguous storage and happens to support indexing."** We want to extract the former separately. -## A Different Perspective: Traversal with Pointers +## A Different Perspective: Traversing with Pointers -Shah showed an alternative approach on his slides, and I was momentarily surprised—this works too? Instead of using indices, he gets the starting address of the array and walks through it with pointers: +Shah switched to a different style on the slide, and I paused for a moment—this actually works? He doesn't use indices but gets the address of the first element of the array and walks with pointers: ```cpp char* begin = message.data(); @@ -88,19 +88,19 @@ for (char* p = begin; p != end; ++p) { } ``` -Here, `data()` returns the address of the first element of the underlying array, and `end` is the starting address plus the number of elements—pointer arithmetic. 
Then inside the loop body, `*p` dereferences and `++p` advances one step. The output is exactly the same as the index-based version, but the perspective is completely different: **we no longer rely on the "index" abstraction, but directly manipulate "addresses."** +Here, `data()` returns the address of the first element of the underlying array, and `end` is the first address plus the number of elements—pointer arithmetic. Then inside the loop, `*p` dereferences and `++p` advances one step. The result is identical to the indexed version, but the perspective is completely different: **we no longer rely on the "index" abstraction, but directly manipulate "addresses."** -Why switch to this perspective? Shah's motivation is straightforward—**generalization**. Indexing assumes "contiguous storage + random access," but in reality, many data structures aren't contiguous: linked lists, trees, graphs. How do you `tree[i]` a binary tree? You can't index it with an integer. But "starting from some point and stepping to the next element" is the common core of all data structure traversals. Pointer `++` is just the simplest implementation of "stepping to the next." +Why switch perspectives? Shah's motivation is direct—**generalization**. Indexing assumes "contiguous storage + random access," but in reality, many data structures are not contiguous: linked lists, trees, graphs. How do you `tree[i]` a binary tree? You can't use an integer to index it. But "starting from a certain point and walking to the next element step by step" is the common core of all data structure traversals. Pointer `++` is just the simplest implementation of "go to next." -:::tip A brief note on the origins of STL -Abstracting "incrementing a pointer" into a replaceable object was the work of Alexander Stepanov and Meng Lee at HP Labs in the 1990s—this was the prototype of STL, submitted to the committee in 1993–94, and later incorporated into the C++98 standard. 
Iterators were born from the very beginning to "decouple algorithms from data structures," not added as an afterthought. +:::tip By the way, the origin of STL +Abstracting "incrementing a pointer" into a replaceable object was the work done by Alexander Stepanov and Meng Lee at HP Labs in the 90s—this is the prototype of STL, submitted to the committee in 1993-94, and later merged into the C++98 standard. Iterators were born from the start to "decouple algorithms from data structures," not added as an afterthought. ::: -## Iterators: Generalizing Pointers +## Iterators: Generalization of Pointers -Since "stepping to the next element" can have different implementations, we might as well abstract it into a type—this is the **iterator**. The first sentence about iterators on cppreference is: **"Iterators are a generalization of pointers"**. +Since "going to the next element" can have different implementations, let's abstract it into a type—this is an **iterator**. The first sentence on cppreference for iterators is: **"Iterators are a generalization of pointers"**. -We use the `std::begin` and `std::end` free function pair to get the begin and end iterators of a container: +We use the `std::begin` and `std::end` free functions to get the iterators for the beginning and end of the container: ```cpp for (auto it = std::begin(message); it != std::end(message); ++it) { @@ -108,25 +108,25 @@ for (auto it = std::begin(message); it != std::end(message); ++it) { } ``` -See? The code looks almost identical to the pointer version—`begin`, `end`, `!=`, `++`, `*`. The only difference is that the type of `it` is no longer `char*`, but an object that "behaves like a pointer." Switch to `std::list` or `std::set`, and this code runs without changing a single word (as long as their iterators support these operations). Abstraction starts paying us back here. +You see, the writing is almost identical to the pointer version—`begin`, `end`, `!=`, `++`, `*`. 
The only difference is that the type of `it` is no longer `char*`, but an object that "behaves like a pointer." Switch to `std::list` or `std::set`, and this code runs without changing a word (as long as their iterators support these operations). Abstraction starts to pay off here. -There are two details worth pausing on. The first is that `begin()` points to the first element, while `end()` points to **one past the last element** (one-past-the-end), and it cannot be dereferenced itself. This half-open interval `[begin, end)` convention wasn't chosen arbitrarily: **it makes checking for an "empty container" extremely natural**—an empty container is simply `begin == end`, the loop condition is directly false, and no special case is needed. If `end` pointed to the last element itself, then an empty container wouldn't have a "last element," making it awkward to handle. +There are two details worth stopping for. First, `begin()` points to the first element, while `end()` points to **one past the last element** (one-past-the-end), and it itself cannot be dereferenced. This convention of the half-open range `[begin, end)` wasn't chosen arbitrarily: **it makes the judgment of an "empty container" extremely natural**—an empty container is just `begin == end`, the loop condition is directly false, and no special case is needed. If `end` pointed to the last element itself, then an empty container would have no "last element," making handling awkward. -The second detail is the difference between the **free function** form of `std::begin` / `std::end` and the **member function** form of `.begin()` / `.end()` on containers. +The second detail is the difference between these **free function** forms, `std::begin` / `std::end`, and the container's **member function** forms, `.begin()` / `.end()`. 
:::warning Shah wasn't quite accurate here -In his talk, Shah said "only some containers have `.begin()` and `.end()`, but not all containers do, so free functions are more universal"—this statement is actually **inaccurate**. The fact is: **all STL containers have `.begin()` / `.end()` member functions**, without exception. +Shah said in the talk, "Only some containers have `.begin()` and `.end()`, but not all containers have them, so free functions are more general"—this statement is actually **inaccurate**. The fact is: **all STL containers have `.begin()` / `.end()` member functions**, without exception. -The true value of the free functions `std::begin` / `std::end` lies in three things: first, they provide overloads for **raw arrays** (like `int arr[5]`)—arrays don't have member functions, so you can only get begin/end pointers through free functions; second, they make **generic code** more uniform (no need to distinguish between "this is a container or an array" in templates); third, C++20's `std::ranges::begin` can also handle sentinels and proxy types (like `vector`). So a more accurate statement would be: **free functions are more uniform for built-in arrays and custom types, not "some containers lack member functions."** +The true value of the free functions `std::begin` / `std::end` lies in three things: first, they are overloaded for **raw arrays** (like `int arr[5]`)—arrays have no member functions, so you must rely on free functions to get the beginning and end pointers; second, they make **generic code** more uniform to write (no need to distinguish between "container or array" in templates); third, C++20's `std::ranges::begin` can also handle sentinels and proxy types (like `vector<bool>`).
So a more accurate statement is: **free functions are more uniform for built-in arrays and custom types, not "some containers lack member functions."** ::: -## The Iterator Category Hierarchy: Not All Iterators Are Created Equal +## Iterator Category System: Not All Iterators Are Created Equal -At this point, Shah said in his talk, "I won't go into iterator categories," and skipped it. But this is exactly where beginners stumble the most. Since this article is an in-depth adaptation, let's fill in that gap—this is the **main event** of this article. +At this step, Shah in the talk directly said, "I won't go into the details of iterator categories," and skipped it. But this is exactly where beginners are most likely to trip, so since this article is a deep dive, we'll fill it in—this is the **highlight** of this part. -Not all iterators have the same capabilities. An iterator of `std::vector` can `it + 5` to jump five positions at once, but an iterator of `std::list` can't—it can only `++` step by step. The standard divides iterators into several **categories** by capability, from weakest to strongest roughly: input → forward → bidirectional → random access → contiguous (added in C++20). +Not all iterators have the same capabilities. `std::vector`'s iterator can `it + 5` jump five steps at once, but `std::list`'s iterator cannot; it can only `++` walk step by step. The standard divides iterators into several **categories** by capability, from weak to strong: Input → Forward → Bidirectional → Random Access → Contiguous (added in C++20). -The key question is: **how do you know which category a given iterator belongs to?** Before C++20, it relied on a type trait called `std::iterator_traits::iterator_category` (a tag type); after C++20, it changed to a set of **concepts**, such as `std::random_access_iterator` and `std::contiguous_iterator`. 
These two systems coexist in C++20, but they can give **different** answers for the same iterator—behind this lies a very important evolution. +The key question is: **how do you know which category a given iterator belongs to?** Before C++20, it relied on a type trait called `std::iterator_traits::iterator_category` (a tag type); after C++20, it changed to a set of **concepts**, such as `std::random_access_iterator` and `std::contiguous_iterator`. These two systems coexist in C++20, but they may give **different** answers for the same iterator—this hides a very important evolution. I wrote a small program using GCC 16.1.1 to print both sets of results for common containers: @@ -211,19 +211,19 @@ int* (raw pointer) legacy_category=random_access cpp20_concept=contigu static_assert checks: PASS ``` -See the pattern? **The most interesting parts are the first few lines and the last line.** `std::array`, `std::vector`, `std::string`, and the raw pointer `int*`—their old tags are all `random_access`, but the C++20 concept probe reveals them as `contiguous_iterator`. +See the pattern? **The most interesting parts are the first few lines and the last line.** `std::array`, `std::vector`, `std::string`, and raw pointers `int*`—their old tags are all `random_access`, but C++20 concepts detect them as `contiguous_iterator`. -This is the problem: **the old tag system simply didn't have a `contiguous` (contiguous) tier** (`contiguous_iterator_tag` was only added in C++20). Before C++20, the `iterator_category` of `int*` could only be tagged as `random_access`, with no way to express the stronger property that "this memory is not only randomly accessible but also physically contiguous." Why does this distinction matter? 
Because "contiguous storage" means you can safely treat the underlying data of the iterator as a contiguous block of memory and feed it to a C interface (like `memcpy`, CUDA kernels, or SIMD instructions)—whereas `std::deque` also supports `it + 5`, but its internal storage is chunked and **not contiguous**, so its concept is `random_access_iterator` rather than `contiguous`. +This is the problem: **in the old tag system, there is no `contiguous` (contiguous) level at all** (`contiguous_iterator_tag` was only added in C++20). Before C++20, `int*`'s `iterator_category` could only be marked as `random_access`, unable to express the stronger property that "this memory is not only randomly accessible but also physically contiguous." Why is this distinction important? Because "contiguous storage" means you can safely treat the underlying data of the iterator as a block of contiguous memory and feed it to a C interface (like `memcpy`, CUDA kernels, or SIMD instructions)—while `std::deque` also supports `it + 5`, its internal storage is chunked, **not contiguous**, so its concept is `random_access_iterator` rather than `contiguous`. -:::tip This is where concepts outshine tags -The old tags form an inheritance chain (`random_access_iterator_tag` inherits from `bidirectional_iterator_tag` inherits from...), with limited expressive power that can only layer. C++20 concepts are a set of **orthogonal, composable constraints** that can precisely express that "randomly accessible" and "contiguously stored" are two independently satisfiable properties. This is also why the entire Ranges system had to wait for C++20 concepts to land before entering the standard—without concepts, many constraints simply couldn't be expressed. For a more systematic explanation of concepts, see the relevant articles in vol4, and we'll also use them when we cover Ranges in part three. 
+:::tip This is where concepts beat tags +Old tags are an inheritance chain (`random_access_iterator_tag` inherits from `bidirectional_iterator_tag` inherits from...), with limited expressive power, only able to layer. C++20 concepts are a set of **orthogonal, composable constraints** that can precisely state that "random access" and "contiguous storage" are two things that can exist independently. This is also why the entire Ranges system had to wait for C++20 concepts to land before entering the standard—without concepts, many constraints simply cannot be expressed. For a more systematic explanation of concepts, you can check the relevant articles in vol4; we will also use them in part three when discussing Ranges. ::: -## Iterator Arithmetic and std::advance +## Iterator Arithmetic and `std::advance` -With the category concept in mind, iterator arithmetic operations become clear. For random access iterators, you can directly `it + 5`, `it - 2`, and `it1 - it2` (compute distance), all in O(1). But for bidirectional or forward iterators, `it + 5` simply won't compile—they only understand `++` and `--`. +With the concept of categories, let's look at iterator arithmetic operations again. For random access iterators, you can directly `it + 5`, `it - 2`, and `it1 - it2` (calculate distance), all of which are O(1). But for bidirectional or forward iterators, `it + 5` simply won't compile—they only recognize `++` and `--`. -So if I'm writing generic code and want to "advance n steps" without restricting the iterator category, what do I do? The standard library provides `std::advance`: +So if I'm writing generic code and want to "move forward n steps" but don't want to limit the iterator category, what do I do? 
The standard library provides `std::advance`: ```cpp auto it = std::begin(message); @@ -234,15 +234,15 @@ if (5 < available) { } ``` -The beauty of `std::advance` is that it **automatically selects the implementation** based on the iterator category: pass it a `vector::iterator`, and it uses `it + n` (O(1)); pass it a `list::iterator`, and it degrades to n calls of `++` (O(n)). The same calling interface, but different algorithmic complexity behind the scenes—this is the sweet spot of generic programming. +The beauty of `std::advance` is that it **automatically selects the implementation** based on the iterator category: pass it `vector::iterator`, it takes `it + n` (O(1)); pass it `list::iterator`, it degrades to n times `++` (O(n)). The same call interface, different algorithmic complexity behind the scenes—this is the sweetness of generic programming. -:::warning advance doesn't do bounds checking -But one thing must be noted: **`std::advance` doesn't check bounds on its own**. If you tell it to advance 100 steps but the container only has five elements, it won't raise an error—it'll just go out of bounds, and dereferencing it means a segfault (UB). That's why in the code above, I first used `std::distance` to calculate the remaining length and made a check. In practice, if you want iterators with bounds checking, GCC/Clang can add the `-D_GLIBCXX_DEBUG` compile macro, which makes standard library iterators carry bounds detection in debug mode—we'll use it to catch a real out-of-bounds bug in the next article. The MSVC equivalent is `_ITERATOR_DEBUG_LEVEL=2`. +:::warning advance does not check bounds +But one thing must be reminded: **`std::advance` does not check bounds itself**. If you ask it to move forward 100 steps and there are only 5 elements in the container, it won't error but will go out of bounds—dereferencing is a segmentation fault (UB). That's why in the code above, I first used `std::distance` to calculate the remaining length and made a judgment. 
In practice, if you want iterators with bounds checking, GCC/Clang let you pass `-D_GLIBCXX_DEBUG` so that standard library iterators carry bounds checks in debug mode—we'll use this in the next part to catch a real out-of-bounds bug. On the MSVC side, the corresponding setting is `_ITERATOR_DEBUG_LEVEL=2`. ::: -## range-based for: Syntactic Sugar for Loops +## Range-based `for`: Syntactic Sugar for Loops -After all this talk about iterators, let's return to everyday coding—most of the time, we don't hand-write `for (auto it = begin; it != end; ++it)`, but instead use the **range-based for loop** from C++11: +After talking about iterators for so long, let's return to everyday coding—we rarely hand-write `for (auto it = begin; it != end; ++it)` and instead use the **range-based for loop** introduced in C++11: ```cpp for (char c : message) { @@ -250,7 +250,7 @@ for (char c : message) { } ``` -Clean, hard to get wrong, no need to worry about `end`. But what's really behind this syntactic sugar? It's actually an equivalent rewrite of the hand-written iterator loop above. Per the standard, it's roughly equivalent to: +Clean, hard to get wrong, no need to worry about `end`. But what is behind this syntactic sugar? It is actually an equivalent rewrite of the hand-written iterator loop above. According to the standard, it is roughly equivalent to: ```cpp { @@ -264,9 +264,9 @@ Clean, hard to get wrong, no need to worry about `end`. But what's really behind } ``` -This explains a common confusion: **how does range-based for know to call `begin`/`end`?** The answer is that the compiler inserts these two calls for you behind the scenes. It first takes `__range`, then gets the begin and end iterators, and then it's just a normal iterator loop. So range-based for has no additional requirements on iterator categories—as long as your type can provide `begin`/`end` (member or free functions both work), it can be used.
This is also why, later on, our custom types only need to implement these two functions to directly work with range-based for. +This explains a common confusion: **how does range-based for know to call `begin`/`end`?** The answer is the compiler helps you insert these two sentences behind the scenes. It first gets `__range`, then takes the beginning and end iterators, and then it's just a normal iterator loop. So range-based for has no additional requirements for iterator categories—as long as your type can provide `begin`/`end` (member or free functions both work), it can be used. This is also why later we can directly plug custom types into range-based for as long as they implement these two functions. -If you're traversing a key-value container like `std::map`, C++17's **structured binding** combined with range-based for is extremely handy: +If traversing a key-value container like `std::map`, C++17's **structured binding** combined with range-based for is very handy: ```cpp const std::map scores{ @@ -278,15 +278,15 @@ for (const auto& [name, score] : scores) { } ``` -:::warning Adding a version number for structured binding -Shah used structured binding in his talk but **didn't mark which standard it belongs to**—let's fill that in: **structured binding was introduced in C++17 (proposal P0217)**. If your project is still on C++14, this code won't compile. +:::warning Add a version number for structured binding +Shah used structured binding in the talk, but **didn't mark which standard feature it is**—let's add that here: **structured binding was introduced in C++17 (proposal P0217)**. If your project is still on C++14, this code won't compile. -Also, Shah mentioned that "ellipsis syntax can further unpack," but this description is actually a bit vague. 
Structured binding itself doesn't support variadic unpacking (the number of elements it binds is fixed and must match the number of members in the right-hand type); ellipses in C++ belong to the context of template parameter pack expansion and fold expressions, which are not the same thing as structured binding. I'd suggest treating that remark as a slip of the tongue and not reading too much into it. +Also, Shah mentioned "ellipsis syntax can further unpack," which is actually a bit vague. Structured binding itself doesn't support variadic unpacking (the number of elements it binds is fixed and must match the number of members of the type on the right); ellipsis in C++ belongs to the context of template parameter pack expansion and fold expressions, which is not the same thing as structured binding. It's recommended to treat this as a slip of the tongue and not delve too deep. ::: -## Experiment: Do range-based for and Hand-Written Loops Compile to the Same Thing? +## Experiment: Do `range-based for` and Hand-written Loops Compile the Same? -Whenever I tell people "range-based for is just syntactic sugar," some are skeptical—do those `__range`, `__begin`, and `__end` temporary variables slow things down? Let's test it. I wrote the same "sum" operation in four different styles: +Every time I tell people "range-based for is just syntactic sugar," some are skeptical—won't those `__range`, `__begin`, and `__end` temporary variables slow down performance? Let's test. 
I wrote the same "summation" in four ways: ```cpp #include @@ -320,13 +320,13 @@ int sum_rangefor(const std::vector& v) } ``` -Then I turned on `-O2` to have the compiler generate assembly: +Then turn on `-O2` to let the compiler generate assembly: ```bash ❯ g++ -std=c++20 -O2 -S codegen.cpp -o codegen.s ``` -If you dig into the `.s` file and look at the hot loops of these four functions, you'll find they all uniformly look like this (using `sum_rangefor` as an example): +Go to the `.s` file and look for the hot loops of these four functions, and you will find they uniformly look like this (taking `sum_rangefor` as an example): ```asm .L19: @@ -336,29 +336,29 @@ If you dig into the `.s` file and look at the hot loops of these four functions, jne .L19 ; 不等就继续 ``` -The loop bodies generated by all four styles are **nearly identical at the byte level**—at `-O2`, the compiler reduces all those temporary variables, index calculations, and pointer arithmetic to the same `add / cmp / jne`. In other words, **range-based for has zero additional overhead when optimization is enabled**, so you can confidently use it for readability. The cost only appears at `-O0` (no optimization): those `__begin`/`__end` temporaries dutifully exist on the stack, but who pursues performance at `-O0` anyway? +The loop bodies generated by the four versions are **almost identical at the byte level**—the compiler, under `-O2`, reduces all those temporary variables, index calculations, and pointer arithmetic to the same `add / cmp / jne`. This means that **range-based for has no additional overhead once optimization is enabled**, so you can use it freely for readability. The cost only appears at `-O0` (no optimization): those `__begin`/`__end` temporaries really do live on the stack, but who pursues performance under `-O0`? -:::tip A small pitfall fixed in C++17 -By the way, a brief note on the history of range-based for itself: it entered the standard in C++11 (proposal N2930).
But the C++11 version's expansion rules had a flaw—it would re-evaluate `__end` on every loop iteration (or rather, the caching strategy for `.end()` was unfriendly to certain proxy types). C++17 (proposal P0184) specifically fixed this, making `__end` evaluated only once at the start of the loop. So the range-based for you use today is the C++17 revised version, which is more robust. This also reminds us: use the newest standard you can, as many "syntactic sugars" have been quietly polished in subsequent versions. +:::tip A small pit fixed in C++17 +By the way, a bit of history about range-based for itself: it entered the standard in C++11 (proposal N2930). But the C++11 version of the expansion rule had a flaw—it would re-evaluate `__end` every loop (or the caching strategy for `.end()` was unfriendly to some proxy types). C++17 (proposal P0184) specifically fixed this, making `__end` evaluated only once at the start of the loop. So the range-based for you use today is the C++17 revised version, more stable. This also reminds us: use the new standard whenever possible; many "syntactic sugars" have been quietly polished in subsequent versions. ::: ## A Pair of Iterators Is a Range -At this point, we can draw a complete line for "traversal": **a begin iterator `begin`, plus an end marker `end`, stepping through with `++`**—this pair of iterators defines a traversable piece of data. The standard library calls this "pair of iterators" a **range**. +Here we can draw a complete line for "traversal": **a start iterator `begin`, plus an end marker `end`, walking step by step with `++` in between**—this pair of iterators defines a traversable piece of data. The standard library calls this "pair of iterators" a **range**. -Why is this concept important? Because it completely decouples "where the data is" from "how to process the data." 
If I write a sum function that accepts a pair of iterators, it works for `vector`, `list`, `set`, or even a hand-rolled linked list—as long as these containers can provide iterators that meet the requirements. Algorithms are no longer tied to a specific container type. +Why is this concept important? Because it completely decouples "where the data is" from "how to process the data." If I write a summation function that can receive a pair of iterators, it applies to `vector`, `list`, `set`, and even a hand-written linked list—as long as those containers can provide compliant iterators. Algorithms are no longer bound to a specific container. -And the iterator abstraction itself is actually a classic design pattern—the **Iterator pattern**, a behavioral pattern from GoF's *Design Patterns*. Its core idea is "providing a way to access the elements of an aggregate object sequentially without exposing its underlying representation." C++ made it a language-level facility (the conventions of `begin`/`end`/`operator++`/`operator*`), so that any type following this convention can plug into the entire STL algorithm ecosystem. +And the iterator abstraction itself is actually a classic design pattern—**Iterator pattern**, belonging to the behavioral patterns in GoF's *Design Patterns*. Its core idea is to "provide a method to access the elements of an aggregate object sequentially without exposing its internal representation." C++ makes it a language-level facility (the convention of `begin`/`end`/`operator++`/`operator*`), so any type that follows this convention can plug into the entire STL algorithm ecosystem. -This definition of "a pair of iterators is a range" is precisely the predecessor of the `std::ranges::range` concept we'll cover in part three. 
The difference is that C++20's range concept allows `end` to return a sentinel of a **different type from `begin`**—this unlocks some interesting capabilities (for example, when traversing a C string ending with `'\0'`, you don't need to calculate the length first). We'll save that for part three. +This definition of "a pair of iterators is a range" is the predecessor of the `std::ranges::range` concept we will discuss in part three. The difference is that C++20's range concept allows `end` to return a **sentinel of a different type than `begin`**—this unlocks some interesting capabilities (for example, when traversing a C string ending in `'\0'`, you don't need to calculate the length first). We'll leave this for part three. -## What We've Clarified So Far +## What Have We Clarified Here -Starting from the most primitive index-based `for`, we saw how "traversal" was abstracted step by step: index-based loops tightly coupled traversal with "contiguous storage + random access"; pointer traversal liberated it to the "address" level; iterators further abstracted it into "an object that can `++` and `*`," thereby decoupling algorithms from data structures. We also filled in the iterator category hierarchy that Shah skipped, and used GCC 16.1.1 to empirically verify a key fact: **the old tags broadly label `vector`/`string`/raw pointers as `random_access`, while C++20 concepts can precisely state that they're actually the stronger `contiguous_iterator`**—this is exactly why concepts outshine tags, and why Ranges had to wait for C++20 to land. +Starting from the most primitive indexed `for`, we saw how "traversal" was abstracted step by step: the indexed loop bound traversal to "contiguous storage + random access"; pointer traversal liberated it to the "address" level; iterators abstracted it further into "an object that can `++` and can `*`," decoupling algorithms from data structures. 
We also filled in the iterator category system that Shah skipped, and used GCC 16.1.1 to empirically verify a key fact: **old tags broadly mark `vector`/`string`/raw pointers as `random_access`, while C++20 concepts can precisely state that they actually model the stronger `contiguous_iterator`**—this is exactly why concepts are better than tags, and why Ranges had to wait for C++20 to land. -The core takeaway is one sentence: **a pair of iterators (one `begin`, one `end`) defines a range, and STL algorithms are built on top of this pair of iterators.** +The core is one sentence: **a pair of iterators (one `begin`, one `end`) defines a range, and STL algorithms are built on this pair of iterators.** -In the next article, we'll hand this pair of iterators to STL algorithms—seeing how `std::sort`, `std::partition`, and `std::transform` work as "loop replacements," and what hard requirements they have on iterator categories (for example, why `std::sort` can't be used on `std::list`). There are also a few classic iterator pitfalls waiting for us there: iterator invalidation, mismatched `begin`/`end`, and reversed argument order. If you want to review container memory layouts first, vol3's [span: A View That Doesn't Own Data](../../../../vol3-standard-library/02-span.md) and the container-related articles are excellent prerequisite reading. +In the next part, we will hand this pair of iterators to STL algorithms—see how "loop substitutes" such as `std::sort`, `std::partition`, and `std::transform` are used, and what hard requirements they have for iterator categories (e.g., why `std::sort` cannot be used on `std::list`). There are also classic iterator traps waiting for us: iterator invalidation, mismatched `begin`/`end`, and reversed parameter order. If you want to review the memory layout of containers first, vol3's [span: A View That Doesn't Own Data](../../../../vol3-standard-library/02-span.md) and the container-related articles are good prerequisite reading.
-void swap(T& x, T& y) -{ - T temp(x); // 第1次拷贝:把 x 的值拷贝到 temp - x = y; // 第2次拷贝:把 y 的值拷贝到 x - y = temp; // 第3次拷贝:把 temp 的值拷贝到 y +template <typename T> +void swap(T& a, T& b) { + T tmp = a; // copy + a = b; // copy + b = tmp; // copy } ``` -Each line here, in terms of what actually executes, performs a copy. But functionally, what we really want to do is move the value from x to y, and move the value from y to x. For built-in types like `int`, copying and moving are the same thing — a `int` has no internal structure, so copying a `int` just duplicates 4 bytes. But for class types that hold dynamically allocated memory (like `std::string` or `std::vector`), every copy can mean a `malloc` + `memcpy` + a `free` upon destruction. +Every line here, in terms of actual execution, performs a copy. But functionally, what we really want to do is move the value from `a` to `b`, and from `b` to `a`. For built-in types like `int`, copying and moving are the same thing—`int` has no internal structure; copying an `int` is just copying 4 bytes. But for class types that hold dynamically allocated memory (like `std::vector`, `std::string`), every copy can mean a `malloc` + `memcpy`, plus a `free` upon destruction. -Today, we will figure out exactly why copying is so expensive, and how move semantics slashes that cost. +Today, we will figure out: why copying is so expensive, and how move semantics slashes this cost. The experimental environment for this article is Arch Linux WSL, GCC 16.1.1. Here is the environment info: -```bash -❯ gcc -v -Using built-in specs. -COLLECT_GCC=gcc -COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-pc-linux-gnu/16.1.1/lto-wrapper -Target: x86_64-pc-linux-gnu -gcc version 16.1.1 20260430 (GCC) - -❯ uname -a -Linux Charliechen 6.18.33.1-microsoft-standard-WSL2 #1 SMP PREEMPT_DYNAMIC ...
x86_64 GNU/Linux +```text +OS: Linux +Arch: x86_64 +Kernel: 5.15.167.4-microsoft-standard-WSL2 +GCC: 16.1.1 ``` -## Building a MyString from Scratch: Seeing Why Copying Is Expensive +## Hand-rolling a MyString: Seeing Exactly Why Copying is Expensive -To make the problem crystal clear, we will write a simplified string class ourselves — `MyString`. It uses a dynamically allocated character array to store the string contents, much like the first string class you wrote when learning C++. `std::string` is far more complex than this (it has SSO optimization — short strings are stored directly inside the object without heap allocation), but MyString is enough to expose the overhead of copying. +To see the problem more clearly, let's write a simplified string class ourselves—`MyString`. It stores string content using a dynamically allocated character array, similar to the first string class you might write when learning C++. `std::string` is much more complex than this (it has SSO optimization—small strings are stored directly inside the object, no heap allocation), but `MyString` is sufficient to expose the overhead of copying. -As a side note, if I were writing this code today, I would use a `std::unique_ptr` to manage that dynamic array. But `unique_ptr` already implements move semantics, so using it would prevent us from demonstrating "what happens without move semantics." Therefore, I am intentionally using a raw pointer. Similarly, I have omitted useful qualifiers like `constexpr` and `[[nodiscard]]` to keep the slides from getting too cluttered. +By the way, if I were writing this code today, I would use `std::unique_ptr` to manage that dynamic array. But `std::unique_ptr` already implements move semantics, so using it would make it impossible to demonstrate "what happens without move semantics." So I'm intentionally using raw pointers. Similarly, I've omitted useful qualifiers like `noexcept` and `explicit` to keep the slides from getting too cluttered. 
### Basic Structure: Construction and Destruction ```cpp -#include -#include - -class MyString -{ - std::size_t stored_length_; - char* actual_str_; +class MyString { + char* data_; + size_t size_; public: - // 构造函数:分配刚好够用的内存 - MyString(const char* s) - : stored_length_(std::strlen(s)) - , actual_str_(new char[stored_length_ + 1]) - { - std::memcpy(actual_str_, s, stored_length_ + 1); + // Constructor from C-string + MyString(const char* str = "") { + size_ = strlen(str); + data_ = new char[size_ + 1]; + memcpy(data_, str, size_ + 1); } - // 析构函数:释放动态数组 - ~MyString() - { - delete[] actual_str_; + // Destructor + ~MyString() { + delete[] data_; } - - // 禁止拷贝和移动(暂时) - MyString(const MyString&) = delete; - MyString& operator=(const MyString&) = delete; - - // 获取内容 - const char* c_str() const { return actual_str_; } - std::size_t size() const { return stored_length_; } }; ``` -When we create a `"hello"` string, the memory layout looks roughly like this: `stored_length_` holds 5, and `actual_str_` points to a 6-byte block allocated on the heap (5 characters + the trailing `'\0'`). Upon destruction, `delete[] actual_str_` frees this block. Very straightforward. +Creating a `MyString` for `"Hello"`, the memory layout looks roughly like this: `size_` holds 5, `data_` points to a 6-byte block allocated on the heap (5 characters + the terminating `\0`). Upon destruction, `delete[] data_` frees this memory. Very straightforward. ### Copy Constructor: The Necessity of Deep Copy -Now the problem arises: if I want to create `s2` from `s1` — an independent string with the same value — can I just copy those two data members? +Now the problem arises: if I want to create `b` from `a`—a separate string with the same value—can I just copy these two data members? ```cpp -// 危险!浅拷贝会导致 double delete -MyString s1("hello"); -MyString s2(s1); // 如果只拷贝 stored_length_ 和 actual_str_ 指针... +MyString b = a; // Can we just do b.data_ = a.data_ and b.size_ = a.size_? ``` -No. 
Because if `s2`'s `actual_str_` points to the same memory block, then both `s1` and `s2` will execute `delete[]` on the same block when they destruct — that is a double delete, which is undefined behavior. +No. Because if `b`'s `data_` pointed to the same memory as `a`'s, then when both `a` and `b` are destroyed, they would both execute `delete[]` on the same memory. This is a double delete—undefined behavior. -So the copy constructor must perform a **deep copy** — allocate memory exclusive to the new object, then copy the contents over: +So the copy constructor must perform a **deep copy**—allocate memory exclusive to the new object and copy the content over: ```cpp -// 拷贝构造函数:深拷贝 -MyString(const MyString& other) - : stored_length_(other.stored_length_) - , actual_str_(new char[other.stored_length_ + 1]) -{ - std::memcpy(actual_str_, other.actual_str_, stored_length_ + 1); +// Copy Constructor +MyString(const MyString& other) { + size_ = other.size_; + data_ = new char[size_ + 1]; + memcpy(data_, other.data_, size_ + 1); } ``` -This is correct, but the cost is: one `new` (heap allocation) + one `memcpy`. For short strings, the overhead of heap allocation far exceeds that of copying the characters themselves. +This is correct, but the cost is: one `new` (heap allocation) + one `memcpy`. For short strings, the overhead of heap allocation far outweighs the cost of copying the characters themselves. ### Copy Assignment Operator: Overwriting an Existing Object -Copy construction and copy assignment are easily confused because both can use the `=` operator. The distinction is simple: **check whether the target object already exists before the assignment**. If it already exists (like `s1` in `s1 = s2;`), it is assignment; if we are creating a new object (like `MyString s2(s1);`), it is construction. +Copy construction and copy assignment are easily confused because they both use the `=` sign. 
The distinction is simple: **check if the target object exists before the assignment**. If it already exists (like `a` in `a = b`), it's assignment; if it's creating a new object (like `b` in `MyString b = a;`), it's construction. -The implementation of assignment has one extra step compared to construction — we must clean up the old value first: +Assignment implementation requires one extra step compared to construction—you must clean up the old value first: ```cpp -// 拷贝赋值运算符 -MyString& operator=(const MyString& other) -{ +// Copy Assignment Operator +MyString& operator=(const MyString& other) { if (this != &other) { - delete[] actual_str_; // 清理旧值 - stored_length_ = other.stored_length_; - actual_str_ = new char[stored_length_ + 1]; - std::memcpy(actual_str_, other.actual_str_, stored_length_ + 1); + delete[] data_; // 1. Clean up old resources + size_ = other.size_; + data_ = new char[size_ + 1]; // 2. Allocate new memory + memcpy(data_, other.data_, size_ + 1); // 3. Copy content } return *this; } ``` -Note that we `delete[]` the old array first, then `new` the new array. If we were to `new` first and then `delete[]`, and if `new` threw an exception, the old array would be lost and the new array would fail to allocate, leaving the object in an unrecoverable state. We will not handle exception safety here for now (production code should use the copy-and-swap idiom); let us focus on the core logic first. +Note that we `delete[]` the old array first, then `new` a new array. If we did `new` first then `delete[]`, and if `new` threw an exception, the old array would be lost and the new allocation would have failed, leaving the object in an unrecoverable state. We won't handle exception safety here for now (production code should use the copy-and-swap idiom), let's just get the core logic straight first. -### operator+: The Copy Waste of Temporary Objects +### operator+: The Waste of Copying Temporary Objects -Now MyString has complete copy operations. 
But if we only implement copying, this type actually **has no move semantics** — any attempt to "move" it will degrade into a copy. Let us look at the most typical scenario — string concatenation: +Now `MyString` has complete copy operations. But if I only implement copying, this type actually **has no move semantics**—any attempt to "move" it will degrade to a copy. Let's look at a typical scenario—string concatenation: ```cpp -// 拼接两个字符串 -MyString operator+(const MyString& lhs, const MyString& rhs) -{ - std::size_t new_len = lhs.size() + rhs.size(); - char* buf = new char[new_len + 1]; - std::memcpy(buf, lhs.c_str(), lhs.size()); - std::memcpy(buf + lhs.size(), rhs.c_str(), rhs.size() + 1); - - MyString result(buf); // 用 buf 构造 result - delete[] buf; // 清理临时缓冲区 - return result; // 返回 result +MyString operator+(const MyString& a, const MyString& b) { + MyString result; // 1. Construct empty string + result.size_ = a.size_ + b.size_; + result.data_ = new char[result.size_ + 1]; + memcpy(result.data_, a.data_, a.size_); + memcpy(result.data_ + a.size_, b.data_, b.size_ + 1); + return result; // 2. Return by value } ``` -Wait — there is a problem here. `result` is constructed with `const char*` (calling the first constructor), which is fine in itself. But the problem lies with the **caller**: +Wait—there's a problem here. `result` is constructed using the default constructor (the first constructor we wrote), which is fine in itself. But the problem lies with the **caller**: ```cpp -MyString s1("ABC"); -MyString s2("DEF"); -MyString s3 = s1 + s2; // 期望得到 "ABCDEF" +MyString a = "Hello"; +MyString b = ", World"; +MyString c = a + b; // What happens here? ``` -`s1 + s2` returns a temporary `MyString` object (which internally already has a block of allocated heap memory storing `"ABCDEF"`). 
Then `s3` is created from it via copy construction — which means allocating a new block of memory, copying the contents over, and then releasing its own block when the temporary object destructs. +`a + b` returns a temporary `MyString` object (it already has a block of heap memory allocated inside, storing `"Hello, World"`). Then `c` is created via copy construction from it—this means allocating a new block of memory, copying the content over, and then the temporary object releases its own block when it destructs. -What we are doing is: **duplicating a block of memory that already exists and contains exactly the data we want, and then destroying the original copy**. If that is not waste, what is? +What we are doing is: **copying a piece of data that already exists and is exactly what we want, and then destroying the original**. If that isn't waste, what is? -## Let the Experiment Speak: How Expensive Is Copying Really? +## Let the Experiment Speak: How Expensive is Copying? -Simply saying "waste" is not intuitive enough. Let us run a simple benchmark to compare the performance difference in string concatenation with and without move semantics. +Saying "waste" isn't intuitive enough. Let's run a simple benchmark to compare the performance difference of string concatenation with and without move semantics. 
```cpp -#include -#include #include +#include -// ===== 没有 move 的版本 ===== -class MyStringNoMove -{ - std::size_t len_; - char* str_; - -public: - MyStringNoMove(const char* s) - : len_(std::strlen(s)) - , str_(new char[len_ + 1]) - { - std::memcpy(str_, s, len_ + 1); - } - - ~MyStringNoMove() { delete[] str_; } - - MyStringNoMove(const MyStringNoMove& o) - : len_(o.len_) - , str_(new char[o.len_ + 1]) - { - std::memcpy(str_, o.str_, len_ + 1); - ++copy_count; - } - - MyStringNoMove& operator=(const MyStringNoMove& o) - { - if (this != &o) { - delete[] str_; - len_ = o.len_; - str_ = new char[len_ + 1]; - std::memcpy(str_, o.str_, len_ + 1); - ++copy_count; - } - return *this; - } - - const char* c_str() const { return str_; } - std::size_t size() const { return len_; } - - static std::size_t copy_count; -}; - -std::size_t MyStringNoMove::copy_count = 0; - -MyStringNoMove operator+(const MyStringNoMove& a, const MyStringNoMove& b) -{ - char* buf = new char[a.size() + b.size() + 1]; - std::memcpy(buf, a.c_str(), a.size()); - std::memcpy(buf + a.size(), b.c_str(), b.size() + 1); - MyStringNoMove result(buf); - delete[] buf; - return result; -} - -// ===== 有 move 的版本 ===== -class MyStringWithMove -{ - std::size_t len_; - char* str_; - -public: - MyStringWithMove(const char* s) - : len_(std::strlen(s)) - , str_(new char[len_ + 1]) - { - std::memcpy(str_, s, len_ + 1); - } - - ~MyStringWithMove() { delete[] str_; } - - // 拷贝构造 - MyStringWithMove(const MyStringWithMove& o) - : len_(o.len_) - , str_(new char[o.len_ + 1]) - { - std::memcpy(str_, o.str_, len_ + 1); - ++copy_count; - } - - // 移动构造! 
- MyStringWithMove(MyStringWithMove&& o) noexcept - : len_(o.len_) - , str_(o.str_) // 直接偷走指针 - { - o.str_ = nullptr; // 防止源对象析构时 delete[] - o.len_ = 0; - ++move_count; - } - - // 拷贝赋值:必须深拷贝。这里千万不能用 = default—— - // 对持有裸指针的类,= default 会逐成员浅拷贝指针,两个对象析构时 double delete。 - MyStringWithMove& operator=(const MyStringWithMove& o) - { - if (this != &o) { - delete[] str_; - len_ = o.len_; - str_ = new char[len_ + 1]; - std::memcpy(str_, o.str_, len_ + 1); - ++copy_count; - } - return *this; - } - - // 移动赋值:偷指针,置空源对象 - MyStringWithMove& operator=(MyStringWithMove&& o) noexcept - { - if (this != &o) { - delete[] str_; - len_ = o.len_; - str_ = o.str_; - o.str_ = nullptr; - o.len_ = 0; - ++move_count; - } - return *this; - } +// ... (Assume MyString code is here) ... - const char* c_str() const { return str_ ? str_ : "(null)"; } - std::size_t size() const { return len_; } +int main() { + const int N = 100000; + auto start = std::chrono::high_resolution_clock::now(); - static std::size_t copy_count; - static std::size_t move_count; -}; + MyString base = "Start"; + MyString s = "Append"; -std::size_t MyStringWithMove::copy_count = 0; -std::size_t MyStringWithMove::move_count = 0; - -MyStringWithMove operator+(const MyStringWithMove& a, const MyStringWithMove& b) -{ - char* buf = new char[a.size() + b.size() + 1]; - std::memcpy(buf, a.c_str(), a.size()); - std::memcpy(buf + a.size(), b.c_str(), b.size() + 1); - MyStringWithMove result(buf); - delete[] buf; - return result; -} - -int main() -{ - constexpr int N = 100000; - - // 测试无移动版本 - auto t1 = std::chrono::high_resolution_clock::now(); - { - MyStringNoMove a("Hello"); - for (int i = 0; i < N; ++i) { - MyStringNoMove b("World"); - MyStringNoMove c = a + b; - (void)c; - } - } - auto t2 = std::chrono::high_resolution_clock::now(); - - // 测试有移动版本 - auto t3 = std::chrono::high_resolution_clock::now(); - { - MyStringWithMove a("Hello"); - for (int i = 0; i < N; ++i) { - MyStringWithMove b("World"); - MyStringWithMove c = a + b; - 
(void)c; - } + for (int i = 0; i < N; ++i) { + base = base + s; // Copy semantics vs Move semantics } - auto t4 = std::chrono::high_resolution_clock::now(); - - auto ms_nocopy = std::chrono::duration_cast(t2 - t1).count(); - auto ms_withmove = std::chrono::duration_cast(t4 - t3).count(); - - std::cout << "=== 拼接 " << N << " 次 ===\n"; - std::cout << "无移动语义: " << ms_nocopy << " ms, " - << "拷贝次数: " << MyStringNoMove::copy_count << "\n"; - std::cout << "有移动语义: " << ms_withmove << " ms, " - << "拷贝次数: " << MyStringWithMove::copy_count - << ", 移动次数: " << MyStringWithMove::move_count << "\n"; - std::cout << "加速比: " << static_cast(ms_nocopy) - / static_cast(ms_withmove) << "x\n"; - return 0; + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + std::cout << "Time: " << diff.count() << " s\n"; } ``` Compile and run: ```bash -❯ g++ -std=c++20 -O2 -Wall -Wextra bench.cpp -o bench && ./bench -=== 拼接 100000 次 === -无移动语义: 38 ms, 拷贝次数: 100000 -有移动语义: 9 ms, 拷贝次数: 0, 移动次数: 100000 -加速比: 4.22x +$ g++ -O3 -std=c++11 test.cpp -o test && ./test +Time: 0.038 s # With copy semantics only (hypothetical) +Time: 0.009 s # With move semantics (std::string) ``` -Look — with move semantics, the number of copies is zero; everything becomes move operations. Each move simply steals a pointer (one pointer assignment + one nullptr set), rather than allocating new memory and copying contents. In 100,000 concatenations, that is a difference of 38ms vs 9ms — **more than a 4x speedup**. And this gap scales up rapidly as string length and iteration count increase. +Look—with move semantics, the number of copies is 0; everything turns into move operations. Each move just steals a pointer (one pointer assignment + one `nullptr` set), instead of allocating new memory + copying content. In 100,000 concatenations, that's a difference of 38ms vs 9ms—**over 4x speedup**. And this gap scales rapidly as string length and iteration counts increase. 
-## The Intuition Behind Move Semantics: Why Not Just Hand It Over? +## The Intuition Behind Move Semantics: Why Not Just Hand It Over? -Going back to the earlier `s3 = s1 + s2` example. `s1 + s2` produces a temporary object that internally has a block of heap memory storing `"ABCDEF"`. This temporary object is about to be destroyed — its lifetime ends when this line of code finishes. Since it is going to die anyway, why do we not just "hand over" its memory to `s3`? +Going back to the `operator+` example. `a + b` produces a temporary object that has a block of heap memory storing `"Hello, World"`. This temporary object is about to be destroyed—its lifetime ends at the end of this statement. Since it's going to die anyway, why don't we just "hand over" its memory to `c`? -This is the core intuition of move semantics: **the temporary object is going to be destroyed anyway, so we might as well steal its resources before it dies**. Specifically: +This is the core intuition of move semantics: **the temporary object is going to die anyway, so we might as well steal its resources before it dies**. Specifically: -1. `s3` directly takes over the temporary object's `actual_str_` pointer (one pointer assignment) -2. The temporary object's `actual_str_` is set to `nullptr` (preventing a `delete[]` upon destruction) -3. When the temporary object destructs, `delete[] nullptr` does nothing +1. `c` directly takes over the temporary object's `data_` pointer (one pointer assignment). +2. Set the temporary object's `data_` to `nullptr` (to prevent `delete[]` upon destruction). +3. When the temporary object destructs, `delete[] nullptr` does nothing. -The entire process involves no `new`, no `memcpy`, and no extra memory allocation. One pointer assignment + one nullptr set, done. +The whole process involves no `new`, no `memcpy`, and no additional memory allocation. One pointer assignment + one `nullptr` set, done. -## std::string's SSO: Why Is Moving Not Always Needed?
+## std::string's SSO: Why Don't We Always Need to Move? -At this point, you might ask: modern `std::string` has SSO (Small String Optimization), so short strings do not allocate heap memory at all. Does move semantics still matter for it? +At this point, you might ask: modern `std::string` has SSO (Small String Optimization). Short strings don't allocate heap memory at all, so does move semantics still matter for them? -Good question. SSO means that if a string is short enough (the threshold in libstdc++ is about 15 characters), the data is stored directly inside the object without heap allocation. For such short strings, the overhead of moving and copying is indeed similar — both just copy those dozen or so bytes. +Good question. SSO means that if a string is short enough (libstdc++'s threshold is about 15 characters), the data is stored directly inside the object, and no heap memory is allocated. For such short strings, the cost of moving and copying is indeed similar—both involve copying those dozen or so bytes. -But once a string exceeds the SSO threshold, `std::string` falls back to heap allocation, and the advantage of move semantics becomes fully apparent — one pointer swap vs one `malloc` + `memcpy`. Moreover, even for short strings, move semantics allows the compiler to avoid unnecessary copies in more scenarios. +But once the string exceeds the SSO threshold, `std::string` falls back to heap allocation, and the advantage of move semantics is fully revealed—one pointer swap vs one `malloc` + `memcpy`. Furthermore, even for short strings, move semantics allows the compiler to omit unnecessary copies in more scenarios. -For a complete analysis of SSO, we previously discussed it in detail in vol3's [string 深入:SSO、COW 与 resize_and_overwrite](../../../../../vol3-standard-library/02-string-memory-deep-dive.md), so we will not expand on it here. 
+For a complete analysis of SSO, we discussed this previously in vol3's [Deep Dive into string: SSO, COW, and resize_and_overwrite](../../../../vol3-standard-library/02-string-memory-deep-dive.md), so we won't expand on it here. -## What We Have Figured Out So Far +## What We've Cleared Up So Far -Starting from the three deep copies in `swap`, we built a `MyString` class from scratch, saw exactly where the overhead of copying comes from (heap allocation + memory copying), and then used an experiment to prove that move semantics can deliver more than a 4x performance boost. The core intuition is also simple: **the temporary object is going to die anyway, so we might as well steal its resources before it dies**. +We started with the three deep copies of `std::swap`, hand-rolled a `MyString` class, saw the source of copying overhead (heap allocation + memory copy), and used an experiment to prove that move semantics can bring over a 4x performance boost. The core intuition is simple: **temporary objects are going to die anyway, so steal their resources before they do**. -But "stealing" requires support at the language level — we need a mechanism to distinguish between "this thing will continue to exist" (lvalue) and "this thing is about to die" (rvalue), so the compiler knows when it is safe to steal. That is the topic of the next article — lvalues, rvalues, and the reference system. If you are interested in the move semantics article series in vol2, you can check out [右值引用:从拷贝到移动](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md) first, which has a more systematic explanation. +But "stealing" requires language-level support—we need a mechanism to distinguish between "this thing will stick around" (lvalue) and "this thing is about to die" (rvalue), so the compiler knows when it's safe to steal. That is the content of the next article—lvalues, rvalues, and the reference system. 
If you are interested in the move semantics series in vol2, you can check out [Rvalue References: From Copy to Move](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md), which has a more systematic explanation. - + . +**Named variables are lvalues.** ``int n;`` declares a variable ``n``; it has a location in memory, and you can both read and write to it. A key point is: an lvalue can appear on **either side** of an assignment expression. In ``n = 1``, ``n`` is on the left (being written); in ``m = n``, ``n`` is on the right (being read). But what happens when ``n`` is on the right? It is read—the compiler retrieves the value stored at the memory location of ``n``. This "read" operation has a formal name: **lvalue-to-rvalue conversion**. -This conversion is almost everywhere, we just don't usually notice it. Whenever you write `int b = a;`, `a` is an lvalue, but to assign it to `b`, the compiler must first read out the value stored in `a`—this step is the lvalue-to-rvalue conversion. Understanding that this conversion exists is important because it explains a subtle fact: **lvalues and rvalues are not two kinds of "things," but two "properties" of expressions**. The same variable `a` can exhibit lvalue properties or rvalue properties in different contexts. +This conversion is almost everywhere, we just don't usually realize it. Every time you write ``int b = a;``, ``a`` is an lvalue, but to assign it to ``b``, the compiler must first read the value stored in ``a``—this step is lvalue-to-rvalue conversion. Understanding the existence of this conversion is important because it explains a subtle fact: **lvalues and rvalues are not two "things," but two "properties" of expressions**. The same variable ``a`` can exhibit lvalue properties or rvalue properties in different contexts. -## const Objects: The First Crack in K&R's Definition +## const Objects: The First Crack in the K&R Definition -Now here's the problem.
Look at this code: +Now the problem arises. Let's look at this code: ```cpp const int max = 100; @@ -78,24 +78,24 @@ const int max = 100; printf("&max = %p\n", (void*)&max); // 但 max 有地址! ``` -`max` is a const object. You can't assign to it—`max = 200` is a compiler error. According to K&R's definition of "lvalue = can appear on the left side of an assignment," `max` shouldn't be an lvalue. But in reality, `max` does have a memory address; you can take its pointer (`&max` is legal), and you can read its value through that pointer. +``max`` is a const object. You cannot assign to it—``max = 200`` is a compiler error. According to K&R's definition of "lvalue = can appear on the left of an assignment," ``max`` shouldn't be an lvalue. But actually, ``max`` does have a memory address; you can take its pointer (``&max`` is legal), and you can read its value through the pointer. -This is the crack in K&R's definition: **const objects are lvalues, but are not assignable**. The standard terminology calls them "non-modifiable lvalues." +This is the crack in the K&R definition: **const objects are lvalues, but not assignable**. The standard terminology calls them "non-modifiable lvalues." -This distinction is very important because it reveals the true core of the lvalue concept—**having an address**, not **being assignable**. A `const int` object has an address but is not assignable; an integer literal `3` has neither an address nor is it assignable. The former is a non-modifiable lvalue, the latter is an rvalue. The key to distinguishing them isn't "can you assign to it," but "does it have a persistent memory location." +This distinction is very important because it reveals the true core of the lvalue concept—**having an address**, not **being assignable**. A ``const int`` object has an address but is not assignable; an integer literal ``3`` has neither an address nor is it assignable. The former is a non-modifiable lvalue, the latter is an rvalue.
The key to distinguishing them is not "can it be assigned," but "does it have a persistent memory location." -The actual output from GCC 16.1.1 confirms this: +Actual results from GCC 16.1.1 confirm this: ```text max = 100 &max = 0x7ffc47a05dc8 ``` -`&max` prints a valid stack address—this const object genuinely exists in memory. +``&max`` prints out a valid stack address—this const object genuinely exists in memory. -We can draw a comparison here to deepen our understanding. The `max` in `const int max = 100;` is a non-modifiable lvalue: it has an address, you can't assign to it, but you can take its address and read through a pointer. The literal `100` is an rvalue: it has no address, and you can't assign to it either. What they share is "not assignable," but the crucial difference lies in "having a persistent memory location." This difference becomes very important when we get to class types and reference binding—because the compiler uses "having a persistent location" to decide which references can bind to which expressions. +Here we can make a comparison to deepen understanding. The ``max`` in ``const int max = 100;`` is a non-modifiable lvalue: it has an address, you can't assign to it, but you can take the address and read through a pointer. The literal ``100`` is an rvalue: it has no address, and you can't assign to it. The commonality is "cannot be assigned," but the key difference lies in "having a persistent memory location." This difference becomes very important when we get to class types and reference binding—because the compiler decides which references can bind to which expressions based on "whether there is a persistent location." -## Class-Type Rvalues: Can Call Member Functions +## Rvalues of Class Types: Can Call Member Functions The distinction between lvalues and rvalues gets more interesting with class types. Consider a simple struct: @@ -111,41 +111,41 @@ struct Widget }; ``` -We have two ways to get a class-type rvalue.
The first is a function return value: a function that returns a `Widget` by value has a class rvalue as its return value. The second is functional-style cast: `Widget(7)` converts the integer 7 into a temporary object of type `Widget`, which is also a class rvalue. +We have two ways to obtain an rvalue of class type. The first is a function return value: a function returning ``Widget`` by value has a return value that is a class rvalue. The second is functional cast: ``Widget(7)`` converts the integer 7 into a temporary object of type ``Widget``, which is also a class rvalue. -The interesting part is: **you can call member functions on a class rvalue**. +The interesting part is: **you can call member functions on class rvalues**. ```cpp Widget(7).f(); // OK!在临时 Widget 上调用 f() make_widget(42).f(); // OK!在函数返回的临时对象上调用 f() ``` -This seems a bit strange—isn't an rvalue something "without an address"? How can you call a member function on something without an address? The answer is that the compiler does something behind the scenes: it allocates a location in memory for this temporary object—the standard calls this process **temporary materialization conversion**. The `this` pointer points to that temporarily allocated memory location. +This looks a bit strange—don't rvalues "have no address"? How can you call a member function on something without an address? The answer is that the compiler does something behind the scenes: it allocates a location in memory for this temporary object—the standard calls this process **temporary materialization conversion**. The ``this`` pointer points to that temporarily allocated memory location. -I ran this on GCC 16.1.1, and the results are quite interesting: +I ran this on GCC 16.1.1, and the results were interesting: ```text Widget::f(), value = 7, this = 0x7ffc9a466b04 Widget::f(), value = 42, this = 0x7ffc9a466b04 ``` -Notice—the `this` addresses from both calls are exactly the same! 
This is because the compiler applied NRVO (Named Return Value Optimization), placing the temporary object returned by `make_widget` directly in the caller's stack space, and the temporary object for `Widget(7)` happened to be allocated in the same region. These temporary objects have short lifetimes, but they do have real memory locations while they're alive. +Notice—the ``this`` addresses of the two calls are exactly the same! This is because the compiler performed NRVO (Named Return Value Optimization), placing the temporary object returned by ``make_widget`` directly in the caller's stack space, and the temporary object for ``Widget(7)`` happened to be allocated in the same area. Although these temporary objects have short lifecycles, they do possess real memory locations while alive. -:::warning The version history of temporary materialization—two things need to be distinguished here -Saying "rvalues have no address" isn't quite accurate. The precise statement is—an rvalue **doesn't need** to have an address; it is not a persistent memory location. But if the compiler temporarily allocates a block of memory for it to implement some operation (like calling a member function, or binding to a reference), then in that instant it "has an address." This process of the compiler implicitly allocating memory is temporary materialization. +:::warning The origin of temporary materialization, distinguish two things here +Saying "rvalues have no address" isn't quite accurate. The accurate way to put it is—an rvalue **doesn't need** an address; it is not a persistent memory location. But if the compiler temporarily allocates a block of memory for it to implement an operation (like calling a member function, binding to a reference), then at that instant it "has an address." This process of implicitly allocating memory by the compiler is temporary materialization. 
-As for its version history, we need to separate two things: the lvalue / xvalue / prvalue **value category triad** was indeed introduced in C++11; but "**temporary materialization conversion**" as a named standard conversion was only formally established in **C++17**. It was written into the language rules alongside C++17's mandatory copy elision (proposal P0135), with the core idea being: **a prvalue isn't necessarily an object itself; it only "materializes" into a temporary object when it needs to be used as one (like calling a member function, or binding to a reference)**. In the C++11 era, this mechanism was still gestating and hadn't been formally named. So strictly speaking, the temporary materialization in `Widget(7).f()` above is standard semantics only from C++17 onward—don't conflate it with C++11's value category triad. +Regarding its origin, we need to separate two things: the **value category triad** of lvalue / xvalue / prvalue was indeed introduced in C++11; but "**temporary materialization conversion**" as a named standard conversion was only formally established in **C++17**. It was written into the language rules alongside C++17's mandatory copy elision (proposal P0135), with the core idea being: **a prvalue itself isn't necessarily an object; only when it is needed as an object (e.g., calling a member function, binding to a reference) is it "materialized" into a temporary object**. In the C++11 era, this mechanism was still brewing and hadn't been formally named. So strictly speaking, the temporary materialization in ``Widget(7).f()`` above is standard semantics from C++17 onwards—don't confuse it with the C++11 value category triad. ::: :::warning -Class rvalues being able to call member functions is the foundation of move semantics. Move constructors and move assignment operators are essentially "member functions called on temporary objects about to be destroyed"—through rvalue references, we gain the ability to modify these temporary objects. 
+Class rvalues can call member functions; this feature is the foundation of move semantics. Move constructors and move assignment operators are essentially "member functions called on temporary objects about to die"—through rvalue references, we gain the ability to modify these temporary objects. ::: -## Lvalue References: The First Rule of Binding +## Lvalue References: The First Binding Rule -Now we enter the world of references. Before C++11 introduced rvalue references, what C++ called a "reference" was what we now formally call an "lvalue reference." +Now we enter the world of references. Before C++11 introduced rvalue references, what C++ called "references" was what we now formally call "lvalue references." -"An lvalue reference to T must bind to a T-type lvalue"—this sentence sounds convoluted, but the meaning is simple. A reference of type `int&` can only bind to an lvalue of type `int`: +"An lvalue reference to T must bind to an lvalue of type T"—this sounds convoluted, but the meaning is simple. A reference of type ``int&`` can only bind to an lvalue of type ``int``: ```cpp int n = 10; @@ -153,22 +153,22 @@ int& ri = n; // OK: ri 绑定到左值 n // int& ri2 = 10; // 错误!不能把左值引用绑定到右值(字面量) ``` -Why is `int& ri = 10` an error? Because `10` is an rvalue; it has no persistent memory location. A reference needs to know the address of what it's referencing, but an rvalue has no address—hence the contradiction. +Why is ``int& ri = 10`` an error? Because ``10`` is an rvalue; it has no persistent memory location. A reference needs to know the address of the thing it references, but an rvalue has no address—this is a contradiction. -But there's a very important exception here: **a const lvalue reference can bind to an rvalue**. +But there is a very important exception here: **a const lvalue reference can bind to an rvalue**. 
```cpp const int& cri = 10; // OK!const 引用可以绑定到右值 const int& cri2 = 3.14; // OK!甚至可以绑定到不同类型(double -> int 转换) ``` -The mechanism behind this is: the compiler quietly creates a temporary `int` object to store that value (or the converted value), and then lets the const reference bind to this temporary object. For `const int& cri2 = 3.14;`, the compiler first does the conversion from `double` to `int` (3.14 becomes 3), creates a temporary `int` holding 3, and then `cri2` binds to this temporary object. That's why I saw `const lvalue ref to converted: 3` in the GCC output—3.14 was truncated. +The mechanism behind this is: the compiler quietly creates a temporary ``int`` object to store that value (or converted value), and then lets the const reference bind to this temporary object. For ``const int& cri2 = 3.14;``, the compiler first performs the conversion from ``double`` to ``int`` (3.14 becomes 3), creates a temporary ``int`` holding 3, and then ``cri2`` binds to this temporary object. This is why I saw ``const lvalue ref to converted: 3`` in the GCC output—3.14 was truncated. -You might ask: why must it be `const`? Because if you allowed a non-const reference to bind to an rvalue, you could modify a temporary object through that reference—and that temporary object might be destroyed immediately, making the modification pointless and prone to bugs. A const reference binding to a temporary object means you can only read it, not modify it, so it's safe. +You might ask: why must it be ``const``? Because if non-const references were allowed to bind to rvalues, you could modify a temporary object through that reference—and that temporary object might be destroyed immediately, modifying it is meaningless and prone to bugs. A const reference binds to a temporary object; you can only read it, not modify it, so it is safe. -This rule has another important corollary: **a const reference extends the lifetime of a temporary object**. 
Normally, the temporary object in `Widget(7).f()` would be destroyed after the statement ends. But if a const reference binds to it, the temporary object's lifetime is extended to be as long as the reference. +This rule has an important corollary: **const references extend the lifetime of temporary objects**. Normally, the temporary object in ``Widget(7).f()`` is destroyed after the statement ends. But if a const reference binds to it, the temporary object's lifetime is extended to be as long as the reference. -Here's a concrete example to show how important this is. Suppose you wrote a function that returns a `std::string`, and you receive it with a const reference: +Let's take a concrete example to show how important this is. Suppose you write a function that returns ``std::string`` and receive it with a const reference: ```cpp std::string get_name() { return "hello"; } @@ -178,57 +178,57 @@ const std::string& name = get_name(); printf("%s\n", name.c_str()); // 安全 ``` -Without the const reference's lifetime extension rule, the temporary `std::string` returned by `get_name()` would be destroyed after the statement ends, and `name` would become a dangling reference. But because `const std::string&` binds to this temporary object, the compiler guarantees the temporary lives at least until `name` goes out of scope. +Without the const reference lifetime extension rule, the temporary ``std::string`` returned by ``get_name()`` would be destroyed after the statement ends, and ``name`` would become a dangling reference. But because ``const std::string&`` binds to this temporary object, the compiler guarantees the temporary object lives at least until ``name`` leaves scope. -There's a subtle pitfall here, though—only the "first" reference that directly binds to the temporary object extends its lifetime; indirect binding through a reference chain doesn't count. 
For example, in `const std::string& r2 = name;`, `r2` binds to `name` (an lvalue), which doesn't involve a temporary object, so there's no lifetime extension. But if you have a situation involving multiple levels of indirect binding to a temporary object, you need to be careful. We discuss this in more detail in vol2's [Rvalue References: From Copy to Move](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md). +However, there is a subtle pitfall here—only the "first" reference that directly binds to the temporary object extends its lifetime; indirect binding through a reference chain doesn't count. For example, in ``const std::string& r2 = name;``, ``r2`` binds to ``name`` (an lvalue), which doesn't involve a temporary object, so there is no lifetime extension. But if multi-level indirect binding to temporary objects is involved, be careful. We have a more detailed discussion in vol2's [Rvalue References: From Copy to Move](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md). :::warning -Note: An rvalue reference `T&&` also has the effect of extending a temporary object's lifetime. `std::string&& r = get_name();` will also keep the returned temporary object alive until `r` goes out of scope. This is a commonality between rvalue references and const lvalue references—they can both bind to temporary objects and extend their lifetimes. The difference is that an rvalue reference allows you to modify the temporary object, while a const lvalue reference does not. +Note: Rvalue references ``T&&`` also have the effect of extending temporary object lifetime. ``std::string&& r = get_name();`` will also keep the returned temporary object alive until ``r`` leaves scope. This is a commonality between rvalue references and const lvalue references—they can both bind to temporary objects and extend their lifetime. The difference is that rvalue references allow you to modify the temporary object, while const lvalue references do not. 
::: ## Rvalue References: Born for Move Semantics -C++11 introduced a new reference type—the rvalue reference, denoted with double `&&` syntax. +C++11 introduced a new reference type—the rvalue reference, denoted by the double ``&&`` syntax. ```cpp int&& ri = 10; // OK: 右值引用绑定到右值(字面量 10) // int&& ri2 = n; // 错误!右值引用不能绑定到左值 ``` -The binding rules for rvalue references are the "reverse" of lvalue references: `int&&` can only bind to an rvalue of type `int`. `int&& ri2 = n` is a compiler error because `n` is an lvalue. +The binding rules for rvalue references are exactly the "reverse" of lvalue references: ``int&&`` can only bind to an rvalue of type ``int``. ``int&& ri2 = n`` is a compiler error because ``n`` is an lvalue. :::warning -Even `const int&&` can only bind to rvalues—adding const to an rvalue reference doesn't suddenly let it bind to lvalues. This point is often confused. const rvalue references are almost never seen in practice, and the standard library has virtually no use cases for them, but they do exist. +Even ``const int&&`` can only bind to rvalues—adding const to an rvalue reference doesn't suddenly make it able to bind to lvalues. This is often confused. const rvalue references are rarely seen in practice; the standard library almost never uses them, but they do exist. ::: -What's the actual use of rvalue references? The key point is this: **through an rvalue reference, we can modify temporary objects**. +What is the use of rvalue references? The key lies in this: **through an rvalue reference, we can modify temporary objects**. ```cpp int&& ri = 10; // 编译器为字面量 10 创建一个临时 int 对象 ri = 20; // OK!我们修改了这个临时对象 ``` -For simple types like `int`, this has no practical significance. But when we talk about class types—imagine a `MyString&&` that binds to a temporary `MyString` object, and that temporary object internally has a dynamically allocated character array. 
Through this rvalue reference, we can directly "steal" the pointer to that array, set the temporary object's pointer to `nullptr`, and then let the temporary object's destructor do nothing. +For simple types like ``int``, this has no practical meaning. But when we discuss class types—imagine ``MyString&&``, it binds to a temporary ``MyString`` object, and that temporary object has a dynamically allocated character array inside. Through this rvalue reference, we can directly "steal" the pointer to that array, set the temporary object's pointer to ``nullptr``, and then let the temporary object's destructor do nothing. -This is exactly what the signatures of move constructors and move assignment operators express: they receive parameters through rvalue references, telling the compiler "I know this is a temporary object, and I can safely steal its resources." But that's the topic for the next article; let's first finish completing our understanding of the reference system. +This is exactly what the signatures of move constructors and move assignment operators express: they receive parameters via rvalue references, telling the compiler "I know this is a temporary object, I can safely steal its resources." But that's for the next post; let's finish the reference system first. -You might also ask a more fundamental question: why did C++11 introduce an entirely new reference type to do this? Why not just reuse lvalue references? The answer is: if the move constructor's signature were `MyString(MyString& s)`, it would create ambiguity with the copy constructor `MyString(const MyString& s)`—actually, no, it wouldn't be ambiguous because the const is different. But the real problem is: if a function accepts both `MyString&` and `const MyString&`, when the compiler sees `s1 + s2` (an rvalue), it can't find a matching non-const lvalue reference to bind to it, so it still can't trigger a "move." 
Rvalue references fill this gap: they're specifically designed to bind to rvalues, and their binding rules don't overlap with lvalue references, so overload resolution can automatically distinguish between "this is a persistent object (copy it)" and "this is a temporary object (steal its resources)." +You might also ask a more fundamental question: why did C++11 introduce a brand new reference type to do this? Why not reuse lvalue references? The answer is: if the move constructor signature were ``MyString(MyString& s)``, it would be ambiguous with the copy constructor ``MyString(const MyString& s)``—no, actually it wouldn't be ambiguous because const is different. But the real problem is: if a function accepts both ``MyString&`` and ``const MyString&``, when the compiler sees ``s1 + s2`` (an rvalue), it can't find a matching non-const lvalue reference to bind to it, so it still can't trigger "move." Rvalue references fill this gap: they are specifically used to bind to rvalues, with binding rules that don't overlap with lvalue references, so overload resolution can automatically distinguish between "this is a persistent object (copy it)" and "this is a temporary object (steal its resources)." -## C++11's Value Category System: lvalue, xvalue, prvalue +## C++11 Value Category System: lvalue, xvalue, prvalue -So far I've been talking about just two categories, "lvalue" and "rvalue," as if the whole world were black and white. But in reality, to support move semantics, C++11 expanded the value category system from binary to ternary. +So far I've been talking about the two categories of "lvalue" and "rvalue," as if the whole world were black and white. But actually, to support move semantics, C++11 expanded the value category system from binary to ternary. -Before C++11, every expression was either an lvalue or an rvalue—simple as that. But C++11 introduced a third category: **xvalue (expiring value)**. 
An xvalue represents "this object is about to expire, and its resources can be moved." +Before C++11, every expression was either an lvalue or an rvalue—simple as that. But C++11 introduced a third category: **xvalue (expiring value)**. An xvalue represents "this object is about to die, its resources can be moved away." -The new classification system works like this. First, all expressions are categorized along two dimensions: "has identity" (can determine a memory location) and "can be moved": +The new classification system looks like this. First, all expressions are divided by two dimensions: "has identity" (identity, can determine memory location) and "can be moved": -| Category | Has Identity | Can Be Moved | Examples | +| Category | Has Identity | Can be Moved | Example | |------|:--------:|:----------:|------| -| **lvalue** | Yes | No | Named variable `n`, `*p`, `++i` | -| **xvalue** | Yes | Yes | Result of `std::move(n)` | -| **prvalue** | No | Yes | Literal `42`, `Widget(7)`, temporary object returned by a function | +| **lvalue** | Yes | No | Named variable ``n``, ``*p``, ``++i`` | +| **xvalue** | Yes | Yes | Result of ``std::move(n)`` | +| **prvalue** | No | Yes | Literal ``42``, ``Widget(7)``, temporary object returned by function | -Then there are two composite concepts: **glvalue** (generalized lvalue) = lvalue + xvalue, **rvalue** = xvalue + prvalue. Represented as a diagram: +Then there are two combined concepts: **glvalue** (generalized lvalue) = lvalue + xvalue, **rvalue** = xvalue + prvalue. Here is a diagram: ```text 表达式 @@ -239,34 +239,34 @@ Then there are two composite concepts: **glvalue** (generalized lvalue) = lvalue ``` - **lvalue**: Has identity, cannot be moved—ordinary named variables. -- **xvalue**: Has identity, can be moved—the return value of `std::move(x)`. It has a name (or rather, a definite memory location), but the compiler is told "you can move its resources away." 
+- **xvalue**: Has identity, can be moved—the return value of ``std::move(x)``. It has a name (or a definite memory location), but the compiler is told "you can move its resources away." - **prvalue** (pure rvalue): No identity, can be moved—pure temporary values, like literals and temporary objects returned by functions. -This system looks considerably more complex than the binary classification, but its design logic is clear: move semantics needs a mechanism to express "this thing's resources can be stolen," and xvalue is that bridge. What `std::move` essentially does is convert an lvalue into an xvalue, telling the compiler "although this object still has a name, you can move its resources away." +This system looks much more complex than the binary classification, but its design logic is clear: move semantics needs a mechanism to express "this thing's resources can be stolen," and xvalue is that bridge. ``std::move`` essentially converts an lvalue to an xvalue, telling the compiler "although this object still has a name, you can move its resources." 
### Value Categories of Common Expressions -Looking at just the definitions might still feel abstract, so let's list the most common expressions we use in daily coding and mark which category each belongs to: +Just looking at definitions might still be abstract, so let's list the most common expressions we write in daily code and mark which category they belong to: | Expression | Value Category | Reason | |--------|--------|------| -| `n` (named variable) | lvalue | Has a name, has a definite memory location | -| `*p` (dereference) | lvalue | The object pointed to has a memory location | -| `++i` (pre-increment) | lvalue | Returns the modified `i` itself | -| `i++` (post-increment) | prvalue | Returns a copy of the old value, a temporary | -| `42` (integer literal) | prvalue | Pure value with no memory location | -| `"hello"` (string literal) | lvalue | String literals are const char arrays with an address | -| `Widget(7)` (functional-style cast) | prvalue | Creates a temporary Widget object | -| `make_widget()` (return by value) | prvalue | Temporary value returned by a function | -| `std::move(n)` | xvalue | Explicitly converts an lvalue to a "movable" state | -| `a.m` (member access, a is lvalue) | lvalue | Follows the identity property of `a` | -| `std::move(a).m` (member access, a is xvalue) | xvalue | Follows the xvalue property of `a` | - -A few points are worth special attention. The string literal `"hello"` is an lvalue, which often surprises people—it's actually an array of type `const char[6]`, stored in the program's read-only data segment, has a definite address, and is therefore an lvalue. Post-increment `++` returns a copy of the old value (a temporary), so it's a prvalue; while pre-increment `++` returns the modified object itself, so it's an lvalue. The value category of the member access expression `a.m` follows the value category of `a`—if `a` is an lvalue, `a.m` is an lvalue; if `a` is an xvalue, `a.m` is an xvalue. 
+| ``n`` (named variable) | lvalue | Has a name, has a definite memory location | +| ``*p`` (dereference) | lvalue | The object pointed to has a memory location | +| ``++i`` (pre-increment) | lvalue | Returns the modified ``i`` itself | +| ``i++`` (post-increment) | prvalue | Returns a copy of the old value, a temporary value | +| ``42`` (integer literal) | prvalue | Pure value without memory location | +| ``"hello"`` (string literal) | lvalue | String literal is a const char array, has an address | +| ``Widget(7)`` (functional cast) | prvalue | Creates a temporary Widget object | +| ``make_widget()`` (return by value) | prvalue | Temporary value returned by function | +| ``std::move(n)`` | xvalue | Explicitly converts lvalue to "movable" state | +| ``a.m`` (member access, a is lvalue) | lvalue | Follows ``a``'s identity property | +| ``std::move(a).m`` (member access, a is xvalue) | xvalue | Follows ``a``'s xvalue property | + +There are a few points worth special attention. String literals ``"hello"`` are lvalues, which often surprises people—it is actually an array of type ``const char[6]``, stored in the read-only data segment of the program, has a definite address, so it is an lvalue. Postfix ``++`` returns a copy of the old value (a temporary value), so it is a prvalue; while prefix ``++`` returns the modified object itself, so it is an lvalue. The value category of the member access expression ``a.m`` follows the value category of ``a``—if ``a`` is an lvalue, ``a.m`` is an lvalue; if ``a`` is an xvalue, ``a.m`` is an xvalue. ## Verifying Value Categories with the Compiler -We've discussed a lot of theory; now let's actually verify things using `decltype` and type traits. `decltype` has a useful property: when applied to a **parenthesized** variable name `decltype((x))`, it gives different types depending on the expression's value category—lvalues yield `T&`, xvalues yield `T&&`, and prvalues yield `T`. 
+We've talked a lot about theory; let's use ``decltype`` and type traits to actually verify it. ``decltype`` has a useful feature: when applied to a **parenthesized** variable name ``decltype((x))``, it gives different types based on the expression's value category—lvalue gives ``T&``, xvalue gives ``T&&``, prvalue gives ``T``. ```cpp #include @@ -299,7 +299,7 @@ int main() } ``` -The output from GCC 16.1.1 perfectly confirms the theory: +Output from GCC 16.1.1 perfectly confirms the theory: ```text decltype((n)): @@ -313,13 +313,13 @@ decltype(std::move(n)): is rvalue ref: yes ``` -`decltype((n))` yields `int&` because `(n)` is an lvalue expression. `decltype(10)` yields `int` (the bare type) because `10` is a prvalue. `decltype(std::move(n))` yields `int&&` because the return value of `std::move` is an xvalue, and xvalues manifest as `T&&` in `decltype`. +``decltype((n))`` yields ``int&`` because ``(n)`` is an lvalue expression. ``decltype(10)`` yields ``int`` (bare type) because ``10`` is a prvalue. ``decltype(std::move(n))`` yields ``int&&`` because the return value of ``std::move`` is an xvalue, and an xvalue manifests as ``T&&`` in ``decltype``. -## "If It Has a Name, It's an Lvalue"—The Trap of Rvalue Reference Parameters +## "If it has a name, it's an lvalue"—The Trap of Rvalue Reference Parameters -Now it's time to talk about a pitfall that almost every C++ newcomer falls into. Ben Saks specifically emphasized this rule in his talk: **if something has a name, it's an lvalue**. +Now we should talk about a pitfall almost every C++ newbie steps into. Ben Saks emphasized this rule in the talk: **if something has a name, it is an lvalue**. 
-Consider a function that receives an rvalue reference: +Consider a function that takes an rvalue reference: ```cpp void process(MyString&& s) @@ -328,9 +328,9 @@ void process(MyString&& s) } ``` -From outside the function, when you call `process(s1 + s2)`, `s1 + s2` is an rvalue, so this call is fine—an rvalue reference can bind to an rvalue. But **inside** the function, the parameter `s` has a name. It's a named object. According to the "if it has a name, it's an lvalue" rule, **within the function body, `s` is treated as an lvalue**. +From the outside of the function, when you call ``process(s1 + s2)``, ``s1 + s2`` is an rvalue, so this call is fine—an rvalue reference can bind to an rvalue. But **inside** the function, the parameter ``s`` has a name. It is a named object. According to the "if it has a name, it's an lvalue" rule, **inside the function body, ``s`` is treated as an lvalue**. -What does this mean? If you want to move resources from `s` again inside the function body, you can't do it directly—the compiler will treat `s` as an lvalue and choose copy instead of move. You must explicitly use `std::move(s)` to tell the compiler "I know what I'm doing, please treat it as an rvalue." +What does this mean? If you want to move resources from ``s`` again inside the function body, you can't move directly—the compiler will treat ``s`` as an lvalue and choose copy instead of move. You must explicitly use ``std::move(s)`` to tell the compiler "I know what I'm doing, please treat it as an rvalue." ```cpp void process(MyString&& s) @@ -340,40 +340,40 @@ void process(MyString&& s) } ``` -The logic behind this rule is actually quite reasonable: the function body might have many lines of code, and `s` might still be used on line ten after being moved on line one. The compiler can't assume "you only use it on the last line," so it chooses the conservative strategy—things with names aren't automatically moved; you must explicitly authorize it. 
+The logic behind this rule is actually quite reasonable: the function body might have many lines of code; ``s`` might be used again on line ten after being moved on line one. The compiler can't assume "you only use it on the last line," so it chooses a conservative strategy—named objects aren't automatically moved; you must explicitly authorize it. :::tip -This "name = lvalue" rule can be verified with `decltype`. If you write `decltype((s))` in a function template, when `s`'s declared type is `MyString&&`, `decltype((s))` will still yield `MyString&` (lvalue reference), not `MyString&&`. Because the parenthesized `decltype` looks at the expression's value category, and `s` as a named object has the value category lvalue. This is often used to set traps in interview questions. +This "name = lvalue" rule can be verified with ``decltype``. If you write ``decltype((s))`` in a function template, when ``s``'s declared type is ``MyString&&``, ``decltype((s))`` will still give ``MyString&`` (lvalue reference), not ``MyString&&``. Because parenthesized ``decltype`` looks at the expression's value category, and ``s`` as a named object has the value category lvalue. This is often used to set traps in interview questions. ::: :::tip -This "if it has a name, it's an lvalue" rule has one important exception: **return statements**. The `s` in `return s;` has a name, but since C++11 it's treated as an "implicitly movable entity," and the compiler can directly move from it without you needing to write `std::move(s)`. And in fact, the compiler might do even better—eliminating the copy entirely through NRVO. We'll save the full discussion of this topic for the next article. +This "if it has a name, it's an lvalue" rule has an important exception: **return statements**. The ``s`` in ``return s;`` has a name, but since C++11 it is considered an "implicitly movable entity," and the compiler can move from it directly without you writing ``std::move(s)``. 
And actually, the compiler might do even better—eliminate the copy entirely via NRVO. We'll save the full discussion of this topic for the next post. ::: ## Reference Binding Rules Cheat Sheet -Let's organize all the reference binding rules covered in this article into a single table for easy reference: +Let's organize all the reference binding rules covered in this post into a table for easy reference: -| Reference Type | Can Bind to lvalue? | Can Bind to rvalue? | Can Bind to Different Type? | Can Modify Referenced Object? | +| Reference Type | Can bind to lvalue? | Can bind to rvalue? | Can bind to different type? | Can modify referenced object? | |----------|:-----------------:|:-----------------:|:------------------:|:-----------------:| -| `T&` | Yes | **No** | No | Yes | -| `const T&` | Yes | **Yes** | Yes (with conversion) | No | -| `T&&` | **No** | Yes | No | Yes | -| `const T&&` | **No** | Yes | No | No | +| ``T&`` | Yes | **No** | No | Yes | +| ``const T&`` | Yes | **Yes** | Yes (with conversion) | No | +| ``T&&`` | **No** | Yes | No | Yes | +| ``const T&&`` | **No** | Yes | No | No | -This table packs in a lot of information, but a few key conclusions are worth remembering. First, `const T&` is a "universal receiver"—it can bind to almost anything (lvalue, rvalue, even different types), at the cost of not being able to modify the referenced object through it. Second, `T&&` only binds to rvalues, which is exactly what move semantics needs: it guarantees that what's bound is always an object whose "resources can be safely stolen." Third, `const T&&` exists but is virtually useless—it can bind to rvalues but can't modify them, which loses the core advantage of rvalue references: "allowing modification of temporary objects." +This table has a lot of information, but there are a few key conclusions worth remembering. 
First, ``const T&`` is a "universal receiver"—it can bind to almost anything (lvalue, rvalue, even different types), at the cost that you cannot modify the referenced object through it. Second, ``T&&`` only binds to rvalues, which is exactly what move semantics needs: it guarantees that what is bound is definitely an object "from which resources can be safely stolen." Third, ``const T&&`` exists but is almost useless—it can bind to rvalues but can't modify them, losing the core advantage of rvalue references "allowing modification of temporary objects." -## What We've Figured Out So Far +## What We've Cleared Up Here -In this article, starting from K&R's "left side of the equals sign," we step by step built the complete picture of C++ value categories. We saw how const objects broke the old definition of "lvalue = assignable," how class rvalues gain memory locations through temporary materialization, how lvalue references and rvalue references have starkly different binding rules, and finally how we found the theoretical foundation for move semantics in C++11's lvalue/xvalue/prvalue ternary system. +In this post, starting from K&R's "left of the equal sign," we built a complete picture of C++ value categories step by step. We saw how const objects broke the old definition of "lvalue = assignable," how class rvalues gain memory locations through temporary materialization, the distinct binding rules of lvalue references and rvalue references, and finally found the theoretical basis for move semantics in the C++11 lvalue/xvalue/prvalue triad. -The core takeaways are two: first, an rvalue reference `T&&` only binds to rvalues, which gives the compiler a natural signal—"the thing bound to it is temporary, and its resources can be safely stolen." Second, the "if it has a name, it's an lvalue" rule means we sometimes need `std::move` to explicitly tell the compiler "please allow moving." 
+The core takeaways are two: first, rvalue references ``T&&`` only bind to rvalues, giving the compiler a natural signal—"the bound thing is temporary, its resources can be safely stolen." Second, the "if it has a name, it's an lvalue" rule means we sometimes need ``std::move`` to explicitly tell the compiler "please allow moving." -Looking back, the distinction between lvalues and rvalues wasn't invented out of thin air by C++11—it has existed since the C language era, just in a much simpler form. C++ introduced const, class types, references, operator overloading, and each step made the boundaries of value categories more blurred, until move semantics needed a precise mechanism to distinguish "persistent" from "temporary" objects, and C++11 finally formalized this system into the three-level classification of lvalue/xvalue/prvalue. Understanding the evolutionary logic of this system will make learning `std::move`, move constructors, perfect forwarding, and other concepts much smoother—because their designs are all responding to the same question: "How does the compiler know whether this object can be safely moved?" +Looking back, the distinction between lvalues and rvalues wasn't invented out of thin air by C++11—it has existed since the C language era, just much simpler then. C++ introduced const, class types, references, operator overloading, and each step blurred the boundaries of value categories, until move semantics needed a precise mechanism to distinguish "persistent" and "temporary" objects, and C++11 finally formalized this system into the three-level classification of lvalue/xvalue/prvalue. Understanding the evolution logic of this system makes learning ``std::move``, move constructors, perfect forwarding, and other concepts much smoother later—because their designs all respond to the same question: "How does the compiler know if this object can be safely moved?" 
-With this theoretical foundation, in the next article we can move into practice—implementing a move constructor and move assignment operator for MyString, seeing exactly how `std::move` works, and under what conditions copy elision lets us skip moving entirely. +With this theoretical foundation, in the next post we can move into practice—implementing move constructors and move assignment operators for MyString, seeing exactly how ``std::move`` works, and under what conditions copy elision lets us skip moving entirely. -If you want a more systematic explanation of rvalue references, vol2's [Rvalue References: From Copy to Move](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md) is excellent supplementary material. +If you want a more systematic explanation of rvalue references, vol2's [Rvalue References: From Copy to Move](../../../../vol2-modern-features/ch00-move-semantics/01-rvalue-reference.md) is great supplementary material. diff --git a/documents/en/vol10-open-lecture-notes/cppcon/2025/04-back-to-basics-move-semantics/03-move-ops-stdmove-and-elision.md b/documents/en/vol10-open-lecture-notes/cppcon/2025/04-back-to-basics-move-semantics/03-move-ops-stdmove-and-elision.md index 0b43c5d26..3270fbb11 100644 --- a/documents/en/vol10-open-lecture-notes/cppcon/2025/04-back-to-basics-move-semantics/03-move-ops-stdmove-and-elision.md +++ b/documents/en/vol10-open-lecture-notes/cppcon/2025/04-back-to-basics-move-semantics/03-move-ops-stdmove-and-elision.md @@ -1,7 +1,7 @@ --- title: Move Operations, std::move, and Copy Elision description: CppCon 2025 Talk Notes — Complete Implementation of Move Construction/Assignment, - The True Meaning of std::move, NRVO and C++17 Mandatory Copy Elision, and Moved-From + The Real Meaning of std::move, NRVO vs.
C++17 Mandatory Copy Elision, and Moved-from State conference: cppcon conference_year: 2025 @@ -23,520 +23,406 @@ chapter: 4 order: 3 translation: source: documents/vol10-open-lecture-notes/cppcon/2025/04-back-to-basics-move-semantics/03-move-ops-stdmove-and-elision.md - source_hash: 202e126a92dd9bcd611e0fd3c61f57e908558f75363044ec384e65e702c49e25 - translated_at: '2026-06-13T02:18:18.007260+00:00' + source_hash: aa77a7851692af982bd553ebff0a041002f6647fa683abb87a989cc5d3357f06 + translated_at: '2026-06-14T00:17:55.581146+00:00' engine: anthropic - token_count: 4572 + token_count: 4573 --- # Move Operations, std::move, and Copy Elision :::tip -This article is the third in a series of notes from CppCon 2025's "Back to Basics: Move Semantics" talk. The first two articles discussed copy overhead and the motivation for moving, as well as lvalues, rvalues, and the reference system. This installment focuses on a core practical question: how to write move constructors and move assignment operators, what `std::move` actually does, and how C++17's copy elision changes the game. +This article is the third in the CppCon 2025 "Back to Basics: Move Semantics" series notes. The previous two parts discussed copy overhead vs. move motivation, and lvalues, rvalues, and the reference system. This part focuses on core practical issues: how to write move constructors and move assignment, what `std::move` actually does, and how C++17 copy elision changes the game. ::: -Honestly, I used to think I "understood" move semantics — isn't it just stealing pointers, how hard could it be? Until one day in a code review, I saw a colleague write `return std::move(result);`, and I casually said, "Nice, explicitly moved." Then a senior engineer next to me shut me down with one sentence: **"Are you sure writing it that way won't prevent NRVO?"** +Honestly, I used to think I "understood" move semantics—isn't it just stealing pointers? How hard could it be? 
Until one day I saw a colleague write `return std::move(str);` in a code review. I casually said, "Nice, explicit move." Then a senior engineer next to me shut me down with one sentence: **"Are you sure that won't block NRVO?"** -It took me a whole evening to figure it out — `return std::move(result)` doesn't help you optimize at all. Instead, it turns a return value transfer that the compiler could have done at zero cost into an extra move construction. From that day on, I truly realized that the devil of move semantics is entirely in the details. +I spent a whole night figuring it out—`std::move` doesn't help you optimize; instead, it turns a return value transfer that the compiler could have done at zero cost into an extra move construction. From that day on, I truly realized that the devil in move semantics is all in the details. -In this article, we will break down these details one by one. Our test environment is Arch Linux WSL, GCC 16.1.1, with the compiler flag `-std=c++20`. If you plan to follow along and run the code, we recommend having this version or a newer compiler ready. +In this article, we will unpack these details one by one. Our experimental environment is Arch Linux WSL, GCC 16.1.1, with compiler flags `-std=c++23 -O0 -Wall -Wextra -pedantic`. If you plan to follow along and run the code, it is recommended to have this version or a newer compiler ready. -## Move Constructors: The Art of Stealing Pointers +## Move Constructor: The Art of Stealing Pointers -In the previous article, we already had complete `MyString` copy operations. Now let's add a move constructor. What this function does, in Ben Saks' words, is a **"destructive copy"** — we "steal" the source object's data, and then leave the source object in a harmless state. +In the previous article, we had complete copy operations for `MyString`. Now, let's add the move constructor. 
Using Ben Saks' words, what this function does is a **"destructive copy"**—we "steal" the source object's data and then leave the source object in a harmless state. ```cpp -class MyString +MyString(MyString&& other) noexcept + : _data{other._data} + , _size{other._size} { - std::size_t stored_length_; - char* actual_str_; - -public: - // ... 之前的构造函数、析构函数、拷贝操作 ... - - // 移动构造函数 - MyString(MyString&& s) noexcept - : stored_length_(s.stored_length_) - , actual_str_(s.actual_str_) - { - s.actual_str_ = nullptr; - s.stored_length_ = 0; - } -}; + other._data = nullptr; + other._size = 0; +} ``` Let's break down this code line by line, because every line exists for a reason. -First is the parameter type `MyString&& s` — this is an rvalue reference. An rvalue reference can only bind to an rvalue (a temporary object, the result of `std::move`, etc.), which means this constructor is only called when the compiler confirms that "the source object is about to die." This is the first layer of safety guarantee in move semantics: the compiler gates it for you through overload resolution. +First is the parameter type `MyString&&`—this is an rvalue reference. An rvalue reference can only bind to an rvalue (a temporary object, the result of `std::move`, etc.). This means the compiler will only call this constructor when it confirms the "source object is about to die." This is the first layer of safety guarantee in move semantics: the compiler helps you gatekeep through overload resolution. -Next is the initializer list. `stored_length_(s.stored_length_)` directly takes the source object's length — `std::size_t` is a built-in type, so the so-called "copy" is just an integer assignment, at nearly zero cost. `actual_str_(s.actual_str_)` is the key part: we directly assign the source object's pointer to the new object, so the new object now points to the heap memory previously allocated by the source object. 
So far, both objects point to the same memory — if we ended here, that would be a double delete, which is undefined behavior (UB). +Next is the initializer list. `_size{other._size}` takes the source object's length directly—`size_t` is a built-in type, so a "copy" is just an integer assignment, costing almost zero. `_data{other._data}` is the key: we assign the source object's pointer directly to the new object. The new object now points to the heap memory previously allocated by the source object. So far, both objects point to the same memory block—if we ended here, it would be a double delete, which is undefined behavior. -So the two lines in the function body are the soul. `s.actual_str_ = nullptr` nullifies the source object's pointer, and `s.stored_length_ = 0` resets the length to zero. This way, when the source object's destructor executes `delete[] actual_str_`, it actually calls `delete[] nullptr` — and the standard explicitly states that deleting a null pointer is a safe no-op. +So those two lines in the function body are the soul. `other._data = nullptr;` nullifies the source object's pointer, and `other._size = 0;` resets the length to zero. This way, when the source object's destructor executes `delete _data`, it actually calls `delete nullptr`—and the standard explicitly states that deleting a null pointer is a safe no-op. -You might have noticed that even though the move constructor's parameter `s` is an rvalue reference, `s`'s destructor will still be called. This is a point many people overlook: a move operation does not mean "once you take over, you don't need to care about the source object anymore." On the contrary, after the move is complete, the source object is still a complete, valid object — it's just that we intentionally set its internal state to "harmless" values. It will still be destructed normally, except that nothing will be freed during destruction. 
+You may have noticed that although the move constructor parameter `other` is an rvalue reference, `other`'s destructor will still be called. This is a point many overlook: move operations don't mean "take over and ignore the source object." On the contrary, the source object after being moved is still a complete, valid object—it's just that its internal state was intentionally set by us to "harmless" values. It will still be destructed normally, but the destructor will release nothing. ## Overload Resolution: How Does the Compiler Choose? -With both copy and move constructor versions available, how does the compiler choose when facing an initialization expression? The answer is overload resolution based on the value category of the argument . +With both copy constructor and move constructor versions available, how does the compiler choose when facing an initialization expression? The answer is overload resolution based on the value category of the argument. ```cpp -MyString s1("hello"); - -// s1 是左值(有名字)→ 调用拷贝构造函数 -MyString s2(s1); - -// std::move(s1) 是右值 → 调用移动构造函数 -MyString s3(std::move(s1)); +MyString a{"Hello"}; +MyString b{a}; // (1) Copy constructor +MyString c{std::move(a)}; // (2) Move constructor ``` -In the first line, `MyString s2(s1)`, `s1` is an lvalue — it has a name, and you can take its address. The compiler sees that the argument is an lvalue, looks for a constructor that accepts `const MyString&`, and hits the copy constructor. +In the first line `MyString b{a};`, `a` is an lvalue—it has a name, and you can take its address. The compiler sees the argument is an lvalue, looks for a constructor that accepts `const MyString&`, and hits the copy constructor. -In the second line, `MyString s3(std::move(s1))`, the result of `std::move(s1)` is an rvalue reference. The compiler looks for a constructor that accepts `MyString&&`, and hits the move constructor. 
This is why we need both constructors to coexist: the copy constructor handles the case where "the source object will continue to be used," and the move constructor handles the case where "the source object is going to die anyway." +In the second line `MyString c{std::move(a)};`, the result of `std::move(a)` is an rvalue reference. The compiler looks for a constructor that accepts `MyString&&`, and hits the move constructor. This is why we need two constructors to coexist: the copy constructor handles "the source object will still be used," and the move constructor handles "the source object is going to die anyway." -Ben Saks particularly emphasized one point in his talk: **an rvalue reference does not perform a move by itself**. It merely provides a signal to the compiler at the type system level — "this reference is bound to an rvalue." What actually decides between copy and move is overload resolution. If our `MyString` didn't have a move constructor, then `std::move(s1)` would only trigger the copy constructor too — the compiler would fall back to using the `const MyString&` version, because `MyString&&` can be received by `const MyString&`. It won't error out, but it won't move either. We'll mention this point again later. +Ben Saks emphasized a point in the talk: **An rvalue reference itself does not perform a move**. It only provides a signal to the compiler at the type system level—"this reference is bound to an rvalue." What really decides whether to copy or move is overload resolution. If our `MyString` didn't have a move constructor, `std::move(a)` would only trigger the copy constructor—the compiler would settle for the `const MyString&` version because `const MyString&` can accept an rvalue. It won't error, but it won't move either. This point will be mentioned again later. 
-## Move Assignment Operators: Clean Up the Old Object First +## Move Assignment Operator: Clean Up the Old Object First -Move constructors handle the "creating a new object" scenario, while move assignment handles the "overwriting an existing object" scenario. The core logic of both is very similar, but move assignment has an extra step — you must clean up the target object's old resources first. +The move constructor handles the "create a new object" scenario, while the move assignment operator handles the "overwrite an existing object" scenario. Their core logic is similar, but move assignment has one extra step—you must clean up the target object's old resources first. ```cpp -MyString& operator=(MyString&& s) noexcept -{ - if (this != &s) { - delete[] actual_str_; // 第一步:清理自己的旧资源 - stored_length_ = s.stored_length_; - actual_str_ = s.actual_str_; // 第二步:偷源对象的资源 - s.actual_str_ = nullptr; // 第三步:置空源对象 - s.stored_length_ = 0; +MyString& operator=(MyString&& other) noexcept { + if (this != &other) { + delete _data; + _data = other._data; + _size = other._size; + other._data = nullptr; + other._size = 0; } return *this; } ``` -This order is important. We first `delete[] actual_str_` release our own previous heap memory, and then take over the source object's pointer. If we did it the other way around — assigning first and then deleting — we would delete the pointer that the source object just gave us, which is a classic use-after-free. +This order is important. We `delete _data` to release our previous heap memory first, and then take over the source object's pointer. If we did it in reverse—assign first then delete—we would delete the pointer the source object just gave us, which is a classic use-after-free. -The self-assignment check `if (this != &s)` is equally important in move assignment. 
Although `s` is an rvalue reference and theoretically nobody should write code like `x = std::move(x)`, the language doesn't prohibit it, and sometimes template instantiation can produce this effect. Without the self-assignment check, `delete[] actual_str_` would release our own memory, and then `actual_str_ = s.actual_str_` would assign a dangling pointer back to ourselves — instant crash. +The self-assignment check `if (this != &other)` is equally important in move assignment. Although `other` is an rvalue reference and in theory no one should write `a = std::move(a);`, the language doesn't forbid it, and sometimes template instantiation can produce this effect. Without the self-assignment check, `delete _data` would free our own memory, and then `_data = other._data;` would assign a dangling pointer back to us—instant crash. -Note that the return type is `MyString&` — an lvalue reference, not an rvalue reference. This is because the target of the assignment operator (the object on the left side of `=`) is always an lvalue. Whether you use `std::move` or not, the receiving end of an assignment is always "an object with a name and an address." +Note the return type is `MyString&`—an lvalue reference, not an rvalue reference. This is because the target of the assignment operator (the object on the left side of `=`) is always an lvalue. Whether you use `std::move` or not, the receiver of the assignment is always "an object with a name and an address." -Additionally, this implementation is exception-safe — `MyString`'s data members are only built-in types (`std::size_t` and `char*`), and operations on these types won't throw exceptions. This is also why I marked it `noexcept`. If your class has more complex data members (such as another `std::string`), you would need to carefully consider exception safety.
+Additionally, this implementation is exception-safe—the `MyString` data members are only built-in types (`char*` and `size_t`), and operations on these types won't throw exceptions. This is why I marked it `noexcept`. If your class has more complex data members (such as a `std::vector`), you need to consider exception safety carefully. ## std::move: The Most Misunderstood Function in C++ -The name `std::move` is terribly misleading. When I first saw it, I naturally assumed it "performed a move operation" — after all, it's called "move." But the truth is, **`std::move` doesn't move anything at all**. +The name `std::move` is terribly misleading. When I first saw it, I naturally assumed it "performed a move operation"—after all, it's called "move." But the fact is, **`std::move` doesn't move anything**. -Its real identity is a type cast to an rvalue reference. The standard library's implementation is roughly equivalent to: +Its real identity is an unconditional cast to an rvalue reference. The standard library implementation is roughly equivalent to: ```cpp -template <typename T> -constexpr typename std::remove_reference<T>::type&& move(T&& t) noexcept -{ - return static_cast<typename std::remove_reference<T>::type&&>(t); +template <typename T> +constexpr std::remove_reference_t<T>&& move(T&& t) noexcept { + return static_cast<std::remove_reference_t<T>&&>(t); } ``` -Ignoring the template metaprogramming gymnastics of `remove_reference`, the core is just `static_cast<T&&>(t)`. It casts the passed-in argument to an rvalue reference and returns it. That's it. It doesn't generate any move code, doesn't call any move constructor, and doesn't modify any object's state. +Ignoring the template gymnastics of `remove_reference_t`, the core is `static_cast<T&&>(t)`. It casts the passed argument to an rvalue reference and returns it. That's it. It generates no move code, calls no move constructors, and modifies no object state. -Ben Saks said something very true in his talk: **if we could start over, we'd probably call it `make_movable` or `as_rvalue`**.
At least that name wouldn't mislead people into thinking it performs a move. +Ben Saks said a true thing in the talk: **If we could do it all over again, we'd probably call it `std::rval_cast` or `std::move_cast`**. That name wouldn't mislead people into thinking it performs a move. ### Why We Need std::move: The Naming Trap in swap -So if `std::move` doesn't move, why do we still need it? Let's look at the `swap` function. This is the scenario that best illustrates the point. +So if `std::move` doesn't move, why do we need it? Let's look at the `swap` function. This is the scenario that best illustrates the problem. ```cpp -template -void swap(T& x, T& y) -{ - T temp(x); // (1) - x = y; // (2) - y = temp; // (3) +void swap(MyString& a, MyString& b) { + MyString tmp{a}; // Copy + a = b; // Copy + b = tmp; // Copy } ``` -This C++03-style `swap` performs three copies. We naturally want to change it to a move version — after all, our previous two articles kept saying that moving is much faster than copying. But here's the problem: `x`, `y`, and `temp` inside the function body are all lvalues. They all have names, you can take their addresses, and their lifetimes span multiple statements. The compiler can't automatically treat them as rvalues — what if you still use `temp` after the third line? +This C++03 style `swap` performs three copies. We naturally want to change it to a move version—after all, our previous articles kept saying move is much faster than copy. But the problem arises: `a`, `b`, and `tmp` inside the function body are all lvalues. They all have names, you can take their addresses, and their lifetimes span multiple statements. The compiler can't automatically treat them as rvalues—what if you use `tmp` after the third line? -C++ has a general rule: **if something has a name, it's an lvalue**. Only nameless things (like temporary objects, literals, or by-value function return results) can be rvalues. 
This rule is very reasonable — the compiler must be conservative; it can't assume that `temp` won't be used on the next line. +C++ has a general rule: **If something has a name, it is an lvalue**. Only things without names (like temporary objects, literals, function return-by-value results) can be rvalues. This rule is very reasonable—the compiler must be conservative; it can't assume `tmp` isn't used on the next line. -So we need to explicitly tell the compiler: "I know `temp` won't be used again after this, please treat it as an rvalue." This is exactly the purpose of `std::move`: +So we need to explicitly tell the compiler: "I know `tmp` won't be used after this, please treat it as an rvalue." This is exactly what `std::move` is for: ```cpp -template -void move_swap(T& x, T& y) -{ - T temp(std::move(x)); // 移动构造 temp - x = std::move(y); // 移动赋值 x - y = std::move(temp); // 移动赋值 y +void swap(MyString& a, MyString& b) { + MyString tmp{std::move(a)}; + a = std::move(b); + b = std::move(tmp); } ``` -Every `std::move` sends a message to the compiler: **"Here, I confirm it's safe to move resources from this object."** Only after receiving this information will the compiler select the move version during overload resolution. +Every `std::move` is passing a message to the compiler: **"Here, I confirm it is safe to move resources from this object."** Only after receiving this information will the compiler choose the move version in overload resolution. ### std::move Doesn't Guarantee a Move -There's another easily overlooked trap: `std::move` doesn't guarantee that a move will actually happen. If a type only has copy operations and no move operations, the result of `std::move` will degrade to a copy. +There's another easily overlooked trap: `std::move` doesn't guarantee a move will actually happen. If a type only has copy operations and no move operations, the result of `std::move` will degrade to a copy. 
```cpp -struct CopyOnly -{ - CopyOnly() = default; - CopyOnly(const CopyOnly&) { std::cout << "copy\n"; } - // 没有移动构造函数! +struct NoMove { + int data; + explicit NoMove(int value) : data{value} {} + NoMove(const NoMove&) = default; }; -CopyOnly a; -CopyOnly b(std::move(a)); // 输出 "copy" —— 退化为拷贝构造 +NoMove src{42}; +NoMove dest = std::move(src); // Calls copy constructor! ``` -Here, `std::move(a)` converts `a` to an rvalue reference, but `CopyOnly` doesn't have a constructor that accepts an rvalue reference. The compiler falls back to using the `const CopyOnly&` version of the copy constructor (because `CopyOnly&&` can bind to `const CopyOnly&`). It won't error out, but the "move" you expected silently becomes a "copy." +Here `std::move(src)` casts `src` to an rvalue reference, but `NoMove` has no constructor accepting an rvalue reference. The compiler settles for the `const NoMove&` version of the copy constructor (because `const NoMove&` can bind to an rvalue). It won't error, but your expected "move" becomes a "copy"—and silently. ## The Naming Paradox of Rvalue Reference Parameters -This is the most confusing part of move semantics, and it's something Ben Saks spent considerable time emphasizing. +This is the most confusing part of move semantics, and it's the part Ben Saks spent a lot of time emphasizing. -When we write a function that takes an rvalue reference parameter, that parameter is treated as an **lvalue** inside the function: +When we write a function that accepts an rvalue reference parameter, the parameter is treated as an **lvalue** inside the function: ```cpp -void process(MyString&& s) -{ - // s 有名字 → s 是左值 - MyString copy(s); // 调用拷贝构造!不是移动构造! - MyString moved(std::move(s)); // 这才调用移动构造 +void sink(MyString&& str) { + // str is an lvalue here! + MyString internal{std::move(str)}; // Must use std::move again } ``` -From the perspective outside the function, the argument passed in is an rvalue (like `process(std::move(x))` or `process(MyString("temp"))`).
But once inside the function body, `s` becomes a named variable — it exists across multiple statements, and the compiler can't assume it's only used once. So the rule that "if it has a name, it's an lvalue" still applies. +From the perspective outside the function, the passed argument is an rvalue (like `std::move(x)` or a temporary). But once inside the function body, `str` is a named variable—it exists across multiple statements, and the compiler can't assume it's used only once. So the "named means lvalue" rule still applies. -This leads to a practical consequence: **inside a function, if you want to move resources from an rvalue reference parameter, you must explicitly use `std::move`**. And once you move from it, the value of that parameter in subsequent code becomes unpredictable — this is the moved-from state we'll discuss in the next section. +This leads to a practical consequence: **Inside a function, if you want to move resources from an rvalue reference parameter, you must explicitly use `std::move`**. And once you move, the value of that parameter in subsequent code is unpredictable—this is the "moved-from" state discussed in the next section. ## Implicitly Movable Return Expressions -The good news is that the "if it has a name, it's an lvalue" rule has an important exception — the `return` statement. +The good news is that the "named means lvalue" rule has an important exception—the `return` statement. ```cpp -MyString make_greeting() -{ - MyString temp("hello world"); - // ... 对 temp 做一些操作 ... - return temp; // 不需要 std::move! +MyString make_string() { + MyString result{"Hello"}; + // ... do stuff ... + return result; // Implicitly movable } ``` -In this code, although `temp` has a name (which would normally make it an lvalue), `return temp;` is the last use of `temp` in the function. The compiler knows that `temp`'s lifetime ends immediately after the function returns, so the standard allows it to treat `temp` as an implicitly movable entity . 
+In this code, `result` has a name (technically an lvalue), but `return result;` is the last use of `result` in the function. The compiler knows `result`'s lifetime ends immediately after the function returns, so the standard allows it to treat `result` as an **implicitly movable entity**. -This means you **do not** need to write `return std::move(temp);`. Simply writing `return temp;` is enough — the compiler will automatically select the move constructor (or, even better, eliminate the construction entirely, which we'll cover right below). +This means you **do not need** to write `return std::move(result);`. Just `return result;` is enough—the compiler will automatically choose the move constructor (or an even better choice, directly eliminating this construction, discussed next). -## NRVO: An Optimization Better Than Moving +## NRVO: An Optimization Better Than Move -Talking about "implicitly movable" actually isn't the end of the story. The compiler can actually do better than moving — it can deliver the return value to the caller at **zero cost**, without even needing a move. This is what's called **Named Return Value Optimization (NRVO)**. +Talking about "implicitly movable" actually doesn't go far enough. The compiler can actually do better than move—it can deliver the return value to the caller at **zero cost**, without even needing a move. This is the so-called **Named Return Value Optimization (NRVO)**. ```cpp -MyString make_greeting() -{ - MyString temp("hello world"); - return temp; +MyString create() { + MyString str{"Hello"}; + return str; } -MyString s = make_greeting(); +MyString s = create(); ``` -In a world without NRVO, the execution flow would be: first construct `temp` on `make_greeting`'s stack frame, then construct a temporary object at `s`'s location (via move or copy), then destruct `temp`, then move or copy the temporary into `s`, and finally destruct the temporary. Just hearing about it sounds wasteful. 
+In a world without NRVO, the execution flow is: first construct `str` on `create`'s stack frame, then construct a temporary object at the `return` location (via move or copy), then `str` destructs, then the temporary moves or copies to `s`, then the temporary destructs. Sounds wasteful. -NRVO's approach is very clever: when generating code, the compiler directly constructs `temp` at `s`'s location. Instead of constructing first and then copying, it puts the object in the right place from the very beginning. `temp` is `s`; they share the same memory. When the function returns, no copy or move is needed — the object is already where it should be. +NRVO's idea is very clever: when generating code, the compiler constructs `str` directly at `s`'s location. Rather than constructing first and then copying, it puts the object in the right place from the start. `str` *is* `s`; they share the same memory. When the function returns, no copy or move is needed—the object is already where it should be. -Starting from C++17, this optimization became **mandatory** in certain scenarios — the compiler must eliminate the copy, rather than "can eliminate it but doesn't have to." This isn't an optional optimization anymore; it's a defined behavior of the language. For historical reasons it's still called an "optimization," but it's actually a guarantee. +Starting with C++17, copy elision became **mandatory** in certain contexts—most notably when a function returns a prvalue such as `return MyString{"Hello"};`—so the compiler must eliminate the copy, not "can eliminate but doesn't have to." In those contexts it isn't an optional optimization; it's defined behavior of the language, even though for historical reasons it's still called an "optimization." NRVO on a named local like `str` remains optional in the standard, although mainstream compilers perform it whenever they can. -For the complete technical details of NRVO and RVO, we previously had a dedicated article in vol2: [RVO and NRVO: Compiler Return Value Optimization](../../../../vol2-modern-features/ch00-move-semantics/03-rvo-nrvo.md).
+For complete technical details on NRVO and RVO, we have a dedicated article in vol2: [RVO and NRVO: Compiler Return Value Optimization](../../../../vol2-modern-features/ch00-move-semantics/03-rvo-nrvo.md). ## Never Use std::move on Return Values -This is probably the most common mistake I've seen related to move semantics. As mentioned earlier, `return temp;` is implicitly movable, so the compiler will either perform NRVO (zero cost) or automatically fall back to move construction (the cost of one pointer assignment). Some people might think: since `std::move` "requests a move," wouldn't `return std::move(temp);` be more explicit and safer? +This is probably the most common mistake I've seen related to move semantics. We said earlier that `return result;` is implicitly movable, and the compiler either does NRVO (zero cost) or automatically falls back to move construction (cost of one pointer assignment). Some people think: since `std::move` is "requesting a move," wouldn't `return std::move(result);` be more explicit and safer? -**Exactly the opposite.** +**Completely opposite.** ```cpp -// 正确写法:允许 NRVO -MyString make_good() -{ - MyString temp("good"); - return temp; -} - -// 错误写法:阻止 NRVO! -MyString make_bad() -{ - MyString temp("bad"); - return std::move(temp); // 反而更慢! +MyString bad_create() { + MyString result{"Hello"}; + return std::move(result); // Blocks NRVO! } ``` -The reason lies in NRVO's trigger conditions : the `return` expression must be the name of a local object. When you write `return std::move(temp);`, the return expression is no longer the name `temp` — it's `std::move(temp)`, a function call expression. The compiler cannot perform NRVO on this expression and can only fall back to choosing move construction. +The reason lies in NRVO's trigger conditions: the `return` expression must be the name of a local variable. 
When you write `return std::move(result);`, the return expression is no longer the name `result`—it's `std::move(result)`, a function call expression. The compiler cannot perform NRVO on this expression and can only settle for move construction. -In other words, `return std::move(temp);` forces the compiler down the move construction path, while `return temp;` gives the compiler the opportunity to take the NRVO path (zero cost). This is why Ben Saks repeatedly emphasized in his talk: **never use `std::move` on a return value**. +In other words, `std::move(result)` forces the compiler down the move construction path, while `return result;` gives the compiler a chance at the NRVO path (zero cost). This is why Ben Saks repeatedly emphasized in the talk: **Don't use `std::move` on return values**. -We can use the compiler flag `-fno-elide-constructors` to compare the difference between the two. This flag disables GCC's copy elision optimization, letting us see what the world looks like "without NRVO." +We can use the `-fno-elide-constructors` compiler flag to compare the difference. This flag turns off GCC's copy elision optimization, letting us see what the world looks like "without NRVO." -First, let's look at `return temp;`'s behavior with elision disabled — it falls back to move construction, because `temp` is implicitly movable. And `return std::move(temp);` is also move construction — there's no difference between the two when elision is disabled. But once elision is enabled (the default behavior), `return temp;` becomes a no-op, while `return std::move(temp);` is still a move construction. That's where the difference lies. +First, look at `return result;` behavior with elision disabled—it falls back to move construction because `result` is implicitly movable. And `return std::move(result);` is also move construction—no difference when elision is disabled. 
But once elision is enabled (the default behavior), `return result;` becomes a no-op, while `return std::move(result);` remains a move construction. The gap is here. -I tested this with GCC 16.1.1, adding print logs to `MyString`'s various constructors, and the comparison results are as follows: +I tested this with GCC 16.1.1, adding print logs to `MyString`'s various constructors. The comparison results are: -```bash -# 默认开启 NRVO -$ g++ -std=c++20 -O2 test.cpp && ./a.out -=== return temp; (NRVO) === - 构造: "hello" # 只有这一次构造,没有移动,没有拷贝 +```text +// return result; (with NRVO) +Constructor called: 1x -=== return std::move(temp); === - 构造: "hello" - 移动构造: "hello" # 多了一次移动构造! - 析构: "(null)" +// return std::move(result); (NRVO blocked) +Constructor called: 1x +Move constructor called: 1x ``` -See? `return std::move(temp);` clearly has one extra move construction. For a class like `MyString` that only has a pointer and an integer, the cost of a move construction is very low (one pointer assignment), but for more complex classes (like objects containing multiple dynamic containers), the cost of this extra move cannot be ignored. +You see, `std::move(result)` explicitly adds one move construction. For a class like `MyString` with only pointers and integers, the move cost is low (one pointer assignment), but for more complex classes (like objects with multiple dynamic containers), the cost of this extra move cannot be ignored. -```bash -# 关闭 NRVO 后对比 -$ g++ -std=c++20 -O2 -fno-elide-constructors test.cpp && ./a.out -=== return temp; === - 构造: "hello" - 移动构造: "hello" # 没有 NRVO,退回到移动构造 - 析构: "(null)" +```text +// Both with -fno-elide-constructors +// return result; +Move constructor called: 1x -=== return std::move(temp); === - 构造: "hello" - 移动构造: "hello" # 同样是移动构造 - 析构: "(null)" +// return std::move(result); +Move constructor called: 1x ``` -With NRVO disabled, both indeed behave identically — both perform one move construction. 
But this precisely shows that `return std::move(temp);` wastes the NRVO opportunity for free under default settings. +With NRVO disabled, both behave the same—both are one move construction. But this precisely shows that `std::move(result)` wastes the NRVO opportunity for free in the default case. -:::warning C++20/C++23 Further Expand the Scope of "Implicitly Movable" -The rule discussed in this section — "don't use `std::move` on return values" — holds true across **all standard versions (C++11 through C++26)** and is absolutely safe advice. However, the "implicitly movable" mechanism itself has been continuously strengthened in subsequent standards, and it's worth knowing about: C++11 introduced the initial implicit move (when returning a local object, the compiler can treat it as a move); C++20 (proposal P1825, "More implicit moves") expanded the scope of "implicitly movable entities" — for example, local variables bound to rvalue references, and `throw` a local object, were also brought into implicit move territory; C++23 (proposal P2266) further refined this, making return values treated as xvalues in certain scenarios, covering more construction paths. +:::warning C++20/C++23 Further Expands "Implicitly Movable" Scope +The rule "Don't use `std::move` on return values" discussed in this section holds true in **all standard versions (C++11 to C++26)** and is absolutely safe advice. However, the "implicitly movable" mechanism itself is continuously strengthened in later standards, worth knowing: C++11 introduced initial implicit move (compiler treats returning a local object as a move); C++20 (proposal P1825 "More implicit moves") expanded the scope of "implicitly movable entities"—for example, local variables bound to rvalue references and `throw` of a local object are also included in implicit move; C++23 (proposal P2266) further refined this, making return values treated as xvalues in certain scenarios, covering more construction paths.
-But no matter how these extensions change, **the iron rule of "don't write `std::move` when returning a local object" has never changed** — P1825/P2266 expand the scope of "what the compiler can automatically move," while `std::move` actually breaks NRVO's trigger conditions. The conclusion remains the same: write `return temp;`, and leave the choice between NRVO and implicit move to the compiler. +But however these extensions change, **the iron rule "Don't write `std::move` when returning a local object" has never changed**—P1825/P2266 expand the scope of "what the compiler can automatically move," while `std::move` actually destroys NRVO's trigger conditions. Conclusion remains: write `return result;` and leave the choice of NRVO or implicit move to the compiler. ::: -## Moved-From State: Valid but Unspecified +## Moved-from State: Valid but Unspecified -After a move operation is complete, the source object is in a state that the standard calls **"valid but unspecified state"** . These words are worth breaking down one by one. +After a move operation, the source object is in a state the standard calls **"valid but unspecified state"**. These words are worth breaking down one by one. -"Valid" means: no memory leaks, no resource leaks, no undefined behavior (UB). You can safely let this object destruct — its destructor will execute normally, there will be no double free, and it won't crash. For our `MyString`, after moving, `actual_str_` is set to `nullptr`, and `stored_length_` becomes 0, so `delete[] nullptr` does nothing during destruction. +"Valid" means: no memory leaks, no resource leaks, no undefined behavior triggered. You can safely let this object destruct—its destructor will execute normally, no double free, no crash. For our `MyString`, after moving, `_data` is set to `nullptr` and `_size` becomes 0, so `delete[] _data` does nothing during destruction. -"Unspecified" means: you cannot make any assumptions about the values held by the moved-from object.
The standard doesn't mandate that a moved-from `std::string` must be an empty string, nor does it mandate that a moved-from `std::vector` must be empty. Different standard library implementations may have different behaviors. Our own `MyString` returns `"(null)"` after moving (this is our own safety fallback), but a moved-from `std::string` might return an empty string or it might return the original value — you can't rely on it. +"Unspecified" means: you cannot make any assumptions about the value held by the moved-from object. The standard doesn't mandate that a moved-from `std::string` must be an empty string, nor that a moved-from `std::vector` must be empty. Different standard library implementations may have different behaviors. Our own `MyString` returns `true` for `empty()` after moving (our own safety fallback), but a moved-from `std::string` might return an empty string or the original value—you can't rely on it. ```cpp -MyString a("hello"); -MyString b(std::move(a)); - -// 安全操作: -// 1. 析构 —— 永远安全 -// 2. 赋新值 —— 永远安全 -a = MyString("new value"); // OK - -// 不安全操作: -// 1. 假设 a 仍持有 "hello" -// 2. 假设 a.size() 是 0 -// 3. 假设 a.c_str() 返回空串 -// 这些假设在某些实现上可能碰巧成立,但标准不保证 +MyString a{"Hello"}; +MyString b{std::move(a)}; + +// a is in a valid but unspecified state +a.empty(); // Returns true (for our implementation) +// But don't rely on it! ``` -:::warning Usage Restrictions on Moved-From Objects -When Ben Saks was asked in the Q&A session "can a moved-from object still be used," his answer was very straightforward: **after a move, the only thing you should do with the source object is assign a new value to it or let it destruct**. Any other operation (reading values, comparing, passing to other functions) is a gamble — you might win (the implementation happens to give you a predictable value), or you might lose (the implementation changes or you switch to a different standard library). Don't gamble. 
+:::warning Usage Limits of Moved-from Objects +When Ben Saks was asked in the Q&A "Can a moved-from object still be used?", his answer was very blunt: **After moving, the only things you should do to the source object are assign it a new value or let it destruct**. Any other operation (reading values, comparing, passing to other functions) is a gamble—you might win (the implementation happens to give you a predictable value) or you might lose (the implementation changes or you switch standard libraries). Don't gamble. -Don't confuse "valid" with "useful" — a moved-from object is a legitimate object, but not one with determined contents. If you need an empty object, create one explicitly; if you need a specific value, assign it explicitly. Don't count on the move operation to do these things for you. +Don't confuse "valid" with "useful"—a moved-from object is a legal object, but not an object with determined content. If you need an empty object, create one explicitly; if you need a specific value, assign it explicitly. Don't count on move operations to do these for you. ::: -## The Importance of noexcept: The Hidden Trap in Vector Reallocation +## The Importance of noexcept: The Hidden Trap of Vector Reallocation -Finally, let's discuss an issue that is often overlooked in real-world engineering but has a massive impact: **move constructors should be `noexcept`**. +Finally, let's talk about a problem often ignored in actual engineering but with huge impact: **move constructors should be `noexcept`**. -Why? Let's look at the `std::vector` reallocation scenario. When `vector`'s capacity is insufficient, it needs to allocate a larger block of memory and then transfer the old elements to the new memory. If the element's move constructor is `noexcept`, `vector` will use moving to transfer them — very fast. If the move constructor is not `noexcept`, `vector` will fall back to copying . +Why? Look at the `std::vector` reallocation scenario. 
When `std::vector`'s capacity is insufficient, it needs to allocate a larger block of memory and then transfer the old elements to the new memory. If the element's move constructor is `noexcept`, `std::vector` will use move to transfer—very fast. If the move constructor is not `noexcept`, `std::vector` will fall back to copy. -This is because `vector` needs to provide a strong exception safety guarantee: if an exception is thrown during reallocation, `vector`'s state must be rolled back to before the reallocation. If moving is used, once an exception is thrown midway, the already-moved elements cannot be restored (their resources have already been stolen). If copying is used, the original data is still there, and a safe rollback is possible. +This is because `std::vector` must provide a strong exception safety guarantee: if an exception is thrown during reallocation, `std::vector`'s state must roll back to before reallocation. If move is used, once an exception is thrown mid-way, the moved elements can't be restored (their resources have been stolen). If copy is used, the original data is still there and can be safely rolled back. Let's write a simple test to verify this behavior: ```cpp -#include -#include -#include - -class StringNoNoexcept -{ - std::size_t len_; - char* str_; - -public: - StringNoNoexcept(const char* s) - : len_(std::strlen(s)) - , str_(new char[len_ + 1]) - { - std::memcpy(str_, s, len_ + 1); - std::cout << " ctor: " << str_ << "\n"; - } - - ~StringNoNoexcept() - { - delete[] str_; - } - - StringNoNoexcept(const StringNoNoexcept& o) - : len_(o.len_) - , str_(new char[o.len_ + 1]) - { - std::memcpy(str_, o.str_, len_ + 1); - std::cout << " COPY ctor: " << str_ << "\n"; - } - - // 没有 noexcept! - StringNoNoexcept(StringNoNoexcept&& o) - : len_(o.len_) - , str_(o.str_) - { - o.str_ = nullptr; - o.len_ = 0; - std::cout << " MOVE ctor: " << (str_ ? 
str_ : "(null)") << "\n"; - } +std::vector<MyString> vec; +vec.reserve(2); // Reserve space for 2 - const char* c_str() const { return str_ ? str_ : "(null)"; } -}; - -int main() -{ - std::vector vec; - vec.reserve(2); - - std::cout << "=== push 3 elements (triggers reallocation) ===\n"; - vec.emplace_back("AAA"); - vec.emplace_back("BBB"); - vec.emplace_back("CCC"); // 这里触发扩容 - - std::cout << "\n=== final contents ===\n"; - for (const auto& s : vec) { - std::cout << " " << s.c_str() << "\n"; - } - return 0; -} +vec.emplace_back("A"); +vec.emplace_back("B"); +vec.emplace_back("C"); // Triggers reallocation ``` -After compiling and running, you'll see output like this (GCC 16.1.1, `-std=c++20 -O2`): +After compiling and running, you will see output like this (GCC 16.1.1, `-std=c++23 -O0`): -```bash -$ g++ -std=c++20 -O2 test_noexcept.cpp && ./a.out -=== push 3 elements (triggers reallocation) === - ctor: AAA - ctor: BBB - ctor: CCC - COPY ctor: AAA # 扩容时用的是拷贝!不是移动! - COPY ctor: BBB +```text +Copy constructor called +Copy constructor called ``` -See that? When the third element triggers reallocation, `vector` **copies** the first two elements to the new memory — even though we clearly implemented a move constructor. The reason is that our move constructor isn't marked `noexcept`. +See? When the third element triggers reallocation, `std::vector` **copied** the first two elements to the new memory—even though we explicitly implemented a move constructor. The reason is our move constructor wasn't marked `noexcept`.
-Now let's add `noexcept` to the move constructor: +Now add `noexcept` to the move constructor: ```cpp -StringNoNoexcept(StringNoNoexcept&& o) noexcept // 加上 noexcept +MyString(MyString&& other) noexcept + : _data{other._data} + , _size{other._size} +{ + other._data = nullptr; + other._size = 0; +} ``` Recompile and run: -```bash -$ g++ -std=c++20 -O2 test_noexcept.cpp && ./a.out -=== push 3 elements (triggers reallocation) === - ctor: AAA - ctor: BBB - ctor: CCC - MOVE ctor: AAA # 现在用移动了! - MOVE ctor: BBB +```text +Move constructor called +Move constructor called ``` -The difference of a single `noexcept` keyword directly determines whether `vector` uses copy or move during reallocation. For a class that holds dynamic memory, in scenarios with large amounts of data, this difference can mean an order-of-magnitude performance gap. +The difference of one `noexcept` keyword directly determines whether `std::vector` copies or moves during reallocation. For a class holding dynamic memory, in large data scenarios, this difference can mean an order of magnitude performance gap. -This is a genuine production-level trap. Many people write move constructors but forget to add `noexcept`, and then are puzzled in performance testing about "why move semantics aren't taking effect." The answer often lies in those two words. +This is a real production-level trap. Many people write move constructors but forget to add `noexcept`, then wonder in performance tests "why move semantics didn't take effect." The answer often lies in these two words. 
-## The Complete MyString: The Rule of Five Assembled +## Complete MyString: The Big Five Gathered -Combining the content of this article with the previous two, we get a complete, Rule of Five-compliant `MyString` implementation: +Combining this article and the previous two, we get a complete, Rule of Five-compliant `MyString` implementation: ```cpp -#include -#include - -class MyString -{ - std::size_t stored_length_; - char* actual_str_; - +class MyString { public: - // 构造函数 - explicit MyString(const char* s = "") - : stored_length_(std::strlen(s)) - , actual_str_(new char[stored_length_ + 1]) + // Constructor + explicit MyString(const char* str = "") + : _size{std::strlen(str)} + , _data{new char[std::strlen(str) + 1]} { - std::memcpy(actual_str_, s, stored_length_ + 1); + std::strcpy(_data, str); } - // 析构函数 - ~MyString() - { - delete[] actual_str_; + // Destructor + ~MyString() { + delete[] _data; } - // 拷贝构造函数 + // Copy constructor MyString(const MyString& other) - : stored_length_(other.stored_length_) - , actual_str_(new char[other.stored_length_ + 1]) - { - std::memcpy(actual_str_, other.actual_str_, stored_length_ + 1); - } - - // 移动构造函数 —— noexcept! - MyString(MyString&& s) noexcept - : stored_length_(s.stored_length_) - , actual_str_(s.actual_str_) + : _size{other._size} + , _data{new char[other._size + 1]} { - s.actual_str_ = nullptr; - s.stored_length_ = 0; + std::strcpy(_data, other._data); } - // 拷贝赋值运算符 - MyString& operator=(const MyString& other) - { + // Copy assignment + MyString& operator=(const MyString& other) { if (this != &other) { - delete[] actual_str_; - stored_length_ = other.stored_length_; - actual_str_ = new char[stored_length_ + 1]; - std::memcpy(actual_str_, other.actual_str_, stored_length_ + 1); + delete[] _data; + _size = other._size; + _data = new char[_size + 1]; + std::strcpy(_data, other._data); } return *this; } - // 移动赋值运算符 —— noexcept!
- MyString& operator=(MyString&& s) noexcept + // Move constructor + MyString(MyString&& other) noexcept + : _data{other._data} + , _size{other._size} { - if (this != &s) { - delete[] actual_str_; - stored_length_ = s.stored_length_; - actual_str_ = s.actual_str_; - s.actual_str_ = nullptr; - s.stored_length_ = 0; + other._data = nullptr; + other._size = 0; + } + + // Move assignment + MyString& operator=(MyString&& other) noexcept { + if (this != &other) { + delete[] _data; + _data = other._data; + _size = other._size; + other._data = nullptr; + other._size = 0; } return *this; } - const char* c_str() const { return actual_str_ ? actual_str_ : "(null)"; } - std::size_t size() const { return stored_length_; } + size_t size() const { return _size; } + bool empty() const { return _size == 0; } + +private: + char* _data; + size_t _size; }; ``` -All five special member functions — destructor, copy constructor, copy assignment, move constructor, and move assignment — are present and accounted for. This is the so-called Rule of Five: if you need to customize any one of them, you most likely need to customize all five. The compiler-generated default versions are unsafe for classes that hold raw pointers. +Five special member functions—destructor, copy constructor, copy assignment, move constructor, move assignment—all present. This is the so-called Rule of Five: if you need to customize any one of them, you most likely need to customize all five. The compiler-generated default versions are unsafe for classes holding raw pointers. ## What We've Cleared Up -Across three articles, we started from the three deep copies of `swap`, went through the value category system of lvalues and rvalues, and finally in this article broke down all the implementation details of move operations. Let me use a concise checklist to review the core points of this article.
+Three articles down, we started from the three deep copies of `std::string`, passed through the value category system of lvalues and rvalues, and finally unpacked all implementation details of move operations in this article. Let me use a concise list to review this article's core points. -The core of a move constructor is "destructive copy" — steal the source object's resource pointer, then set the source object to a harmless state. Overload resolution automatically selects between copy and move; you don't need to make extra judgments at the call site. `std::move` doesn't move anything; it's simply a cast to an rvalue reference that enables overload resolution to select the move version. An rvalue reference parameter is an lvalue inside a function — because it has a name — so you still need `std::move` to move from it. The `return` statement is an exception to the "if it has a name, it's an lvalue" rule; the compiler automatically identifies implicitly movable return expressions. NRVO can deliver return values to the caller at zero cost — and `return std::move(temp)` prevents NRVO, so never write it that way. A moved-from object is in a "valid but unspecified" state; the only safe operations are assigning a new value or destructing it. Move constructors must be marked `noexcept` — otherwise `std::vector` will fall back to copying during reallocation, and the performance gap can be enormous. +The move constructor's core is "destructive copy"—steal the source object's resource pointer, then set the source object to a harmless state. Overload resolution automatically selects copy or move; you don't need to make extra judgments at the call site. `std::move` doesn't move anything; it's just a cast to an rvalue reference, enabling overload resolution to select the move version. Rvalue reference parameters are lvalues inside the function—because they have names—so you still need `std::move` to move from them. 
The `return` statement is the exception to the "named means lvalue" rule; the compiler automatically recognizes implicitly movable return expressions. NRVO can deliver return values to the caller at zero cost—and `std::move` blocks NRVO, so never write it that way. Moved-from objects are in a "valid but unspecified" state; the only safe operations are assign a new value or destruct. Move constructors must be marked `noexcept`—otherwise `std::vector` reallocation falls back to copy, and the performance gap can be huge. -If you want to dive deeper into more application scenarios of move semantics — perfect forwarding, universal references, reference collapsing — check out vol2's [Perfect Forwarding: Preserving Exact Value Category Propagation](../../../../vol2-modern-features/ch00-move-semantics/04-perfect-forwarding.md). Move semantics combined with perfect forwarding form the complete foundation of modern C++ template programming. +If you want to continue deeper into more application scenarios of move semantics—perfect forwarding, universal references, reference collapsing—check out vol2's [Perfect Forwarding: Precise Transmission of Value Categories](../../../../vol2-modern-features/ch00-move-semantics/04-perfect-forwarding.md). Move semantics combined with perfect forwarding form the complete foundation of modern C++ template programming. 
- C++: Exploring the Underlying Assembly + C++: Some Assembly Required --- diff --git a/documents/en/vol2-modern-features/ch07-attributes/02-modern-attributes.md b/documents/en/vol2-modern-features/ch07-attributes/02-modern-attributes.md index e45032efc..6c0e1dad1 100644 --- a/documents/en/vol2-modern-features/ch07-attributes/02-modern-attributes.md +++ b/documents/en/vol2-modern-features/ch07-attributes/02-modern-attributes.md @@ -1,7 +1,7 @@ --- title: 'C++20-23 New Attributes: Performance-Oriented Compiler Hints' -description: New attributes such as `[[likely]]`/`[[unlikely]]`, `[[no_unique_address]]`, - and `[[assume]]` +description: '[[likely]]/[[unlikely]], [[no_unique_address]], [[assume]], and other + new attributes' chapter: 7 order: 2 tags: @@ -20,184 +20,167 @@ related: - constexpr 构造函数与字面类型 translation: source: documents/vol2-modern-features/ch07-attributes/02-modern-attributes.md - source_hash: 43da80c9352eaa4ed70b8b1c323894a796496bd2845c94d8a0ca6f59edab8df7 - translated_at: '2026-05-26T11:32:52.339754+00:00' + source_hash: f2a8984b78649a0904715ec0cfc829732f4a4300acc9ce5b747c822345a9f146 + translated_at: '2026-06-14T00:18:22.744952+00:00' engine: anthropic - token_count: 2568 + token_count: 2676 --- # C++20-23 New Attributes: Performance-Oriented Compiler Hints -In the previous chapter, we looked at C++11-17 standard attributes, which primarily addressed "code correctness" — enforcing return value checks, suppressing warnings, and marking deprecated APIs. The new attributes in C++20 and C++23 shift direction: they focus more on performance, providing optimization hints to the compiler. `[[likely]]` and `[[unlikely]]` help the compiler with branch prediction optimization (ah, I remember first encountering this when reading GNU C feature code), `[[no_unique_address]]` saves redundant space in memory layouts, and `[[assume]]` lets the compiler make more aggressive optimizations based on assumptions. 
+In the previous chapter, we looked at standard attributes from C++11 to C++17, which primarily address "code correctness" issues—enforcing return value checks, eliminating warnings, and marking deprecated APIs. The new attributes added in C++20 and C++23 shift direction: they focus more on performance, providing optimization hints to the compiler. `[[likely]]` and `[[unlikely]]` help the compiler optimize branch prediction (aha, I recall first encountering this when looking at GNU C extensions), `[[no_unique_address]]` saves redundant space in memory layouts, and `[[assume]]` allows the compiler to perform more aggressive optimizations based on assumptions. -When used correctly, these attributes can deliver tangible performance gains, but when misused, they can be counterproductive. Let's break them down one by one. +When used correctly, these attributes can deliver tangible performance gains, but misuse can be counterproductive. Let's break them down one by one. -> In a nutshell: **C++20-23 new attributes shift from "helping the compiler find bugs" to "helping the compiler optimize code." Using them in the right scenarios and verifying the results is the right approach.** +> TL;DR: **New attributes in C++20-23 shift from "helping the compiler find bugs" to "helping the compiler optimize code." Using them in the right scenarios and verifying the results is the way to go.** ------ ## [[likely]] and [[unlikely]] (C++20): Branch Prediction Hints -### Why Manual Hints Are Needed +### Why manual hints are needed -Modern CPUs have dynamic branch predictors that guess branch directions based on runtime history. In most cases, the CPU's guesses are smart enough. 
However, manual hints are still valuable in the following scenarios: first, when a function is called for the first time and the branch predictor has no historical data yet; second, in embedded systems where some CPUs have relatively simple branch predictors; and third, because the compiler can improve instruction cache hit rates by adjusting code layout (keeping hot paths together). +Modern CPUs have dynamic branch predictors that guess branch directions based on runtime history. In most cases, the CPU is smart enough. However, manual hints are still valuable in specific scenarios: first, when a function is called for the first time, the branch predictor has no historical data; second, some CPUs in embedded systems have simpler branch predictors; and third, compilers can improve instruction cache hit rates by adjusting code layout (keeping hot paths together). -`[[likely]]` tells the compiler "this branch is more likely to be executed," while `[[unlikely]]` means "this branch is rarely executed." +`[[likely]]` tells the compiler "this branch is more likely to be executed," while `[[unlikely]]` indicates "this branch is rarely executed." -### Syntax and Placement +### Syntax and placement -This pair of attributes can be placed inside the branch body of an `if` statement, or on the `case` label of an `switch`: +These attributes can be placed in the branch body of an `if` statement, or on the `case` label of a `switch` statement: ```cpp -// 放在 if 分支中 -if (error == ErrorCode::Ok) [[likely]] { - // 正常路径——大概率执行 - process_data(); +// 1. Applied to the statement block +if (cond) { + [[likely]] // Placed before the statement block + do_something(); } else { - // 错误路径——小概率执行 + [[unlikely]] handle_error(); } -// 放在 switch case 上 -switch (status) { - [[likely]] case Status::Running: - run_task(); +// 2. 
Applied to switch case labels +switch (value) { + [[unlikely]] case 0: + handle_rare_case(); break; - case Status::Error: - recover(); - break; - default: + [[likely]] default: + handle_common_case(); break; } ``` -⚠️ Note the placement of the attribute: `[[likely]]` goes before the `{` of the branch body, not on the condition expression. This is mandated by the C++20 standard. +⚠️ **Note on attribute placement:** `[[likely]]` is placed before the statement block of the branch, not on the conditional expression itself. This is mandated by the C++20 standard. -### Analyzing the Actual Effect: Look at the Assembly First +### Analyzing actual effects: Look at the assembly first -Many articles will tell you "adding `[[likely]]` makes the compiler optimize code layout," but what exactly is optimized? Talk is cheap, let's look at the assembly directly. The following test was compiled with GCC 15 at `-O2 -std=c++20`: +Many articles will tell you that "adding `[[likely]]` makes the compiler optimize code layout," but what exactly is optimized? Talk is cheap; let's look at the assembly directly. 
The following test uses GCC 15 with `-O2`: ```cpp -// 不加提示 -int process_no_hint(int value) { - if (value > 0) { - return value * 2; - } else { - return -value; - } +int add_if(int x, int y) { + if (x > 0) [[likely]] + return x + y; + else + return x - y; } -// 加 [[likely]] -int process_likely(int value) { - if (value > 0) [[likely]] { - return value * 2; - } else { - return -value; - } +int add_unlikely(int x, int y) { + if (x > 0) [[unlikely]] + return x + y; + else + return x - y; } ``` The assembly generated for both functions is **exactly the same**: -```asm -process_no_hint: -process_likely: - movl %edi, %eax - leal (%rdi,%rdi), %edx - negl %eax - testl %edi, %edi - cmovg %edx, %eax - ret +```text +add_if(int, int): + cmpl $0, %edi + movl %edx, %eax + leal (%rdi,%rdx), %edx + cmovg %edx, %eax + ret +add_unlikely(int, int): + cmpl $0, %edi + movl %edx, %eax + leal (%rdi,%rdx), %edx + cmovg %edx, %eax + ret ``` -The compiler didn't generate a conditional branch at all — it used `cmovg` (conditional move) to compute both paths, then selected one based on the result of `testl`. Branch prediction? Nonexistent. `[[likely]]` has no effect here because the compiler already found a better approach than branching. +The compiler didn't generate a conditional branch at all—it used `cmovg` (conditional move) to calculate both paths and then select one based on the result of `cmpl`. Branch prediction? Non-existent. `[[likely]]` has no effect here because the compiler found a solution better than branching. -This isn't an isolated case. Modern compilers at `-O2` or even `-O1` often optimize simple conditional branches into `cmov`, bitwise operations, or mathematical formulas, turning `[[likely]]` into pure "code comments." 
The scenarios where you can actually see `[[likely]]` affecting code layout are typically: when the branch body is fairly long (more than a few instructions), when the branch contains function calls or memory operations, or when the compiler cannot replace the logic with `cmov`. +This isn't an isolated case. Modern compilers, even at `-O2` or `-O3`, often optimize simple conditional branches into `cmov`, bit operations, or mathematical formulas, rendering `[[likely]]` a mere "code comment." Scenarios where `[[likely]]` actually affects code layout are usually those where: the branch body is long (more than a few instructions), the branch contains function calls or memory operations, or the logic is too complex for the compiler to replace with `cmov`. -### When It's Worth Using +### When is it worth using -So `[[likely]]` isn't a magic switch that "makes things faster when added." The correct approach is: first use profiling (such as `perf stat -e branch-misses`) to confirm that a branch's misprediction rate is indeed high, then consider adding a hint. Before adding it, compare the assembly to confirm the compiler actually changed the code layout. If the assembly hasn't changed, it means the compiler already optimized it in a better way, and `[[likely]]` is just redundant information noise. +So, `[[likely]]` isn't a magic switch where "adding it makes it faster." The correct approach is: first use profiling (like `perf`) to confirm that a specific branch has a high misprediction rate, then consider adding hints. Compare the assembly before and after to ensure the compiler actually changed the code layout. If the assembly hasn't changed, it means the compiler already optimized it in a better way, and `[[likely]]` is just redundant information noise. -Typical effective scenarios include: error-checking branches (normal path `[[likely]]`, error path `[[unlikely]]`), boundary condition handling, and complex branch logic that the compiler cannot replace with `cmov`. 
+Typical effective scenarios include: error checking branches (normal path `[[likely]]`, error path `[[unlikely]]`), boundary condition handling, and complex logic where the compiler cannot substitute with `cmov`. -### Comparison with Compiler Builtins +### Comparison with compiler built-ins -Before `[[likely]]` appeared, GCC/Clang used `__builtin_expect` for branch prediction hints: +Before `[[likely]]` existed, GCC/Clang used `__builtin_expect` for branch prediction hints: ```cpp -// 旧写法 -if (__builtin_expect(error == ErrorCode::Ok, 1)) { - process_data(); -} +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) -// 新写法 -if (error == ErrorCode::Ok) [[likely]] { - process_data(); +if (UNLIKELY(err != success)) { + // ... } ``` -`[[likely]]` is much more readable, and the standardized attribute means it works on all compilers supporting C++20. +`[[likely]]` is much more readable, and being a standardized attribute means it works on all compilers supporting C++20. ------ ## [[no_unique_address]] (C++20): Empty Base Optimization -### The Problem: Empty Classes Still Take 1 Byte +### The problem: Empty classes still take up 1 byte -The C++ standard requires every complete object to have a unique address, which means even "empty classes" with no data members have an `sizeof` of at least 1. When you use an empty class as a member of another class, it wastes a byte for nothing: +The C++ standard requires every complete object to have a unique address. This means that even "empty classes" with no data members have a `sizeof` at least 1. 
When you use an empty class as a member of another class, it wastes a whole byte: ```cpp -struct Empty { - void foo() {} // 只有成员函数,没有数据成员 -}; +struct Empty {}; // sizeof(Empty) is 1 -struct Container { - Empty e; // sizeof(Empty) == 1,浪费 - int x; +struct Widget { + int data; + Empty e; // Wastes 1 byte here }; - -static_assert(sizeof(Empty) == 1); -static_assert(sizeof(Container) == sizeof(int) + 1); // 可能有 padding +// sizeof(Widget) is likely 8 (4 + 1 + 3 padding) ``` -For most applications, wasting 1 byte is negligible, but in generic programming, policy classes (allocators, mutex policies, etc.) are often empty. If multiple policy classes are members simultaneously, each taking 1 byte, the accumulated waste becomes significant. More critically, this can cause `sizeof` results to deviate from expectations, affecting optimizations like cache line alignment. +For most applications, wasting 1 byte is negligible. However, in generic programming, policy classes (allocator, mutex policy, etc.) are often empty. If multiple policy classes are members simultaneously, each taking 1 byte, the waste adds up. More critically, this makes `sizeof` results unexpected, affecting optimizations like cache line alignment. 
-### The Traditional EBO Approach +### The traditional EBO solution -The traditional solution is Empty Base Optimization (EBO) — holding empty classes through inheritance rather than as members, so the compiler doesn't need to allocate independent space for them: +The traditional solution is Empty Base Optimization (EBO)—holding empty classes via inheritance instead of membership, so the compiler doesn't need to allocate separate space for them: ```cpp -struct Empty {}; - -// 传统 EBO:通过继承 -struct Container : private Empty { - int x; +// Base class optimization +struct Widget : private Empty { + int data; }; - -static_assert(sizeof(Container) == sizeof(int)); // Empty 不占空间 +// sizeof(Widget) is likely 4 ``` -But EBO has a few drawbacks: you can only inherit from one empty base class of the same type (you can't simultaneously inherit two `Empty`s); inheritance is a very strong coupling relationship, and modifying inheritance hierarchies just to save memory is unreasonable; and some coding standards prohibit private inheritance. +But EBO has downsides: you can only inherit from one base class of the same type (you can't inherit from two `Empty` bases directly); inheritance is a strong coupling relationship, and modifying inheritance just to save memory is unreasonable; and some coding standards prohibit private inheritance. 
-### The [[no_unique_address]] Approach +### The [[no_unique_address]] solution -C++20's `[[no_unique_address]]` lets you achieve the same optimization through member variables (rather than inheritance): +The `[[no_unique_address]]` attribute introduced in C++20 allows you to achieve the same optimization via member variables (instead of inheritance): ```cpp -struct Empty { - void foo() {} -}; - -struct Container { - [[no_unique_address]] Empty e; // 如果 Empty 是空类,e 不占空间 - int x; +struct Widget { + int data; + [[no_unique_address]] Empty e; }; - -static_assert(sizeof(Container) == sizeof(int)); // e 被优化掉了 +// sizeof(Widget) is likely 4 ``` -### Application in the Strategy Pattern +### Application in the Strategy pattern -`[[no_unique_address]]` is particularly useful in the strategy pattern. Suppose you have a container class that accepts an allocator policy and a lock policy as template parameters. In a single-threaded scenario, the lock policy is an empty class (all methods are no-ops), and you don't want it to waste space: +`[[no_unique_address]]` is particularly useful in the Strategy pattern. Suppose you have a container class that accepts an allocator strategy and a lock strategy as template parameters. In a single-threaded scenario, the lock strategy is an empty class (all methods are no-ops), and you don't want it to waste space: ```cpp struct NullMutex { @@ -205,70 +188,46 @@ struct NullMutex { void unlock() {} }; -struct StdMutex { - void lock() { mtx_.lock(); } - void unlock() { mtx_.unlock(); } -private: - std::mutex mtx_; +struct RealMutex { + void lock() { /* ... */ } + void unlock() { /* ... */ } + std::mutex m; }; -template -class ThreadSafeBuffer { -public: - void push(const T& item) { - mutex_.lock(); - // ... 
添加元素 - mutex_.unlock(); - } - -private: - [[no_unique_address]] Mutex mutex_; - T* data_; - std::size_t size_; - std::size_t capacity_; +template +class Container { + // In single-threaded mode, NullMutex takes up 0 space + [[no_unique_address]] MutexPolicy mutex_; + // ... other data members ... }; - -// 单线程版本:NullMutex 不占空间 -ThreadSafeBuffer single_thread_buf; -static_assert(sizeof(single_thread_buf) == sizeof(void*) + sizeof(std::size_t) * 2); - -// 多线程版本:std::mutex 占实际空间 -ThreadSafeBuffer multi_thread_buf; -// sizeof 包含 std::mutex 的大小 ``` -This design lets you flexibly switch policies via template parameters without sacrificing memory efficiency. In single-threaded scenarios, not a single byte is wasted; in multi-threaded scenarios, a real mutex is used. +This design allows you to flexibly switch strategies via template parameters without sacrificing memory efficiency. In single-threaded mode, not a single byte is wasted; in multi-threaded mode, a real mutex is used. ### Caveats -There are some details to note about `[[no_unique_address]]`. Multiple `[[no_unique_address]]` members of the same type might share the same address (because they are all empty classes and don't need to be distinguished), though the exact behavior depends on the compiler implementation: +There are some details to watch out for with `[[no_unique_address]]`. 
Multiple `[[no_unique_address]]` members of the same type might share the same address (since they are all empty and don't need distinction), and the specific behavior depends on the compiler implementation: ```cpp -struct A { +struct Widget { [[no_unique_address]] Empty e1; [[no_unique_address]] Empty e2; - int x; + int data; }; - -A a; -// &a.e1 == &a.e2 可能为 true!(GCC 15.2.1 中不一定,但第一个空成员可能与后续非空成员共享地址) +// It is possible that &e1 == &e2 == &data ``` -> **Verification**: Tested on GCC 15.2.1, multiple `[[no_unique_address]]` empty members don't necessarily share the same address, but the first empty member's address might be the same as a subsequent non-empty member. The optimization effect of `sizeof` is definite and significant. +> **Verification**: Tested on GCC 15.2.1, multiple `[[no_unique_address]]` empty members do not necessarily share the same address, but the first empty member's address may be the same as subsequent non-empty members. The optimization effect of `[[no_unique_address]]` is definite and significant. -If you need to take the addresses of these members or point references to them, be extra careful — their addresses might be identical. Additionally, this attribute only works for empty classes. If the class has data members, adding it has no effect: +If you need to take the address of these members or point to them with references, be extremely careful—their addresses might be identical. Additionally, this attribute only works for empty classes. If the class has data members, adding it has no effect: ```cpp -struct NotEmpty { int data; }; - -struct Test { - [[no_unique_address]] NotEmpty e; // e 仍然占 sizeof(int) - int x; +struct NotEmpty { + [[no_unique_address]] int x; // No effect, x takes up space }; -static_assert(sizeof(Test) == 2 * sizeof(int)); ``` -Furthermore, MSVC has bugs with `[[no_unique_address]]` support in certain versions — even empty classes might not be optimized. 
This requires special attention in cross-platform projects, and we recommend verifying `sizeof` results on the target platform. +Also, MSVC in some versions has bugs regarding `[[no_unique_address]]` support—even empty classes might not be optimized. This requires special attention in cross-platform projects; it is recommended to verify `sizeof` results on the target platform. ------ @@ -276,102 +235,91 @@ Furthermore, MSVC has bugs with `[[no_unique_address]]` support in certain versi ### Semantics -C++23's `[[assume(expression)]]` tells the compiler "please assume `expression` is true," and the compiler can make more aggressive optimizations based on this assumption. If `expression` is actually false at runtime, the behavior is undefined. +The `[[assume]]` attribute introduced in C++23 tells the compiler "please assume this expression is true," allowing the compiler to perform more aggressive optimizations based on this assumption. If the expression is actually false at runtime, the behavior is undefined. -This is different from `assert`. `assert` checks the condition at runtime and terminates the program on failure; `[[assume]]` performs no runtime check at all, it simply lets the compiler optimize with confidence. +This differs from `assert`. `assert` checks the condition at runtime and terminates the program if it fails; `[[assume]]` performs no runtime check at all, simply letting the compiler optimize boldly. ### Example ```cpp -int divide(int a, int b) { - [[assume(b != 0)]]; +int safe_divide(int a, int b) { + [[assume(b != 0)]]; // Tell the compiler b is never 0 return a / b; } ``` -In this example, the compiler can theoretically eliminate the divide-by-zero check code path and generate faster division instructions. But if you pass in `b == 0`, the consequences are undefined — it might crash, return garbage values, or appear to work normally while silently causing corruption.
+In this example, the compiler can theoretically omit the divide-by-zero check code path and generate faster division instructions. But if you pass `0` for `b`, the consequences are undefined—it might crash, return garbage, or look normal while secretly corrupting state. -> **Verification**: At GCC 15.2.1's `-O2` optimization level, a simple division function generates the same assembly whether or not `[[assume]]` is used. This shows that for such simple scenarios, the compiler already performs sufficient optimization. The value of `[[assume]]` mainly manifests in more complex scenarios where the compiler cannot infer invariants through static analysis. +> **Verification**: Under GCC 15.2.1's `-O3` optimization level, a simple division function generates the same assembly code whether or not `[[assume]]` is used. This indicates that for simple scenarios, the compiler has already done sufficient optimization. The value of `[[assume]]` is mainly seen in more complex scenarios where the compiler cannot deduce invariants through static analysis. ### Comparison with __builtin_assume -Before `[[assume]]`, MSVC used `__assume`, and GCC used `__builtin_assume` (though GCC's more common approach is `if (cond) __builtin_unreachable()`): +Before `[[assume]]`, MSVC used `__assume` and Clang used `__builtin_assume`; GCC has no such built-in, and the usual idiom there is `if (!cond) __builtin_unreachable();`: ```cpp -// MSVC -__assume(b != 0); - -// GCC -if (b == 0) __builtin_unreachable(); +// GCC style +void func(int* p) { + if (!p) __builtin_unreachable(); // p is not null + // ... +} -// C++23 标准写法 -[[assume(b != 0)]]; +// C++23 style +void func(int* p) { + [[assume(p != nullptr)]]; + // ... +} ``` -### Use Cases +### Usage scenarios -Typical use cases for `[[assume]]` are: when you have definitive knowledge of certain runtime conditions, but the compiler cannot infer them through static analysis.
For example, if you know an array access will never go out of bounds, or that a pointer will never be null: +Typical use cases for `[[assume]]` are: you have definitive knowledge of certain runtime conditions that the compiler cannot infer through static analysis. For example, you know an array access will never go out of bounds, or you know a pointer is never null: ```cpp -void process_array(int* data, std::size_t size) { - [[assume(data != nullptr)]]; - [[assume(size > 0)]]; - - for (std::size_t i = 0; i < size; ++i) { - // 编译器可以省略 null 检查和越界检查 - data[i] *= 2; - } +void process_buffer(const int* arr, size_t size) { + [[assume(size % 16 == 0)]]; // Size is always a multiple of 16 + [[assume(arr != nullptr)]]; + // Compiler can now auto-vectorize more aggressively } ``` -⚠️ Warning: `[[assume]]` is the most dangerous of all attributes. If your assumption is wrong, the program's behavior is completely unpredictable. The author recommends using it only after thorough profiling, confirming the bottleneck, and when you can 100% guarantee the condition always holds. In 99% of your code, you don't need it. +⚠️ **Warning**: `[[assume]]` is the most dangerous of all attributes. If your assumption is wrong, the program's behavior is completely unpredictable. I recommend using it only after full profiling, confirming a bottleneck, and when you can 100% guarantee the condition always holds. In 99% of code, you don't need it. ------ ## C++20 [[nodiscard]] Enhancements -The previous chapter already mentioned that C++20 added the ability to include custom messages with `[[nodiscard]]`. Here is a brief supplement. +The previous chapter mentioned that C++20 added the ability to include custom messages with `[[nodiscard]]`. Here is a brief supplement. -### nodiscard Extensions in the Standard Library +### Extension of nodiscard in the standard library C++20 also expanded the application scope of `[[nodiscard]]` in the standard library.
The following standard library functions are marked with `[[nodiscard]]`: -- `std::vector::empty()` (since C++20) -- `std::string::empty()` (since C++20) +- `std::vector::empty()` (since C++20) +- `std::string::empty()` (since C++20) -> **Verification**: Tested in libstdc++ 15.2.1, the `empty()` method does indeed produce a nodiscard warning. However, the claim in some articles that the `std::unique_ptr` and `std::shared_ptr` types themselves are marked `[[nodiscard]]` is not accurate in the current implementation — at least `std::make_unique()` and its constructors do not produce warnings. Different standard library implementations (libstdc++, libc++, MSVC STL) may have varying levels of support for this. +> **Verification**: In testing with libstdc++ 15.2.1, the `empty()` method does produce a `nodiscard` warning. However, the claim in some articles that the `std::unique_ptr` and `std::shared_ptr` types themselves are marked `[[nodiscard]]` is not accurate in current implementations—at least `std::make_unique()` and the smart-pointer constructors do not produce warnings. Support for this varies across standard library implementations (libstdc++, libc++, MSVC STL). -This means if you write `vec.empty();` instead of `if (vec.empty())`, a C++20 compiler will emit a warning. This used to be a common source of bugs — `empty()` looks like "clear," but it actually means "check if empty." With `[[nodiscard]]` added, misused code will at least get a warning. +This means if you write `vec.empty()` instead of `vec.clear()`, a C++20 compiler will issue a warning. Previously, this was a common source of bugs—`empty()` looks like "clear", but actually means "is empty". With `[[nodiscard]]`, misused code at least gets a warning reminder.
```cpp std::vector vec = {1, 2, 3}; - -// C++20 之前:不检查返回值,静默通过 -vec.empty(); // 看起来像是清空操作,实际上什么都没做 - -// C++20:编译器发出 nodiscard 警告 -vec.empty(); // warning: ignoring return value of 'empty()' +vec.empty(); // Warning: ignoring return value of 'empty' [-Wunused-result] ``` -### Using nodiscard Messages in Your Own Code +### Using nodiscard messages in your own code -For library authors, `[[nodiscard("reason")]]` is very practical. You can explain in the message why the return value shouldn't be ignored, and what the correct usage is: +For library authors, `[[nodiscard("reason")]]` is very practical. You can explain in the message why the return value shouldn't be ignored and how to use it correctly: ```cpp -// 告诉调用方为什么需要检查返回值 -[[nodiscard("Memory leak: returned pointer must be freed")]] -void* allocate_buffer(std::size_t size); - -// 告诉调用方应该怎么用 -[[nodiscard("Store the lock_guard to keep the mutex locked")]] -std::unique_lock acquire_lock(); +[[nodiscard("Returning a raw pointer requires manual memory management; consider using std::unique_ptr")]] +int* create_data(); ``` ------ ## Comparison with C++11-17 Attributes -Putting the C++11-17 attributes and the new C++20-23 attributes side by side reveals a clear development trajectory: early attributes focused on code correctness and maintainability, while later attributes focus more on performance optimization. +Comparing attributes from C++11-17 with the new attributes in C++20-23 reveals a clear development trajectory: early attributes focused on code correctness and maintainability, while later attributes focus more on performance optimization. 
| Attribute | Version | Focus | Risk | |-----------|---------|-------|------| @@ -379,35 +327,35 @@ Putting the C++11-17 attributes and the new C++20-23 attributes side by side rev | `[[carries_dependency]]` | C++11 | Performance | Low | | `[[deprecated]]` | C++14 | Maintainability | Low | | `[[nodiscard]]` | C++17 | Correctness | Low | -| `[[fallthrough]]` | C++17 | Correctness | Low | -| `[[maybe_unused]]` | C++17 | Readability | Low | -| `[[likely]]/[[unlikely]]` | C++20 | Performance | Low | +| `[[maybe_unused]]` | C++17 | Readability | Low | +| `[[fallthrough]]` | C++17 | Correctness | Low | +| `[[likely]]` / `[[unlikely]]` | C++20 | Performance | Low | | `[[no_unique_address]]` | C++20 | Performance | Low | | `[[assume]]` | C++23 | Performance | **High** | -Among these, only `[[assume]]` is truly "dangerous" — if the assumption is wrong, the consequence is undefined behavior. For the other attributes, even if the "hint" is wrong, the worst case is slightly worse performance; it won't cause the program to crash. +Only `[[assume]]` is a truly "dangerous" attribute—if the assumption is wrong, the consequence is undefined behavior. With other attributes, even if the "hint" is wrong, the worst case is slightly worse performance; it won't crash the program. ------ -## Practical Performance Testing Recommendations +## Performance Impact Testing Recommendations -For performance-oriented attributes like `[[likely]]`/`[[unlikely]]` and `[[assume]]`, the author's recommendation is: always test after adding them. The optimization effect is highly dependent on the specific hardware, compiler, and code context. Some scenarios show clear benefits, while others show no difference at all. +For performance-oriented attributes like `[[likely]]`/`[[unlikely]]` and `[[assume]]`, my advice is: always test after adding them. Optimization effectiveness depends heavily on specific hardware, compilers, and code context.
Some scenarios show significant gains, while others show no difference at all. -A simple testing approach is to use `perf stat` or `valgrind --tool=cachegrind` to compare instruction counts, branch misprediction rates, and cache hit rates before and after adding the attributes. If the data doesn't show significant improvement, it's not worth adding — because attributes increase the "information density" of the code, forcing readers to understand one more concept. +Testing methods can be simple: use tools like `perf` or `VTune` to compare instruction count, branch misprediction rate, and cache hit rate before and after adding the attribute. If there is no significant improvement in data, it's not worth adding—because attributes increase the "information density" of the code, requiring readers to understand one more concept. -For `[[no_unique_address]]`, verification is more straightforward — just look at the `sizeof` results. If the empty policy class truly takes no space, the attribute is working. +For `[[no_unique_address]]`, verification is more direct—just look at the `sizeof` results. If empty policy classes indeed take up no space, the attribute is working. ------ ## Summary -The new attributes in C++20-23 extend the compiler's hinting capabilities from "finding bugs" to "performing optimizations." `[[likely]]` and `[[unlikely]]` help the compiler with branch prediction, `[[no_unique_address]]` eliminates the memory waste of empty class members, and `[[assume]]` lets the compiler make more aggressive optimizations based on definitive assumptions. +New attributes in C++20-23 extend compiler hint capabilities from "finding bugs" to "doing optimizations." `[[likely]]` and `[[unlikely]]` help the compiler with branch prediction, `[[no_unique_address]]` eliminates memory waste from empty class members, and `[[assume]]` lets the compiler perform more aggressive optimizations based on deterministic assumptions. -The three attributes carry different levels of risk. 
`[[no_unique_address]]` is essentially harmless — the worst case is the optimization doesn't take effect, and `sizeof` stays the same. `[[likely]]`/`[[unlikely]]` carry low risk too — the worst case is an incorrect branch prediction hint, resulting in slightly worse performance. `[[assume]]` is the only truly dangerous attribute — a wrong assumption leads to undefined behavior, and it must be used with caution. +The risks of these three attributes vary. `[[no_unique_address]]` is mostly harmless—the worst case is the optimization doesn't kick in, and `sizeof` remains unchanged. `[[likely]]`/`[[unlikely]]` risks are also low—the worst case is a wrong branch prediction hint, leading to slightly worse performance. `[[assume]]` is the only truly dangerous attribute—a wrong assumption leads to undefined behavior and must be used with caution. -In practice, `[[no_unique_address]]` can almost be used unconditionally in generic code (strategy class pattern), `[[likely]]`/`[[unlikely]]` should only be added after profiling confirms a hotspot, and `[[assume]]` should only be used in extremely performance-sensitive scenarios, always accompanied by corresponding assertions or tests to guarantee the assumption always holds. +In practice, `[[no_unique_address]]` can be used almost without thinking in generic code (strategy pattern), `[[likely]]`/`[[unlikely]]` are recommended after profiling confirms hotspots, and `[[assume]]` should only be used in extreme performance-sensitive scenarios, accompanied by corresponding assertions or tests to ensure assumptions always hold. 
-## References +## Reference Resources - [cppreference: assume (C++23)](https://en.cppreference.com/w/cpp/language/attributes/assume) - [cppreference: likely/unlikely (C++20)](https://en.cppreference.com/w/cpp/language/attributes/likely) diff --git a/documents/en/vol2-modern-features/ch09-filesystem/01-filesystem-path.md b/documents/en/vol2-modern-features/ch09-filesystem/01-filesystem-path.md index 764f3daaf..41a54f539 100644 --- a/documents/en/vol2-modern-features/ch09-filesystem/01-filesystem-path.md +++ b/documents/en/vol2-modern-features/ch09-filesystem/01-filesystem-path.md @@ -1,6 +1,6 @@ --- -title: 'path Operations: Cross-Platform Path Handling' -description: Handling cross-platform paths uniformly with `std::filesystem::path` +title: 'Path operations: Cross-platform path handling' +description: Use `std::filesystem::path` for unified cross-platform path handling chapter: 9 order: 1 tags: @@ -18,393 +18,301 @@ related: - 文件与目录操作 translation: source: documents/vol2-modern-features/ch09-filesystem/01-filesystem-path.md - source_hash: 52c413a55b2b766ca0f538aa29c95e1cf3b7ceb44ec89764071c5d3d20b49ab8 - translated_at: '2026-05-26T11:33:40.524813+00:00' + source_hash: 949d1664e017452108d9cfd8617a9c4759dd7b4172a91825db15247c5b3c33e0 + translated_at: '2026-06-14T00:18:41.648574+00:00' engine: anthropic - token_count: 2971 + token_count: 2960 --- # Path Operations: Cross-Platform Path Handling -When writing cross-platform code in the past, nothing gave me more headaches than path handling. Windows uses backslashes `\`, while Linux and macOS use forward slashes `/`. Different path separators are annoying enough, but absolute paths are also represented differently (`C:\Users\...` vs `/home/...`)—not to mention advanced topics like Unicode filenames and symbolic links. We used to have to rely on a bunch of `#ifdef _WIN32` combined with string concatenation as a workaround, resulting in code that was painful to look at. 
+When writing cross-platform code in the past, nothing gave me more headaches than path handling. Windows uses backslashes `\`, while Linux and macOS use forward slashes `/`. Different separators are annoying enough, but absolute paths are also written differently (`C:\` vs `/`), not to mention advanced topics like Unicode filenames and symbolic links. In the old days, we had to rely on a bunch of `#ifdef`s combined with string concatenation to get by, resulting in code I didn't even want to look at. -The `` library introduced in C++17 completely solves this problem. `std::filesystem::path` provides a unified, cross-platform path handling API. Regardless of your operating system, path construction, decomposition, and modification can all be done using the same set of code. In this article, we focus on the `path` type itself—its construction, decomposition, modification, and comparison. We will leave file operations (exists, copy, remove, etc.) for the next article. +The `std::filesystem` library introduced in C++17 completely solves this problem. It provides a unified set of cross-platform path handling APIs. Regardless of your operating system, path construction, decomposition, and modification can be performed using the same code. In this article, we focus on the `std::filesystem::path` type itself—its construction, decomposition, modification, and comparison. We will leave file operations (such as `exists`, `copy`, `remove`, etc.) for the next article. > **Learning Objectives** > > - After completing this chapter, you will be able to: > - [ ] Understand the internal structure and cross-platform design of `std::filesystem::path` -> - [ ] Master path decomposition (root_name, parent_path, filename, etc.) -> - [ ] Master path modification (replace_extension, append, concat, etc.) +> - [ ] Master path decomposition (`root_name`, `parent_path`, `filename`, etc.) +> - [ ] Master path modification (`replace_extension`, `append`, `concat`, etc.)
> - [ ] Write cross-platform path handling code ## Environment Notes -All code in this article is based on the C++17 standard and can be compiled and run on Linux (GCC 13+), macOS (Clang 15+), and Windows (MSVC 2022). Compilation requires linking `` support—before GCC 9, you needed `-lstdc++fs`, while other compilers usually support it directly. The header file is ``, and the namespace is `std::filesystem`. For brevity, we will use the alias `namespace fs = std::filesystem;` from here on. +All code in this article is based on the C++17 standard and can be compiled and run on Linux (GCC 13+), macOS (Clang 15+), and Windows (MSVC 2022). When compiling, you need to link `std::filesystem` support—before GCC 9, you needed `-lstdc++fs`, while other compilers usually support it directly. The header file is ``, and the namespace is `std::filesystem`. For brevity, we will use the alias `fs` later. ## Core Design Philosophy of path -The design philosophy of `std::filesystem::path` is: **only handle path syntax, do not touch the file system**. This means a `path` object can represent a path that does not exist at all, or a syntactically correct but completely meaningless path. It only cares about "whether the path string's syntax is correct," not "whether this path is valid on the file system." +The design philosophy of `fs::path` is: **perform only syntactic path processing and do not touch the file system**. This means a `fs::path` object can represent a path that doesn't exist at all, or a path that is syntactically correct but meaningless. It only cares about "whether the path string's syntax is correct," not "whether this path is valid on the file system." -This design is crucial because it means all operations on `path` are pure computations—they do not involve system calls, they cannot fail (unless out of memory), and they will not throw exceptions due to file permissions or similar issues. 
You can safely use `path` in any context without worrying that it might trigger I/O operations. +This design is crucial because it means all operations on `fs::path` are pure computations—no system calls are involved, they cannot fail (unless out of memory), and they won't throw exceptions due to file permissions or other issues. You can safely use `fs::path` in any context without worrying that it will trigger I/O operations. -Internally, `path` stores paths using the **platform's native format**—backslashes `\` on Windows, and forward slashes `/` on POSIX systems. When you call `generic_string()`, it converts to the generic format on demand (always using forward slashes `/`). This design ensures compatibility with the operating system while providing a unified cross-platform interface. +Internally, `fs::path` stores paths using the **platform's native format**—backslashes `\` on Windows and forward slashes `/` on POSIX systems. When you call `generic_string()`, it converts to the generic format (always using forward slashes `/`) on demand. This design ensures compatibility with the operating system while providing a unified cross-platform interface. ## Constructing path Objects -A `path` can be constructed from various sources. The most direct way is to construct it from a string: +`fs::path` can be constructed from various sources. 
The most direct way is to construct from a string: ```cpp -#include #include -#include +#include namespace fs = std::filesystem; int main() { - // 从 C 字符串构造 + // Construct from string literals fs::path p1 = "/usr/local/bin"; - // 从 std::string 构造 - std::string str = "/home/user/docs"; - fs::path p2(str); - // 从字面量构造 - fs::path p3 = "C:\\Users\\Alice\\Documents"; // Windows 路径也可以 - // 在 Linux 上,反斜杠会被当作文件名的一部分(因为 \ 不是分隔符) - // 但在 Windows 上会被正确识别为分隔符 + + // Construct from std::string + std::string dir = "/var/log"; + fs::path p2(dir); std::cout << "p1: " << p1 << "\n"; std::cout << "p2: " << p2 << "\n"; - std::cout << "p3: " << p3 << "\n"; - return 0; } ``` -Output (on Linux): +Result (on Linux): ```text p1: "/usr/local/bin" -p2: "/home/user/docs" -p3: "C:\\Users\\Alice\\Documents" +p2: "/var/log" ``` -Note that `operator<<` adds quotes when outputting a `path`. If you do not want quotes, you can use `p.string()` for output. +Note that `operator<<` for `fs::path` outputs the path with quotes. If you don't want quotes, use the `c_str()` or `string()` method for output. -⚠️ The constructor of `path` supports `std::string_view` (since C++17). You can directly pass in a `string_view`: +⚠️ The constructor for `fs::path` supports `std::string_view` (since C++17). You can directly pass a `std::string_view`: ```cpp -std::string_view sv = "/tmp/test"; -fs::path p(sv); // 直接使用 string_view +std::string_view sv = "/tmp"; +fs::path p3{sv}; // Direct construction ``` -However, due to template deduction rules, you may need to explicitly specify the type or convert to a `std::string` in certain complex scenarios. +However, due to template deduction rules, some complex scenarios might require explicitly specifying the type or converting to `std::string`. -## Path Decomposition: Breaking Paths Down +## Path Decomposition: Breaking It Down -Path decomposition is one of the most powerful features of `path`. 
A path can be broken down into multiple components, each of which can be accessed independently. Let's first look at a complete example, decomposing a typical path on Linux: +Path decomposition is one of the most powerful features of `fs::path`. A path can be split into multiple components, each of which can be accessed independently. Let's first look at a complete example, decomposing a typical path on Linux: ```cpp -void decompose_path(const fs::path& p) { - std::cout << "原始路径: " << p << "\n"; - std::cout << "root_name: " << p.root_name() << "\n"; - std::cout << "root_dir: " << p.root_directory() << "\n"; - std::cout << "root_path: " << p.root_path() << "\n"; - std::cout << "relative_path:" << p.relative_path() << "\n"; - std::cout << "parent_path: " << p.parent_path() << "\n"; - std::cout << "filename: " << p.filename() << "\n"; - std::cout << "stem: " << p.stem() << "\n"; - std::cout << "extension: " << p.extension() << "\n"; - std::cout << "------\n"; -} +#include +#include + +namespace fs = std::filesystem; int main() { - decompose_path("/usr/local/bin/gcc"); - decompose_path("/home/user/report.pdf"); - decompose_path("config.ini"); - decompose_path("/tmp/archive.tar.gz"); - return 0; + fs::path p = "/home/user/documents/report.pdf"; + + std::cout << "root_name(): " << p.root_name() << "\n"; + std::cout << "root_directory(): " << p.root_directory() << "\n"; + std::cout << "root_path(): " << p.root_path() << "\n"; + std::cout << "relative_path(): " << p.relative_path() << "\n"; + std::cout << "parent_path(): " << p.parent_path() << "\n"; + std::cout << "filename(): " << p.filename() << "\n"; + std::cout << "stem(): " << p.stem() << "\n"; + std::cout << "extension(): " << p.extension() << "\n"; } ``` -Output (on Linux): +Result (on Linux): ```text -原始路径: "/usr/local/bin/gcc" -root_name: "" -root_dir: "/" -root_path: "/" -relative_path:"usr/local/bin/gcc" -parent_path: "/usr/local/bin" -filename: "gcc" -stem: "gcc" -extension: "" ------- -原始路径: 
"/home/user/report.pdf" -root_name: "" -root_dir: "/" -root_path: "/" -relative_path:"home/user/report.pdf" -parent_path: "/home/user" -filename: "report.pdf" -stem: "report" -extension: ".pdf" ------- -原始路径: "config.ini" -root_name: "" -root_dir: "" -root_path: "" -relative_path:"config.ini" -parent_path: "" -filename: "config.ini" -stem: "config" -extension: ".ini" ------- -原始路径: "/tmp/archive.tar.gz" -root_name: "" -root_dir: "/" -root_path: "/" -relative_path:"tmp/archive.tar.gz" -parent_path: "/tmp" -filename: "archive.tar.gz" -stem: "archive.tar" -extension: ".gz" ------- +root_name(): "" +root_directory(): "/" +root_path(): "/" +relative_path(): "home/user/documents/report.pdf" +parent_path(): "/home/user/documents" +filename(): "report.pdf" +stem(): "report" +extension(): ".pdf" ``` -Let's understand each component one by one. `root_name` is always an empty string on Linux—because Linux has no concept of drive letters. On Windows, `C:` is the root_name. `root_directory` is the root directory separator, which is `/` on Linux and also `\` (or `/`) on Windows. `root_path` equals the combination of `root_name / root_directory`. `relative_path` is the part remaining after removing the root_path. `parent_path` is the parent directory's path—if you are familiar with the POSIX `dirname` command, it does the same thing. `filename` is the last component in the path—equivalent to `basename`. `stem` is the filename with the last extension removed. `extension` is the last extension (including the `.`). +Let's understand these components one by one. `root_name()` is always an empty string on Linux—because Linux has no concept of drive letters. On Windows, `C:` would be the `root_name`. `root_directory()` is the root directory separator; on Linux it is `/`, and on Windows it is also `\` (or `/`). `root_path()` is the combination of `root_name()` and `root_directory()`. `relative_path()` is the part of the path after removing `root_path`. 
`parent_path()` is the path of the parent directory—if you are familiar with the POSIX `dirname` command, it does the same thing. `filename()` is the last component of the path—equivalent to `basename`. `stem()` is the part of the filename with the last extension removed. `extension()` is the last extension (including the `.`). -Pay attention to the decomposition result of the fourth example, `/tmp/archive.tar.gz`. `extension` only takes the part after the last `.`, which is `.gz`, not `.tar.gz`. Meanwhile, `stem` is `archive.tar`. If you need to get the complete "base name" (with all extensions removed), you need to handle it yourself: +Now consider a file name with more than one extension, such as `archive.tar.gz`. `extension()` only takes the part after the last `.`, which is `.gz`, not `.tar.gz`. And `stem()` is `archive.tar`. If you need to get the complete "base name" (removing all extensions), you need to handle it yourself: ```cpp -fs::path p = "/tmp/archive.tar.gz"; -auto full_stem = p; -while (full_stem.has_extension()) { - full_stem = full_stem.stem(); +fs::path p = "archive.tar.gz"; +// Manual handling to remove all extensions +auto full_stem = p.filename().string(); +auto dot_pos = full_stem.find('.'); +if (dot_pos != std::string::npos) { + full_stem = full_stem.substr(0, dot_pos); } -// full_stem = "archive" +std::cout << "Full stem: " << full_stem << "\n"; // Output: archive ``` -## Path Modification: In-Place vs. Creating New +## Path Modification: In-Place vs. New Objects -Modification operations on a `path` return a new `path` object and do not modify the original object (due to the value semantics design of `path`). Here are the commonly used modification operations: +Member functions such as `replace_extension()` and `remove_filename()` modify the `fs::path` object in place and return a reference to it; because `fs::path` has value semantics, copy the path first if you need to keep the original. 
Common modification operations include the following: -`replace_extension(new_ext)` replaces the current path's extension with `new_ext`. If there is no existing extension, it appends one. This is the safest way to handle file extensions—it correctly handles all edge cases (such as trailing `.` or missing extensions): +`replace_extension()` replaces the current path's extension with a new one. If there was no extension, it appends one. This is the safest way to handle file extensions—it correctly handles all edge cases (such as trailing dots or missing extensions): ```cpp -fs::path p = "/home/user/report.pdf"; -auto p2 = p.replace_extension(".txt"); -// p2 = "/home/user/report.txt" - -fs::path p3 = "/home/user/README"; -auto p4 = p3.replace_extension(".md"); -// p4 = "/home/user/README.md" +fs::path p = "data.txt"; +p.replace_extension(".json"); // "data.json" -// replace_extension 不改变原始对象 -std::cout << p << "\n"; // 仍然是 "report.pdf" -std::cout << p2 << "\n"; // "report.txt" +fs::path p2 = "archive"; +p2.replace_extension(".tar.gz"); // "archive.tar.gz" ``` -`remove_filename()` removes the filename part of the path, keeping only the directory portion: +`remove_filename()` removes the filename part of the path, keeping only the directory part: ```cpp -fs::path p = "/usr/local/bin/gcc"; -auto dir = p.remove_filename(); -// dir = "/usr/local/bin/" +fs::path p = "/tmp/test.txt"; +p.remove_filename(); // "/tmp/" ``` -⚠️ Note the difference between `remove_filename()` and `parent_path()`: `parent_path()` returns the logical parent directory (without a trailing separator), while `remove_filename()` simply deletes the last component (keeping the trailing separator). In most cases, `parent_path()` is what you want. +⚠️ Note the difference between `remove_filename()` and `parent_path()`: `parent_path()` returns the logical parent directory (without the trailing separator), whereas `remove_filename()` simply deletes the last component (keeping the trailing separator). 
In most cases, `parent_path()` is what you want. -### append and concat: Two Ways to Concatenate Paths +### append and concat: Two Ways to Join Paths -`path` provides two ways to concatenate paths, and their semantics differ, which can be confusing. +`fs::path` provides two ways to join paths, and their semantics differ, which can be confusing. -`operator/=` and `operator/` are append operations. They append the content on the right as a path component to the left. If the right side is an absolute path, the result is simply the right-side path (the left side is discarded). This behavior is consistent with shell path concatenation: +`operator/=` and `append()` are append operations. They append the content on the right as a path component to the left. If the right side is an absolute path, the result is the path on the right (the left side is discarded). This behavior is consistent with shell path joining: ```cpp -fs::path base = "/usr/local"; -auto full = base / "bin" / "gcc"; -// full = "/usr/local/bin/gcc" - -// 如果右边是绝对路径,左边被丢弃 -fs::path p = "/home/user"; -auto result = p / "/tmp/file"; -// result = "/tmp/file"(不是 "/home/user/tmp/file") +fs::path p1 = "/var"; +p1 /= "log"; // "/var/log" + +fs::path p2 = "/var"; +p2 /= "/usr/bin"; // "/usr/bin" (absolute path discards left side) ``` -`operator+=` and `concat` are string concatenation operations. They directly append the characters on the right to the end of the path string without any path-semantic processing: +`operator+=` and `concat()` are string concatenation operations. 
They directly append the characters on the right to the end of the path string, without any path semantic processing: ```cpp -fs::path p = "file"; -p += ".txt"; -// p = "file.txt"——这就是简单的字符串拼接 - -// 区别:如果用 append -fs::path p2 = "file"; -p2 /= ".txt"; -// p2 = "file/.txt"——append 把 ".txt" 当成一个独立的路径组件 +fs::path p1 = "/var"; +p1 += "log"; // "/varlog" (Pure string concatenation) + +fs::path p2 = "/var"; +p2 += "/log"; // "/var/log" (Added separator manually) ``` -You will notice that the difference between `+=` and `/=` lies in the fact that `+=` is pure string concatenation (ignoring path semantics), while `/=` is a path component append (following path concatenation rules). In most cases, you should use `/=`, and only use `+=` when you know exactly what you are doing. +You will find that the difference between `operator/=` and `operator+=` is: `operator+=` is pure string concatenation (ignoring path semantics), while `operator/=` is path component appending (observing path joining rules). In most cases, you should use `operator/=`, and only use `operator+=` when you know exactly what you are doing. ## Cross-Platform Path Handling -The cross-platform capability of `path` is mainly reflected in two aspects: automatic conversion of path separators, and recognition of platform-specific paths. +The cross-platform capability of `fs::path` is mainly reflected in two aspects: automatic conversion of path separators, and recognition of platform-specific paths. ### Path Separators -`path` internally uses the forward slash `/` as the generic separator (generic format), automatically converting the platform's native separator to the generic format upon construction. When you need to get the platform's native format, call `native()` or `string()`: +`fs::path` accepts the forward slash `/` as the generic separator (generic format) on every platform, in addition to the platform's native separator, so a path written with `/` is understood everywhere, and `generic_string()` always gives you the forward-slash form. 
When you need the platform's native format, call `native()` or `string()`: ```cpp -// 这段代码在 Windows 和 Linux 上都能正确工作 -fs::path p = "dir/subdir/file.txt"; - -// 通用格式(总是正斜杠) -std::cout << p.generic_string() << "\n"; // "dir/subdir/file.txt" +fs::path p = "C:/Users/Documents"; -// 平台原生格式 -// Linux: "dir/subdir/file.txt" -// Windows: "dir\\subdir\\file.txt" -std::cout << p.string() << "\n"; +std::string generic = p.generic_string(); // "C:/Users/Documents" +std::string native = p.string(); // "C:\Users\Documents" on Windows ``` -This means you can uniformly use forward slashes to write paths without worrying about platform differences: +This means you can uniformly write paths using forward slashes without worrying about platform differences: ```cpp -fs::path config_dir = "/etc/myapp"; -fs::path config_file = config_dir / "config.ini"; -// 在所有平台上都能正确构造路径 +fs::path config_dir = "/etc/myapp/config"; // Works on Windows, Linux, macOS ``` ### Absolute and Relative Paths -`path` provides `is_absolute()` and `is_relative()` to determine whether a path is absolute or relative. Note that whether a path is absolute or relative depends on the platform—on Linux, a path starting with `/` is an absolute path; on Windows, it needs to start with a drive letter (`C:\...`) or with `\\` (UNC path). +`fs::path` provides `is_absolute()` and `is_relative()` to determine if a path is absolute or relative. Note that whether a path is absolute or relative depends on the platform—on Linux, starting with `/` means it's an absolute path; on Windows, it needs to start with a drive letter followed by a root directory (`C:\...`) or with `\\` (UNC path). 
```cpp -fs::path p1 = "/usr/local"; // Linux: absolute, Windows: relative(没有驱动器号) -fs::path p2 = "C:\\Windows"; // Windows: absolute, Linux: relative(被当成普通目录名) -fs::path p3 = "../config.ini"; // 所有平台: relative - -std::cout << std::boolalpha; -std::cout << "p1 is_absolute: " << p1.is_absolute() << "\n"; // true on Linux -std::cout << "p2 is_absolute: " << p2.is_absolute() << "\n"; // true on Windows -std::cout << "p3 is_absolute: " << p3.is_absolute() << "\n"; // false +fs::path p1 = "/usr/bin"; +bool is_abs = p1.is_absolute(); // true on Linux/macOS + +fs::path p2 = "C:\\Windows"; +bool is_abs_win = p2.is_absolute(); // true on Windows ``` -If you need to convert a relative path to an absolute path, use `fs::absolute(p)` (which requires a file system query) or `fs::canonical(p)` (which resolves all symbolic links and `.`, `..`). +If you need to convert a relative path to an absolute path, use `absolute()` (requires file system query) or `canonical()` (resolves all symbolic links and `.` and `..`). ## Conversion Between path and string -Conversion between `path` and `string` is a frequent operation. `path` provides multiple conversion methods: +Conversion between `fs::path` and `std::string` is a frequent operation. 
`fs::path` provides multiple conversion methods: ```cpp -fs::path p = "/usr/local/bin"; - -// 转为 std::string(平台原生编码) -std::string s = p.string(); - -// 转为通用格式 string(总是正斜杠) -std::string gs = p.generic_string(); - -// 获取原生格式(返回 const string_type&,零拷贝) -const auto& native = p.native(); // Windows 上是 std::wstring - -// 从 string 转 path -fs::path from_str = fs::path(s); +fs::path p = "/tmp/test"; -// C 风格字符串 -const char* c = p.c_str(); // Windows 上是 const wchar_t* +std::string s = p.string(); // Native format string +std::string gs = p.generic_string(); // Generic format string (always uses /) +const char* cstr = p.c_str(); // C-style string pointer ``` -⚠️ On Windows, `path` internally uses `wchar_t` (UTF-16), so `string()` returns a UTF-8 or ANSI string converted from UTF-16, and `native()` returns a `std::wstring`. On Linux/macOS, `path` internally uses `char` (UTF-8), so this conversion issue does not exist. +⚠️ On Windows, `fs::path` internally uses `std::wstring` (UTF-16), so `string()` returns a UTF-8 or ANSI string converted from UTF-16, and `wstring()` returns a `std::wstring`. On Linux/macOS, `fs::path` internally uses `std::string` (UTF-8), so there is no conversion issue. ## Path Comparison and Iteration -Two `path` objects can be compared using operators like `==`, `!=`, and `<`. The comparison rule is component-by-component—first comparing root_name, then root_directory, and then each path component in order. This means `/a/b/c` and `/a/b/c` are equal, but `/a/b/c` and `/a/b/./c` are not necessarily equal (because `.` is not normalized). +Two `fs::path` objects can be compared using operators like `==`, `<`, `>`. The comparison rule is component-by-component—first comparing `root_name`, then `root_directory`, and then comparing each path component in order. This means that `a/b` and `a//b` are equal, but `a/../b` and `b` are not necessarily equal (because `a/..` is not normalized). 
```cpp -fs::path p1 = "/usr/local/bin"; -fs::path p2 = "/usr/local/bin"; -fs::path p3 = "/usr/local/bin/"; +fs::path p1 = "a/b"; +fs::path p2 = "a//b"; -std::cout << std::boolalpha; -std::cout << (p1 == p2) << "\n"; // true -std::cout << (p1 == p3) << "\n"; // false(末尾有 / 的差异) +if (p1 == p2) { + std::cout << "Equal\n"; // This will be printed +} ``` -`path` also supports iterators, allowing you to access each component of the path one by one: +`fs::path` also supports iterators, allowing you to access each component of the path individually: ```cpp -fs::path p = "/usr/local/bin/gcc"; +fs::path p = "/usr/local/bin"; -for (const auto& component : p) { - std::cout << "[" << component << "] "; +for (const auto& part : p) { + std::cout << "[" << part << "] "; } -std::cout << "\n"; -// 输出: [/] [usr] [local] [bin] [gcc] +// Output: ["/"] ["usr"] ["local"] ["bin"] ``` -The iterator skips empty components and returns each segment between path separators as an independent `path` object. The root_directory (`/`) is also returned as a component. +The iterator skips empty components and returns each segment between path separators as an independent `fs::path` object. The `root_directory` (`/`) is also returned as a component. -## Practical Example: Path Normalization and File Extension Filtering +## Real-World Example: Path Normalization and File Extension Filtering -Let's combine what we have learned so far to write a practical utility function: finding all files with a specific extension in a given directory. This function is very common in build systems, resource managers, and testing frameworks. +Let's combine the knowledge we've learned to write a practical utility function: finding all files with a specific extension in a given directory. This function is common in build systems, resource managers, and test frameworks. 
```cpp -#include #include +#include #include -#include namespace fs = std::filesystem; -/// @brief 在指定目录下查找所有匹配扩展名的文件 -/// @param dir 搜索目录 -/// @param ext 目标扩展名(如 ".cpp") -/// @return 匹配的文件路径列表 -std::vector find_by_extension(const fs::path& dir, - const std::string& ext) { +std::vector find_files_by_extension(const fs::path& dir, const std::string& ext) { std::vector results; + if (!fs::exists(dir) || !fs::is_directory(dir)) { - std::cerr << "目录不存在或不是目录: " << dir << "\n"; + std::cerr << "Path does not exist or is not a directory\n"; return results; } for (const auto& entry : fs::directory_iterator(dir)) { if (entry.is_regular_file()) { - auto path_ext = entry.path().extension().string(); - // 统一转小写比较,应对 .CPP 和 .cpp - std::transform(path_ext.begin(), path_ext.end(), - path_ext.begin(), ::tolower); - std::string lower_ext = ext; - std::transform(lower_ext.begin(), lower_ext.end(), - lower_ext.begin(), ::tolower); - if (path_ext == lower_ext) { + // Check if the extension matches + if (entry.path().extension() == ext) { results.push_back(entry.path()); } } } - - // 按文件名排序 - std::sort(results.begin(), results.end()); return results; } int main() { - auto cpp_files = find_by_extension(".", ".md"); + auto cpp_files = find_files_by_extension(".", ".cpp"); for (const auto& f : cpp_files) { - std::cout << f.filename().string() << "\n"; + std::cout << f.filename() << "\n"; } - return 0; } ``` -This function comprehensively uses the decomposition (`extension()`), query (`filename()`), and comparison features of `path`, and also utilizes file system operations like `fs::exists`, `fs::is_directory`, and `fs::directory_iterator` which we will cover in detail in the next article. Just keep this in mind for now, and we will dive into these in the next article. 
+This function combines the decomposition and query functions of `fs::path` (`extension()`, `filename()`) with its comparison features, as well as file system operations like `fs::directory_iterator`, `fs::exists`, and `fs::is_directory`, which we will cover in detail in the next article. Just get a general impression for now. ## Summary -`std::filesystem::path` is a powerful cross-platform path handling tool brought to us by C++17. It only handles path syntax (without touching the file system), providing complete path decomposition (root_name, parent_path, filename, stem, extension), modification (replace_extension, remove_filename, append, concat), comparison, and iteration features. It internally uses the generic format (forward slashes), automatically handling cross-platform separator differences. When concatenating paths, `/=` is semantic concatenation (recommended), while `+=` is pure string concatenation (use with caution). +`fs::path` is a cross-platform path handling tool brought to us by C++17. It performs only syntactic path processing (without touching the file system) and provides complete path decomposition (`root_name`, `parent_path`, `filename`, `stem`, `extension`), modification (`replace_extension`, `remove_filename`, `append`, `concat`), comparison, and iteration features. It stores paths in the platform's native format, accepts the generic format (forward slashes) on every platform, and thereby handles cross-platform separator differences. When joining paths, `operator/=` is semantic joining (recommended), while `operator+=` is pure string joining (use with caution). -Now that we understand the operations of `path`, in the next article we will look at how to perform actual file and directory operations using the `` library—creating, copying, deleting, permission management, and a practical log rotation tool. 
+With an understanding of `fs::path` operations, in the next article we will look at how to use the `std::filesystem` library for actual file and directory operations—creation, copying, deletion, permission management, and a practical log rotation utility. -## References +## Reference Resources - [cppreference: std::filesystem::path](https://en.cppreference.com/w/cpp/filesystem/path) - [cppreference: path::parent_path](https://en.cppreference.com/w/cpp/filesystem/path/parent_path) diff --git a/documents/en/vol2-modern-features/ch09-filesystem/02-filesystem-ops.md b/documents/en/vol2-modern-features/ch09-filesystem/02-filesystem-ops.md index e1d3f0301..ab891ec0c 100644 --- a/documents/en/vol2-modern-features/ch09-filesystem/02-filesystem-ops.md +++ b/documents/en/vol2-modern-features/ch09-filesystem/02-filesystem-ops.md @@ -18,248 +18,177 @@ related: - 目录遍历与搜索 translation: source: documents/vol2-modern-features/ch09-filesystem/02-filesystem-ops.md - source_hash: 4c4bf535b1461d4894271c9c1a332b3888c53edf90bd80a79ece569f303f954e - translated_at: '2026-05-26T11:33:50.845819+00:00' + source_hash: 8fd5e0b1e8e7a44582eb5a5973bf711a2a3129b326f15711a412ff2248853fdc + translated_at: '2026-06-14T00:19:02.053785+00:00' engine: anthropic - token_count: 3323 + token_count: 3359 --- # File and Directory Operations -In the previous article, we learned how to use `std::filesystem::path` to handle path syntax—constructing, decomposing, modifying, and comparing paths, all as pure computations without touching the disk. In this article, we get down to business: using the `std::filesystem` library to directly manipulate the file system—checking if files exist, creating directories, copying files, deleting files, and querying permissions and disk space. +In the previous post, we learned how to use `std::filesystem::path` to handle path syntax issues—construction, decomposition, modification, and comparison—all pure computation without touching the disk. 
In this post, we get real: we use the `std::filesystem` library to manipulate the file system directly—checking if files exist, creating directories, copying files, deleting files, and querying permissions and disk space. -As with the previous article, our environment is C++17, with GCC 13+ / Clang 15+ / MSVC 2022. The header file is ``, and the namespace is `std::filesystem`. +As before, our environment is C++17 with GCC 13+ / Clang 15+ / MSVC 2022. The header file is `<filesystem>`, and the namespace is `std::filesystem`. > **Learning Objectives** > -> - After completing this chapter, you will be able to: -> - [ ] Use `exists`, `is_regular_file`, and `is_directory` to check file status -> - [ ] Master the use of `create_directory` and `create_directories` +> After completing this chapter, you will be able to: +> +> - [ ] Use `exists`, `is_regular_file`, `is_directory` to check file status +> - [ ] Master the usage of `create_directory`, `create_directories` > - [ ] Safely perform file copy and delete operations -> - [ ] Understand metadata queries such as `file_size`, `last_write_time`, and `status` +> - [ ] Understand metadata queries like `file_size`, `last_write_time`, `status` > - [ ] Write a practical log rotation tool ## File Status Queries: Does it exist? What type is it? -The first step in file system operations is usually to "see what's actually at this path." `std::filesystem` provides a set of query functions to answer this question. +The first step in file system manipulation is usually "check what is actually at this path." `std::filesystem` provides a set of query functions to answer this. ### exists: Does the path exist? -`exists` checks whether a given path exists on the file system. It can accept a `path` object or a `std::string_view` (which we will cover in the next article). It returns `bool`: +`std::filesystem::exists` checks if a given path exists on the file system. 
It accepts a `path` object or a `file_status` object (we'll cover this in the next post). It returns `bool`: ```cpp #include -#include - namespace fs = std::filesystem; int main() { - fs::path p = "/usr/local/bin/gcc"; + fs::path p = "test.txt"; + if (fs::exists(p)) { - std::cout << p << " 存在\n"; + // File exists } else { - std::cout << p << " 不存在\n"; + // File does not exist } - return 0; } ``` -⚠️ `exists` throws an exception in certain situations (such as insufficient permissions preventing access to the parent directory). If you don't want exceptions to propagate, use the overload that does not accept an `error_code`, or wrap it in a try-catch block. A better approach is to use the overload that accepts an `error_code`: +⚠️ `exists` may throw an exception in certain cases (e.g., insufficient permissions to access a parent directory). If you do not want exceptions to propagate, you can wrap the call in try-catch. A better approach is to use the overload that accepts a `std::error_code&`, which reports failures through the error code instead of throwing: ```cpp std::error_code ec; -bool exists = fs::exists(p, ec); -if (ec) { - std::cerr << "查询失败: " << ec.message() << "\n"; +if (fs::exists(p, ec)) { + // ... +} else if (ec) { + // An error occurred + std::cerr << "Error: " << ec.message() << std::endl; } ``` -### is_regular_file / is_directory / is_symlink: Type checking +### is_regular_file / is_directory / is_symlink: Type determination -Once we know a path exists, the next step is to determine its type. 
`is_regular_file` checks if it is a regular file, `is_directory` checks if it is a directory, and `is_symlink` checks if it is a symbolic link. There are also more specific type checks like `is_block_file`, `is_character_file`, `is_fifo`, `is_socket`, and `is_other`, which are occasionally used in Linux system programming. ```cpp -fs::path p = "/usr/local/bin"; - -if (fs::is_directory(p)) { - std::cout << p << " 是一个目录\n"; -} else if (fs::is_regular_file(p)) { - std::cout << p << " 是一个普通文件\n"; +if (fs::is_regular_file(p)) { + std::cout << "This is a regular file.\n"; +} else if (fs::is_directory(p)) { + std::cout << "This is a directory.\n"; } else if (fs::is_symlink(p)) { - std::cout << p << " 是一个符号链接\n"; + std::cout << "This is a symbolic link.\n"; } ``` -⚠️ If the path does not exist, these functions return `false`—they do not throw exceptions. So you don't need to call `exists` first before checking the type; just check directly. However, note that if the underlying `status` call itself fails (e.g., due to permission issues), it will throw a `filesystem_error` exception. +⚠️ If the path does not exist, these functions return `false`—they do not throw exceptions. So you don't need to call `exists` before checking the type; just check directly. However, be aware that if the underlying `status` call fails (e.g., due to permission issues), it will throw a `filesystem_error` exception. 
### file_size / last_write_time / status: Metadata queries -Besides the type, we often need to query a file's size, last modification time, and permission status: +Beyond type, we often need to query file size, last modification time, and permission status: ```cpp -#include -#include -#include -#include - -namespace fs = std::filesystem; - -void print_file_info(const fs::path& p) { - std::error_code ec; - - // 文件大小(字节) - auto size = fs::file_size(p, ec); - if (!ec) { - std::cout << "大小: " << size << " 字节\n"; - if (size > 1024 * 1024) { - std::cout << " " - << size / (1024.0 * 1024.0) << " MB\n"; - } else if (size > 1024) { - std::cout << " " - << size / 1024.0 << " KB\n"; - } - } - - // 最后修改时间 - auto ftime = fs::last_write_time(p, ec); - if (!ec) { - // C++20 之前:需要转换成 time_t 来显示 - auto sctp = std::chrono::time_point_cast( - ftime - fs::file_time_type::clock::now() + std::chrono::system_clock::now() - ); - auto time_t_val = std::chrono::system_clock::to_time_t(sctp); - std::cout << "修改时间: " - << std::ctime(&time_t_val); - } - - // 文件状态(权限等) - auto status = fs::status(p, ec); - if (!ec) { - std::cout << "类型: " << static_cast(status.type()) << "\n"; - std::cout << "权限: " << static_cast(status.permissions()) << "\n"; - } -} - -int main() { - print_file_info("/usr/local/bin/gcc"); - return 0; +if (fs::is_regular_file(p)) { + // Get file size in bytes + uintmax_t size = fs::file_size(p); + std::cout << "Size: " << size << " bytes\n"; + + // Get last write time + auto ftime = fs::last_write_time(p); + + // Convert to system time (approximate for C++17) + auto sctp = std::chrono::time_point_cast( + ftime - fs::file_time_type::clock::now() + std::chrono::system_clock::now() + ); + std::time_t cftime = std::chrono::system_clock::to_time_t(sctp); + std::cout << "Last write time: " << std::asctime(std::localtime(&cftime)) << std::endl; } ``` -⚠️ Converting `last_write_time` into a readable format before C++20 is a bit cumbersome (as shown above), because the clock used by 
`file_time_type` is not necessarily `std::chrono::system_clock`. C++20 provides a more concise approach via `std::chrono::clock_cast`, but in C++17, we can only use the approximate method shown above. In real projects, using `std::ctime` for simple display is sufficient, though the precision might not be completely accurate. +⚠️ Converting `last_write_time` to a readable format is a bit verbose in C++17 (as shown above) because the `file_time_type`'s clock is not necessarily `system_clock`. C++20 provides a simpler way via `std::chrono::clock_cast`, but in C++17 we must use the approximation above. In actual projects, using `std::asctime` for simple display is sufficient, though the precision might not be perfectly accurate. ## Creating Directories -`create_directory` creates a single directory—provided that the parent directory already exists. If the parent directory does not exist, the call will fail: +`create_directory` creates a directory—provided the parent directory already exists. If the parent does not exist, the call fails: ```cpp -fs::path dir = "/tmp/myapp_config"; -if (!fs::exists(dir)) { - if (fs::create_directory(dir)) { - std::cout << "目录创建成功\n"; - } else { - std::cerr << "目录创建失败\n"; - } -} +fs::create_directory("foo"); // OK if parent exists +// fs::create_directory("bar/baz"); // Error if "bar" does not exist ``` -If you need to create a multi-level directory (for example, `a/b/c`, where neither `a/b` nor `a` exists), use `create_directories`. It automatically creates all missing intermediate directories in the path, similar to `mkdir -p`: +If you need to create a multi-level directory (e.g., `a/b/c`, where `a` and `a/b` do not exist), use `create_directories`. 
It automatically creates all missing intermediate directories in the path, similar to `mkdir -p`: ```cpp -fs::path deep_dir = "/tmp/myapp/data/cache/tmp"; -fs::create_directories(deep_dir); // 自动创建所有中间目录 -std::cout << "创建完成\n"; +fs::create_directories("a/b/c"); // Creates "a", "a/b", and "a/b/c" ``` -`create_directories` is one of the file system operations I use the most. Ensuring that configuration, log, and cache directories exist at program startup is a very common requirement. With `create_directories`, one line of code gets it done, without manually checking whether each level exists. +`create_directories` is one of the file system operations I use most. When a program starts, ensuring that configuration, log, and cache directories exist is a very common requirement. With `create_directories`, one line of code handles it, without manually checking each level. -⚠️ `create_directory` returns `false` when the directory already exists, but it does not report an error. The same applies to `create_directories`—if all directories already exist, it also returns `false`. Therefore, you should not use the return value to determine "whether an error occurred," but rather use the `error_code` version. +⚠️ `create_directory` returns `false` if the directory already exists, but it does not report an error. The same applies to `create_directories`—if all directories exist, it returns `false`. Therefore, you should not use the return value to judge "whether an error occurred"; instead, use the `error_code&` version. ## Copying Files and Directories -`copy` is a versatile copy function. Its behavior depends on the type of the `from` path and whether `copy_options` are specified: +`std::filesystem::copy` is a multi-function copy utility. 
Its behavior depends on the type of `from` and whether `options` are specified: ```cpp -// 默认行为: -// - 如果 from 是普通文件,复制文件到 to -// - 如果 from 是目录,复制目录结构到 to(不递归复制内容) -// - 如果 from 是符号链接,复制链接本身 - -fs::path src = "/tmp/source.txt"; -fs::path dst = "/tmp/dest.txt"; - -std::error_code ec; -fs::copy(src, dst, ec); -if (ec) { - std::cerr << "复制失败: " << ec.message() << "\n"; -} +fs::copy("src.txt", "dst.txt"); // Copy file +fs::copy("src_dir", "dst_dir", fs::copy_options::recursive); // Copy directory ``` ### copy_options: Controlling copy behavior -`copy_options` is a bitmask type used to finely control copy behavior. Common options include: +`copy_options` is a bitmask type used to fine-tune copy behavior. Common options include: -`copy_options::overwrite_existing`—If the target file already exists, overwrite it. By default, if the target already exists, `copy` will fail (or skip, depending on the specific operation). +`copy_options::overwrite_existing`—If the target file exists, overwrite it. By default, if the target exists, `copy` fails (or skips, depending on the specific operation). -`copy_options::recursive`—Recursively copy directory contents. If `from` is a directory, it will recursively copy all files and subdirectories within it. +`copy_options::recursive`—Recursively copy directory contents. If `from` is a directory, it recursively copies all files and subdirectories. `copy_options::copy_symlinks`—Copy the symbolic link itself (rather than following the link to copy the target file). ```cpp -// 递归复制整个目录 -fs::copy("/tmp/source_dir", "/tmp/dest_dir", - fs::copy_options::recursive | - fs::copy_options::overwrite_existing); +fs::copy( + "src_dir", "dst_dir", + fs::copy_options::recursive | + fs::copy_options::overwrite_existing +); ``` -`copy_file` is a function specifically for copying files. The difference between it and `copy` is that `copy_file` only handles regular files and provides finer-grained control. 
⚠️ Note: `copy_file` **does not provide atomicity guarantees**—if the copy fails midway (e.g., due to insufficient disk space or a power outage), the target file might be in a partially written state. If atomicity is required, you should use the "copy to a temporary file + atomic rename" pattern. +`copy_file` is a function specifically for copying files. The difference between it and `copy` is that `copy_file` only handles regular files and provides finer control. ⚠️ Note: `copy_file` **provides no atomicity guarantee**—if the copy fails (e.g., insufficient disk space, power outage), the target file may be in a partially written state. For atomicity, use the "copy to temporary file + atomic rename" pattern. (See the `safe_write` function example in the "Temporary File Handling" section). ```cpp -// 安全的文件复制(原子性保证) -fs::path src = "/data/important_config.yaml"; -fs::path dst = "/backup/important_config.yaml"; - -std::error_code ec; -fs::copy_file(src, dst, - fs::copy_options::overwrite_existing, ec); -if (ec) { - std::cerr << "复制失败: " << ec.message() << "\n"; -} else { - std::cout << "复制成功\n"; -} +fs::copy_file("src.txt", "dst.txt", fs::copy_options::overwrite_existing); ``` ## Deleting and Renaming -`remove` deletes a file or an empty directory. If the path does not exist, it returns `false` (no error). If the path is a symbolic link, it deletes the link itself rather than the target. If the path is a non-empty directory, the deletion fails: +`remove` deletes a file or an empty directory. If the path does not exist, it returns `false` (no error). If the path is a symbolic link, it deletes the link itself, not the target. 
If the path is a non-empty directory, deletion fails: ```cpp -fs::path temp = "/tmp/temp_file.txt"; -bool removed = fs::remove(temp); -if (removed) { - std::cout << "已删除\n"; -} else { - std::cout << "文件不存在或删除失败\n"; -} +bool deleted = fs::remove("tmp.txt"); // Returns true if deleted ``` -`remove_all` recursively deletes a directory and all its contents (files, subdirectories, symbolic links). It returns the number of deleted files. This is a "nuclear-level" operation—always confirm the path is correct before calling it: +`remove_all` recursively deletes a directory and all its contents (files, subdirectories, symbolic links). It returns the count of deleted files. This is a "nuclear" operation—always confirm the path is correct before calling: ```cpp -fs::path temp_dir = "/tmp/my_temp_dir"; -auto count = fs::remove_all(temp_dir); -std::cout << "删除了 " << count << " 个文件/目录\n"; +uintmax_t count = fs::remove_all("build_dir"); // Deletes everything inside +std::cout << "Deleted " << count << " items.\n"; ``` -⚠️ `remove_all` is an irreversible operation. I once accidentally wrote the wrong path while debugging (missing a directory level) and almost wiped out the entire project directory. Fortunately, I was running in a test environment at the time, so no actual damage was done. Since then, I always print and confirm the path before calling `remove_all`. I recommend you build this habit too. +⚠️ `remove_all` is irreversible. Once, while debugging, I accidentally wrote the path wrong (missing a directory level) and nearly wiped the entire project directory. Fortunately, I was running in a test environment, so no actual damage occurred. Since then, I always print and confirm the path before calling `remove_all`. I suggest you build this habit too. -`rename` renames or moves a file/directory. In most implementations, renaming on the same file system is an atomic operation (it only modifies the directory entry, without moving data). 
⚠️ Note: Renaming across file systems typically **fails** (throwing an exception or returning an error), rather than automatically performing a copy + delete. To move across file systems, you should explicitly use `copy` + `remove`: +`rename` renames or moves a file/directory. In most implementations, renaming on the same file system is an atomic operation (modifying directory entries only, not moving data). ⚠️ Note: Cross-filesystem renaming usually **fails** (throwing an exception or returning an error) rather than automatically performing copy + delete. For cross-filesystem moves, explicitly use `copy` + `remove` (or `remove_all` when moving a directory tree): ```cpp -std::error_code ec; -fs::rename("/tmp/old_name.txt", "/tmp/new_name.txt", ec); -if (ec) { - std::cerr << "重命名失败: " << ec.message() << "\n"; -} +// Move file to another disk (not atomic) +fs::copy("src.txt", "/mnt/backup/src.txt"); +fs::remove("src.txt"); ``` ## Permissions and Disk Space @@ -269,168 +198,111 @@ if (ec) { `permissions` modifies a file's permission bits, similar to `chmod`. Permissions are represented by the `perms` enum: ```cpp -fs::path script = "/tmp/my_script.sh"; - -// 设置为 rwxr-xr-x (755) -fs::permissions(script, - fs::perms::owner_read | fs::perms::owner_write | fs::perms::owner_exec | - fs::perms::group_read | fs::perms::group_exec | - fs::perms::others_read | fs::perms::others_exec); - -// 或者用 perm_options 控制修改方式 -fs::permissions(script, - fs::perms::owner_exec, // 只修改 owner_exec 位 - fs::perm_options::add); // 添加(不影响其他位) +fs::permissions( + "script.sh", + fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, + fs::perm_options::replace +); ``` -The third parameter, `perm_options`, can be `perm_options::replace` (replace all permissions, the default behavior), `perm_options::add` (add the specified permission bits), or `perm_options::remove` (remove the specified permission bits). This is more convenient than replacing all permissions when you only need to modify one or two permission bits. 
+The third parameter can be `perm_options::replace` (replace all permissions, default behavior), `perm_options::add` (add specified permission bits), or `perm_options::remove` (remove specified permission bits). This is more convenient than replacing all permissions when you only need to modify one or two bits. ### space: Querying disk space -`space` returns a `space_info` struct containing the disk's capacity, used space, and available space: +`space` returns a `space_info` struct containing the disk's capacity, free space, and available space: ```cpp -auto info = fs::space("/tmp"); -if (info.capacity > 0) { - std::cout << "总容量: " - << info.capacity / (1024.0 * 1024 * 1024) << " GB\n"; - std::cout << "可用空间: " - << info.available / (1024.0 * 1024 * 1024) << " GB\n"; - std::cout << "剩余空间: " - << info.free / (1024.0 * 1024 * 1024) << " GB\n"; -} +fs::space_info root = fs::space("/"); +std::cout << "Total: " << root.capacity << "\n"; +std::cout << "Free: " << root.free << "\n"; +std::cout << "Avail: " << root.available << "\n"; ``` -Note the difference between `available` and `free`: `free` is the remaining space on the disk (including the portion only usable by root), while `available` is the space actually available to the current user. On Linux, the difference between these two values comes from reserved blocks (ext4 reserves 5% for root by default). +Note the difference between `free` and `available`: `free` is the remaining space on the disk (including parts only root can use), while `available` is the space actually available to the current user. On Linux, this difference comes from reserved blocks (ext4 reserves 5% for root by default). -## Handling Temporary Files +## Temporary File Handling -C++ does not provide a standard API for "creating temporary files" directly (C++23's `temp_directory_path` only tells you where the temporary directory is). 
However, in C++17, we can combine existing tools to safely handle temporary files: +C++ does not provide a standard API for "creating temporary files" directly (C++17's `std::filesystem::temp_directory_path` only tells you where the temporary directory is). However, we can combine existing C++17 tools to handle temporary files safely: ```cpp #include #include #include -#include namespace fs = std::filesystem; -/// @brief 创建一个唯一的临时文件路径 -/// @return 临时文件的路径(文件尚未创建) -fs::path make_temp_file() { - auto temp_dir = fs::temp_directory_path(); - - // 生成随机后缀 +// Generate a random temporary filename +fs::path temp_filename() { + std::string random_str; std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution dist(0, 999999); - auto suffix = std::to_string(dist(gen)); + std::uniform_int_distribution<> dis(0, 15); - auto temp_path = temp_dir / ("myapp_temp_" + suffix + ".tmp"); - return temp_path; + for (int i = 0; i < 8; ++i) { + random_str += "0123456789abcdef"[dis(gen)]; + } + return fs::temp_directory_path() / ("tmp_" + random_str); } -/// @brief 安全地将数据写入临时文件,然后原子性地重命名为目标文件 -/// @param target 目标文件路径 -/// @param data 要写入的数据 -/// @return 是否成功 -bool safe_write_file(const fs::path& target, const std::string& data) { - auto temp = make_temp_file(); - - // 先写入临时文件 +// Safely write to a file (atomic rename) +void safe_write(const fs::path& dest, const std::string& content) { + auto temp = temp_filename(); { - std::ofstream out(temp); - if (!out) return false; - out << data; - out.close(); - if (out.fail()) { - fs::remove(temp); - return false; - } - } - - // 原子性重命名 - std::error_code ec; - fs::rename(temp, target, ec); - if (ec) { - fs::remove(temp); // 清理临时文件 - return false; - } - return true; + std::ofstream ofs(temp, std::ios::binary); + ofs << content; + } // File closed here + fs::rename(temp, dest); // Atomic operation } ``` -This "write to temporary file + atomic rename" pattern is crucial in scenarios that require data integrity guarantees—if the 
program crashes or the power goes out during the write, the target file will either be the old complete version or the new complete version, and will never end up in a corrupted "half-written" state. Many databases, configuration file managers, and package managers use this pattern. +This "write to temporary file + atomic rename" pattern is crucial in scenarios requiring data integrity. If the program crashes or power is lost during the write, the target file is either the old complete version or the new complete version—never a "half-written" corrupted state. Many databases, configuration file managers, and package managers use this pattern. -## Practical Example: Log Rotation Tool +## Real-World Example: Log Rotation Tool -Let's combine all the operations we learned in this article to write a practical log rotation tool. The core logic of log rotation is: when a log file exceeds a certain size, rename it to a backup file (with a sequence number), and then create a new empty log file. At the same time, we limit the number of backups, deleting old backups that exceed the limit. +Let's combine all the operations learned in this post to write a practical log rotation tool. The core logic of log rotation is: when a log file exceeds a certain size, rename it to a backup file (with a sequence number) and create a new empty log file. We also limit the number of backups, deleting old ones that exceed the limit. 
```cpp #include -#include #include -#include +#include #include -#include namespace fs = std::filesystem; -/// @brief 执行日志轮转 -/// @param log_path 日志文件路径 -/// @param max_size 最大文件大小(字节) -/// @param max_backups 最大备份数量 -void rotate_log(const fs::path& log_path, - std::uintmax_t max_size, - int max_backups) { - std::error_code ec; - - // 检查日志文件是否存在且超过大小限制 - if (!fs::exists(log_path, ec) || ec) return; - auto size = fs::file_size(log_path, ec); - if (ec || size < max_size) return; - - auto stem = log_path.stem().string(); - auto ext = log_path.extension().string(); - auto parent = log_path.parent_path(); - - // 收集已有的备份文件 - std::vector backups; - for (int i = 1; i <= max_backups + 1; ++i) { - auto backup_name = stem + "." + std::to_string(i) + ext; - auto backup_path = parent / backup_name; - if (fs::exists(backup_path)) { - backups.push_back(backup_path); - } - } +void rotate_logs(const fs::path& log_dir, const std::string& base_name, uintmax_t max_size, int max_backups) { + fs::path current_log = log_dir / (base_name + ".log"); - // 删除超出数量限制的旧备份 - std::sort(backups.begin(), backups.end()); - while (static_cast(backups.size()) >= max_backups) { - fs::remove(backups.back(), ec); - backups.pop_back(); - } + // Check if log file exists and exceeds size limit + if (fs::exists(current_log) && fs::file_size(current_log) > max_size) { + // Rename existing backups (e.g., .log.1 -> .log.2) + for (int i = max_backups - 1; i >= 1; --i) { + fs::path old = log_dir / (base_name + ".log." + std::to_string(i)); + fs::path next = log_dir / (base_name + ".log." + std::to_string(i + 1)); - // 将现有备份序号 +1 - for (int i = static_cast(backups.size()); i >= 1; --i) { - auto old_name = stem + "." + std::to_string(i) + ext; - auto new_name = stem + "." 
+ std::to_string(i + 1) + ext; - fs::rename(parent / old_name, parent / new_name, ec); - } + if (fs::exists(old)) { + fs::rename(old, next); + } + } - // 将当前日志重命名为 .1 备份 - auto first_backup = parent / (stem + ".1" + ext); - fs::rename(log_path, first_backup, ec); + // Rename current log to .log.1 + fs::path backup = log_dir / (base_name + ".log.1"); + fs::rename(current_log, backup); - // 创建新的空日志文件 - std::ofstream(log_path).close(); + // Delete excess backup + fs::path excess = log_dir / (base_name + ".log." + std::to_string(max_backups + 1)); + fs::remove(excess); + } - std::cout << "日志轮转完成: " << log_path << "\n"; + // Create new log file if it doesn't exist + if (!fs::exists(current_log)) { + std::ofstream(current_log); // Create empty file + } } int main() { - // 示例:当 app.log 超过 1MB 时轮转,最多保留 5 个备份 - rotate_log("/tmp/app.log", 1024 * 1024, 5); + // Rotate logs in "./logs" directory + // Max size 10MB, keep 3 backups + rotate_logs("./logs", "app", 10 * 1024 * 1024, 3); return 0; } ``` @@ -438,40 +310,45 @@ int main() { After running, the file status under `./logs` will look like this: ```text -app.log ← 新的空日志文件 -app.1.log ← 上一次的日志 -app.2.log ← 上上次的日志 -... -app.5.log ← 最老的备份 +./logs/ +├── app.log (new empty file) +├── app.log.1 (previous app.log) +├── app.log.2 (previous app.log.1) +└── app.log.3 (previous app.log.2) ``` -This rotation tool uses all the core operations we learned in this article: `exists`, `file_size`, `rename`, and `remove`. The "atomic rename" ensures that no log data is lost during rotation—even if the program crashes during the rename process, the worst-case scenario is that a particular backup file did not finish renaming, and the next rotation will handle it automatically. +This rotation tool uses all core operations covered in this post: `exists`, `file_size`, `rename`, `remove`. 
The "atomic rename" ensures no log data is lost during rotation—even if the program crashes during the rename, the worst case is a backup file isn't renamed, which the next rotation will handle automatically. ## Two Modes of Error Handling -Throughout this article, I have been using two ways to handle errors: throwing exceptions and using `error_code`. Let's summarize the best practices for error handling in `std::filesystem`. +Throughout this post, I have been using two ways to handle errors: throwing exceptions and `error_code&`. Let's summarize the best practices for error handling in `std::filesystem`. -Most `std::filesystem` functions have two overloaded versions: one that throws a `filesystem_error` exception on error, and another that accepts an `error_code` parameter and returns the error code through it on failure. Which one to choose depends on your scenario: +Most `std::filesystem` functions have two overloads: one that throws a `filesystem_error` exception on error, and another that accepts an `error_code&` parameter and returns an error code through it. The choice depends on your scenario: ```cpp -// 模式一:抛异常(适合"不应该失败"的操作) -fs::create_directories("/tmp/myapp/data"); +// Method 1: Exception (for initialization) +try { + fs::create_directories("config"); +} catch (const fs::filesystem_error& e) { + std::cerr << "Init failed: " << e.what() << std::endl; + std::exit(1); +} -// 模式二:error_code(适合"可能失败"的操作) +// Method 2: error_code (for runtime operations) std::error_code ec; -fs::copy(src, dst, ec); +fs::copy_file(src, dst, fs::copy_options::overwrite_existing, ec); if (ec) { - // 处理错误 + std::cerr << "Copy failed: " << ec.message() << std::endl; } ``` -My personal preference is: for initialization operations at program startup (like creating configuration directories), use the exception-throwing version—because if these operations fail, it means the program cannot run normally, and an exception can directly terminate the startup process. 
For operations that might legitimately fail at runtime (like copying files, deleting temporary files, etc.), use the `error_code` version—because these failures are expected and need to be handled gracefully. +My personal preference is: for initialization operations at program startup (creating config directories, etc.), use the throwing version—because failure here means the program cannot run normally, and an exception can directly terminate the startup process. For operations that might fail normally at runtime (copying files, deleting temporary files, etc.), use the `error_code&` version—because these failures are expected and need to be handled gracefully. ## Summary -In this article, we covered the core file operations of the `std::filesystem` library. File status queries (`exists`, `is_regular_file`, `is_directory`) and metadata queries (`file_size`, `last_write_time`, `status`) let us understand "what is actually on the file system." `create_directory` and `create_directories` handle directory creation, with the latter automatically creating intermediate directories, which is very convenient. `copy` / `copy_file` provide flexible file copying, `remove` / `remove_all` handle file deletion, and `rename` provides atomic renaming. `permissions` and `space` handle permission and disk space queries, respectively. `temp_directory_path` and the "write to temporary file + atomic rename" pattern are key techniques for ensuring data integrity. +In this post, we covered the core file operations of the `std::filesystem` library. File status queries (`exists`, `is_regular_file`, `is_directory`) and metadata queries (`file_size`, `last_write_time`, `status`) let us understand "what is actually on the file system." `create_directory` and `create_directories` handle directory creation, with the latter automatically creating intermediate directories, which is very convenient. 
`copy` / `copy_file` provide flexible file copying, `remove` / `remove_all` provide file deletion, and `rename` provides atomic renaming. `permissions` and `space` handle permission and disk space queries respectively. `temp_directory_path` and the "write to temporary file + atomic rename" pattern are key techniques for ensuring data integrity. -In the next article, we will discuss directory traversal—`directory_iterator` and `recursive_directory_iterator`—and how to efficiently search for files in a file system. +In the next post, we will discuss directory traversal—`directory_iterator` and `recursive_directory_iterator`—and how to efficiently search for files in the file system. ## Reference Resources diff --git a/documents/en/vol3-standard-library/01-vector-deep-dive.md b/documents/en/vol3-standard-library/01-vector-deep-dive.md new file mode 100644 index 000000000..ab8d3a084 --- /dev/null +++ b/documents/en/vol3-standard-library/01-vector-deep-dive.md @@ -0,0 +1,300 @@ +--- +chapter: 7 +cpp_standard: +- 11 +- 14 +- 17 +- 20 +description: Based on the three-pointer internal representation, we dive deep into + `std::vector`'s reallocation costs, the full picture of iterator invalidation, `move_if_noexcept` + exception safety, and C++20 `constexpr vector` with `erase`/`erase_if`. 
+difficulty: intermediate +order: 1 +platform: host +prerequisites: +- 卷一:vector 基础用法(size / capacity / push_back) +reading_time_minutes: 16 +tags: +- host +- cpp-modern +- intermediate +- vector +title: 'Vector Deep Dive: Three Pointers, Reallocation, and Iterator Invalidation' +translation: + source: documents/vol3-standard-library/01-vector-deep-dive.md + source_hash: 3a794c3b8b7c339211aacff5c51798b07990dff9266fcf14a11fb79bdaa0a358 + translated_at: '2026-06-14T00:19:27.289156+00:00' + engine: anthropic + token_count: 2821 +--- +# Vector Deep Dive: Three Pointers, Reallocation, and Iterator Invalidation + +In this post, I want to have a deep conversation with you about the implementation layer of `std::vector`. + +In Volume 1, we've been using `std::vector` as a "self-growing array" quite smoothly, picking up `push_back`, `size`, `operator[]`, and iteration with ease. But I must be honest—using it smoothly and truly understanding it are two different things. Have you ever encountered these weird situations: a loop continuously `push_back`-ing, running fast most of the time, but stuttering inexplicably on one specific iteration; or you carefully cache an iterator or a pointer, and one day it points to a piece of garbage; or you thought you wrote strongly exception-safe code, only to have a hole silently torn in it during a reallocation. + +The roots of these pitfalls are buried deep in `std::vector`'s implementation layer. So, in this post, we won't repeat how to call the APIs from Volume 1 (you surely know that by now). Instead, we'll break `std::vector` down into three pointers, a reallocation strategy, a rule table for invalidation, and conveniently connect the two new doors C++20 opened for it—`constexpr` and `erase_if`. + +------ + +## Three Pointers Hold Up the Entire Vector + +In mainstream standard library implementations (libstdc++, libc++, MSVC STL), the body of a `std::vector` is essentially just three pointers. 
Not an array, not a linked list, just `M_start` pointing to the first element, `M_finish` pointing to "one past" the last valid element, and `M_end_of_storage` pointing to the end of the allocated buffer. (I recall there was a question on Zhihu about this, and mainstream implementations indeed follow this.) + +```cpp +// Simplified implementation structure +template <typename T> +class vector { + T* M_start; // Points to the beginning of the buffer + T* M_finish; // Points to one past the last element + T* M_end_of_storage; // Points to the end of the allocated capacity +}; +``` + +Once you deduce along this diagram, everything clicks: `size()` is just `M_finish - M_start`, `capacity()` is `M_end_of_storage - M_start`, and `capacity() - size()` is exactly the number of elements you can still stuff in without reallocation. The standard text doesn't actually mandate `std::vector` must look like this (it only requires contiguous storage plus a bunch of interface behaviors), but once you know the underlying layer is these three pointers, all subsequent features become logical: + +1. Reallocation is nothing more than moving this `M_start`/`M_finish`/`M_end_of_storage` chunk to a new buffer. +2. Iterator invalidation is nothing more than the buffer being swapped out. +3. `data()` can feed directly into C APIs because `M_start` points to a whole chunk of contiguous raw memory. + +## Reallocation: Amortized Constant, but Single Operation Can Be O(n) + +So what happens when you `push_back` into a `std::vector` that is already full? It triggers a *reallocation*—applying for a new buffer, moving old elements over, and releasing the old buffer. The standard's guarantee for this step is **amortized constant time complexity**. Please hold onto the word "amortized"; it is not "constant". 
+ +This is too easily misread as "`push_back` is O(1) every time", so some friends confidently stuff `push_back` into hot loops, only to see one specific reallocation become an O(n) move, causing a sharp spike in the performance curve. Why does amortized analysis hold? The key lies in the fact that during reallocation, capacity grows by a geometric factor greater than 1. Thus, the cost of that one expensive move is spread (amortized) over the preceding several cheap `push_back` operations. + +(PS: I've been incredibly busy lately. If you find this topic interesting, try profiling it locally!) + +```cpp +// Visualizing capacity jumps +#include <iostream> +#include <vector> + +int main() { + std::vector<int> v; + for (int i = 0; i < 20; ++i) { + size_t old_cap = v.capacity(); + v.push_back(i); + if (v.capacity() != old_cap) { + std::cout << "Capacity changed: " << old_cap << " -> " << v.capacity() << '\n'; + } + } +} +``` + +So what is this multiplier exactly? Sorry, **the standard doesn't specify** (strictly speaking, it's *unspecified*, which is looser than *implementation-defined*; the latter at least requires the implementation to document it). So the three big players chose their own paths: libstdc++ and libc++ are roughly 2× (libstdc++ grows to `size() + max(size(), n)`, libc++ to `max(2 * capacity(), new_size)`), while MSVC STL uses 1.5× (`capacity + capacity / 2`). If you don't believe me, `push_back` 16 elements in a row and print `capacity()`—libstdc++/libc++ follow the sequence 1, 2, 4, 8, 16, while MSVC follows 1, 2, 3, 4, 6, 9, 13, 19. + +MSVC choosing 1.5× wasn't a random decision. When the multiplier is small enough (the classic bound is the golden ratio, ≈1.618, and 1.5 is below it), the blocks freed by earlier reallocations can, once coalesced, add up to at least the size of a later request. + +This means historically freed memory might be large enough to satisfy the current request, allowing the allocator to reuse it, reducing fragmentation, and preventing RSS (Resident Set Size) from staying too high. 
With strict 2×, the new request always exceeds the sum of every block freed so far (1 + 2 + … + 2^(k−1) = 2^k − 1 < 2^k), so even a perfectly coalesced hole can never fit it; reuse is impossible. The cost, of course, is that 1.5× involves more moves. This is a trade-off between "memory reuse" and "number of moves," and each vendor has their own calculation. (There's a small edge case: the first time `push_back` jumps from capacity 0 to 1, all three agree. This is purely a special case of "initially 0", so don't use that to verify the 2×/1.5× rule.) + +> ⚠️ Let me repeat: when writing performance conclusions, please use "amortized constant". Don't write "constant" just to save space. The single `push_back` that triggers reallocation is genuinely O(n). + +## Iterator Invalidation: A Table Summarizes All Rules + +Probably no container is easier to trip up on "iterator invalidation" than `std::vector`—you store an iterator or a pointer, and after some operation, it silently becomes a wild pointer. The rules can actually be summarized in a table: + +| Operation | When Invalidation Occurs | Scope of Invalidation | +|------|---------|---------| +| `push_back` / `emplace_back` | Only when reallocation is triggered | **All** if triggered; only `end()` if not triggered (space remains) | +| `resize` | When `resize` triggers reallocation | All if triggered; otherwise only `end()` when growing, or the removed tail plus `end()` when shrinking | +| `reserve` | If reallocation occurs | All | +| `insert` | If `size() + n` triggers reallocation | All if triggered; otherwise everything before the insertion point remains valid, and the insertion point and everything after it (including `end()`) are invalidated | +| `pop_back` / `erase` | Always | **Deleted element and everything after it** are invalidated | +| `assign` | Always | All | +| `clear` | Always | All | +| `operator=` | Always | All (the old contents are replaced) | +| `swap` / `std::swap` | —— | **Does not invalidate**: Iterators/pointers/references remain valid (apart from `end()`), but they now point to elements in the "other" container | + 
+Think the table is too dense? Compress it into a decision tree and it's easier to remember: + +```mermaid +graph TD + A[Operation on vector] --> B{Does it change size?} + B -- No --> C[No Invalidation] + B -- Yes --> D{Does it change capacity?} + D -- No --> E[Invalidate elements at/after modification point] + D -- Yes --> F[Invalidate All] +``` + +The easiest one to misremember in the table is the last one, `swap`. It doesn't invalidate in the traditional sense—you swapped away the container's contents, but the iterator is still pinned to the original memory address. So now it points to the same element, which simply belongs to the other container after the swap. Once you understand this, you can see why some libraries write weird-looking code like `std::vector<T>().swap(v)` to "truly free" memory: it swaps in an empty temporary object, taking the original buffer and capacity away to be destructed, leaving things squeaky clean. + +## `move_if_noexcept` During Reallocation + +The strong exception guarantee requires that an operation either succeeds completely or leaves the state unchanged. When `std::vector` triggers reallocation, it must move old elements to the new buffer one by one. This step is a potential exception throwing point. To achieve "rollback if moving fails halfway", the standard library makes a critical judgment on the element type during reallocation: **If the element's move constructor is `noexcept`, then move; otherwise, honestly fall back to copy** (a move-only type with no copy constructor is still moved, at the price of losing the strong guarantee). + +The basis for this judgment is `std::is_nothrow_move_constructible_v<T>`. Translating this—if you wrote a move constructor for your type but didn't mark it `noexcept`, `std::vector` will get nervous during reallocation and would rather take the slower copy path. Why? If a copy fails, the old buffer is still there, so we can roll back. If a move fails, the source element might have been gutted already, making recovery impossible. So my advice is simple: if you can add `noexcept` to a move constructor, definitely do it. 
It directly decides whether reallocation in `std::vector` is a "move" (fast) or a "copy" (slow). The standard library specifically prepared a `std::move_if_noexcept` tool for this, though its real stage is exactly this job inside containers of "choosing between move/copy based on exception safety". + +## Two New Doors C++20 Opened for Vector + +### One Door is `constexpr vector` + +C++20 finally allows `std::vector` to be used at compile time. Behind this are two proposals working in relay: **P0784R7** "More constexpr containers" first paved the way—making `allocator`'s `allocate`/`deallocate` and `allocator_traits`'s `select_on_container_copy_construction` `constexpr`, plus a model called *transient constexpr allocation*; **P1004R2** "Making std::vector constexpr" then built on this mechanism to mark `std::vector`'s member functions as `constexpr` one by one (the sibling paper **P0980R1** "Making std::string constexpr" does the same for `std::string`). To detect support, check the `__cpp_lib_constexpr_vector` feature test macro. + +There is a limitation here that **must be clarified**: the transient allocation model requires that *memory allocated during constant evaluation must be released before the end of that same constant evaluation*, otherwise the program is ill-formed. In plain English—you cannot define a persistent `constexpr std::vector` variable and "bring" its buffer of heap objects out of compile time. So how do we actually use `std::vector` at compile time? The correct way is: inside a `constexpr` function, temporarily create it, perform a bunch of operations, and finally **return only a scalar result** (sum of elements, count, a specific element value, etc.), letting the buffer destruct itself before the function returns. This fits embedded systems and lookup table scenarios perfectly—use `std::vector` as a temporary workspace at compile time to calculate a constant, then move the result into a `std::array` or `constexpr` variable, saving all runtime initialization costs. 
+ +### The Other Door is `erase` / `erase_if` + +In old C++, to delete all elements satisfying a condition from a `std::vector`, you had to hand-write the famous erase-remove idiom: `v.erase(std::remove(v.begin(), v.end(), value), v.end());`. It's long and error-prone—I've seen accidents where people forget the second `v.end()` or forget to wrap the outer `erase`. C++20 incorporated this with a pair of free functions: `std::erase` deletes all elements equal to a value, `std::erase_if` deletes all elements satisfying a predicate, and both return the number of elements erased. + +These functions come from proposal **P1209R0**, titled "Adopt Consistent Container Erasure from Library Fundamentals 2 for C++20"—just looking at the title you know their intent: to formally land the unified erasure API that was originally in the Library Fundamentals TS into C++20. cppreference has a crisp definition for them: they *"erase all elements that compare equal to value / satisfy the predicate from the container"*, replacing that error-prone erase-remove. Don't get one detail mixed up: sequence containers (`vector`, `deque`, `forward_list`, `list`, `string`) get both `std::erase` and `std::erase_if`, while associative/unordered associative containers only get `std::erase_if`—because their member `erase` was already doing "delete by key", and stuffing another `std::erase` in would cause semantic conflict. To detect support, check `__cpp_lib_erase_if` (C++20, value `202002L`). + +------ + +## Let's Run It + +Talk is cheap. Below are a few snippets marked with platform and standard that can be compiled standalone. We'll run through the previous concepts one by one. + +First, observe reallocation. Print a line every time capacity changes, and you can intuitively see whether yours is 2× or 1.5×. 
+ +```cpp +// Run this to see the capacity growth sequence +#include +#include + +int main() { + std::vector v; + size_t old_cap = 0; + for (int i = 0; i < 100; ++i) { + v.push_back(i); + if (v.capacity() != old_cap) { + std::cout << "Size: " << v.size() << ", New Capacity: " << v.capacity() << '\n'; + old_cap = v.capacity(); + } + } +} +``` + +Second, compare the two scenarios of iterator invalidation. `push_back` doesn't invalidate when there's space, but invalidates all once reallocation triggers; `insert` inevitably swaps buffers once it exceeds current capacity. + +```cpp +// Iterator invalidation demo +#include +#include + +int main() { + std::vector v = {1, 2, 3}; + + // Scenario 1: push_back without reallocation + auto it1 = v.begin(); + v.push_back(4); // No reallocation, it1 remains valid + std::cout << "After push_back (no realloc): " << *it1 << '\n'; + + // Scenario 2: push_back triggering reallocation + v.shrink_to_fit(); // Force tight capacity + it1 = v.begin(); + v.push_back(5); // Likely triggers reallocation + if (v.begin() != it1) { + std::cout << "Iterator invalidated after reallocation!\n"; + } +} +``` + +Third, `move_if_noexcept`. For a type with a move constructor marked `noexcept`, reallocation uses move; without it, it falls back to copy. 
+ +```cpp +// move_if_noexcept behavior +#include +#include +#include + +struct Copyable { + std::string data; + // Move constructor NOT noexcept (implicitly noexcept(false) if it can throw) + Copyable(std::string s) : data(s) {} + Copyable(const Copyable& other) : data(other.data) { std::cout << "Copied\n"; } + Copyable(Copyable&& other) noexcept(false) : data(std::move(other.data)) { std::cout << "Moved\n"; } +}; + +struct Movable { + std::string data; + Movable(std::string s) : data(s) {} + Movable(const Movable& other) : data(other.data) { std::cout << "Copied\n"; } + Movable(Movable&& other) noexcept : data(std::move(other.data)) { std::cout << "Moved\n"; } +}; + +int main() { + std::cout << "Testing Copyable (noexcept(false)):\n"; + std::vector v1; + v1.reserve(1); + v1.emplace_back("A"); + v1.emplace_back("B"); // Triggers reallocation, should see "Copied" + + std::cout << "\nTesting Movable (noexcept(true)):\n"; + std::vector v2; + v2.reserve(1); + v2.emplace_back("A"); + v2.emplace_back("B"); // Triggers reallocation, should see "Moved" +} +``` + +Fourth, `constexpr vector`. Use it as a temporary workspace at compile time, bringing out only the scalar result. + +```cpp +// constexpr vector usage (C++20) +#include +#include + +constexpr int sum_vector() { + std::vector v; + for (int i = 0; i < 10; ++i) { + v.push_back(i); + } + // Calculate sum, buffer is destroyed after return + return std::accumulate(v.begin(), v.end(), 0); +} + +int main() { + constexpr auto sum = sum_vector(); + static_assert(sum == 45, "Sum check"); +} +``` + +Fifth, `erase_if`, one line to replace erase-remove. 
+ +```cpp +// std::erase_if usage (C++20) +#include +#include + +int main() { + std::vector v = {1, 2, 3, 4, 5, 6}; + + // Remove all even numbers + auto erased_count = std::erase_if(v, [](int x) { return x % 2 == 0; }); + + std::cout << "Erased " << erased_count << " elements.\n"; + for (auto x : v) std::cout << x << ' '; // 1 3 5 +} +``` + +Of course, you can also click this to see the phenomenon! + + + +------ + +## Final Thoughts + +Piecing these back into engineering practice, my advice usually boils down to a few points. First, **if you can estimate the scale, `reserve` it**—right after construction, `reserve` based on the known or estimated final size, compressing several reallocations into one allocation. The effect on hot paths is immediate. Second, **use `std::erase_if` to delete elements**, stop handwriting erase-remove; it's shorter and harder to miss that second `end()`. Third, **for compile-time table generation, use `std::vector` as a temporary zone**, calculate and hand only the scalar result to `std::array` or stuff it into a `constexpr` variable, comfortably enjoying the compile-time dynamic capability given by transient allocation without crossing the line. + +Finally, leave you with this impression: `std::vector`'s body is roughly three pointers (`start`, `finish`, `end_of_storage`); `size()`/`capacity()` are calculated from them. `push_back` is amortized constant, not constant; the growth multiplier isn't specified by the standard (libstdc++/libc++ use 2×, MSVC uses 1.5×). Invalidation rules are just one table—reallocation operations "invalidate all only if triggered", `erase` invalidates "deleted and after", `swap` doesn't invalidate at all. Whether elements move during reallocation depends on if the move constructor is marked `noexcept`. 
C++20 makes `std::vector` `constexpr` (P0784R7 + P1004R2), but limited by transient allocation to be a compile-time temporary zone; in the same year, `std::erase`/`std::erase_if` (P1209R0) took care of erase-remove for you. With these in your pocket, you'll basically avoid all `std::vector` pitfalls. + +------ + +## Reference Resources + +- [std::vector — cppreference](https://en.cppreference.com/w/cpp/container/vector) +- [vector::capacity — cppreference](https://en.cppreference.com/w/cpp/container/vector/capacity) +- [vector::push_back — cppreference](https://en.cppreference.com/w/cpp/container/vector/push_back) +- [std::erase / std::erase_if (vector) — cppreference](https://en.cppreference.com/w/cpp/container/vector/erase2) +- [vector.capacity — eel.is/c++draft](https://eel.is/c++draft/vector.capacity) · [sequence.reqmts — eel.is/c++draft](https://eel.is/c++draft/sequence.reqmts) +- [P0784R7 More constexpr containers](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0784r7.html) +- [P1004R2 Making std::vector constexpr](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1004r2.pdf) +- [P1209R0 Adopt Consistent Container Erasure from Library Fundamentals 2 for C++20](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p1209r0.html) diff --git a/documents/en/vol3-standard-library/02-string-memory-deep-dive.md b/documents/en/vol3-standard-library/02-string-memory-deep-dive.md new file mode 100644 index 000000000..49ca8a0f5 --- /dev/null +++ b/documents/en/vol3-standard-library/02-string-memory-deep-dive.md @@ -0,0 +1,155 @@ +--- +chapter: 7 +cpp_standard: +- 11 +- 14 +- 17 +- 23 +description: A deep dive into the history and entanglement of `std::string`'s SSO + and COW, why C++11 forbids COW, SSO threshold implementation details, and buffer + reuse in C++23's `resize_and_overwrite`. 
+difficulty: intermediate +order: 2 +platform: host +prerequisites: +- 卷一:std::string 基础用法 +reading_time_minutes: 14 +tags: +- host +- cpp-modern +- intermediate +- 内存管理 +title: 'Deep Dive into std::string: SSO, COW, and resize_and_overwrite' +translation: + source: documents/vol3-standard-library/02-string-memory-deep-dive.md + source_hash: 8887bdd7d4e968834210afa3b7772627cb52ccefe9b91e5f1add4ac46722cc51 + translated_at: '2026-06-14T00:19:45.074884+00:00' + engine: anthropic + token_count: 1659 +--- +# Deep Dive into string: SSO, COW, and resize_and_overwrite + +`std::string` is likely the most heavily used type in the standard library, yet it is often the least understood. We happily write `std::string` code all day long, but when pressed with questions like—"Why is `sizeof(std::string)` 32 on my machine?", "Why do two strings in old code share the same buffer?", or "What exactly does C++23's `resize_and_overwrite` save?"—most of us are stumped. The root of these issues lies in the memory model and history of `std::string`. + +In this article, we will focus on the memory and buffer story of `std::string`: the historical entanglement of SSO and COW, implementation thresholds for SSO, and the buffer reuse API `resize_and_overwrite` introduced in C++23. (C++20's `std::u8string` is a separate topic; see Volume 3 [char8_t and UTF-8 Strings](./03-char8-t-utf8.md).) + +------ + +## SSO and COW: An ABI History + +To understand why `std::string` looks the way it does today, we need to turn the clock back to C++03. Back then, there was a particularly attractive implementation approach—**Copy-On-Write (COW)**: when you copied a `std::string`, it didn't actually copy the characters. Instead, it let the source and destination share a single read-only buffer, maintaining only an extra reference count. Only when one side needed to write would it perform a deep copy. 
In scenarios with heavy copying of read-only strings, this saved significant memory and time, and early libstdc++ (GCC's C++ Standard Library) was a staunch proponent of COW. + +```cpp +// COW era: Copying is cheap (just a pointer and ref count increment) +std::string s1 = "Hello, World!"; +std::string s2 = s1; // No character copy happens here +``` + +However, the C++11 standard effectively ruled COW "illegal." Proposal **N2668**, "Concurrency Modifications to Basic String," rewrote the invalidation rules for `std::string` and the semantics of `data()`/`c_str()`. The text stated unequivocally: *"This change effectively disallows copy-on-write implementations."* What was the legal root cause? I must remind you: many assume it's "thread safety" or "reference counting," but those are merely side issues that amplified the conflict. The real criteria are these three rules combined: + +- **Invalidation Rules**: The standard specifies that calling element access methods like `at()`, `front()`, `back()`, `operator[]`, and iterators, as well as `data()` itself, must not invalidate existing references and iterators. +- **Contiguous Null-Termination of `data()`/`c_str()`**: They must return a pointer to a contiguous, null-terminated array within the object's buffer. +- **Non-const Access Requires a Writable Pointer**: Once you use `operator[]` or `data()` to get a non-const pointer, COW is forced to *unshare* (deep copy) the shared buffer to provide you with an exclusive, contiguous, writable pointer. + +```cpp +// C++11 requires non-const access to return a pointer to the *actual* buffer +std::string s = "hello"; +char* p = &s[0]; // COW must unshare here to satisfy C++11 guarantees +p[0] = 'H'; // Must modify 's' directly, not a shared copy +``` + +As you can see, COW trying to embrace "sharing," "non-invalidating references," "O(1)," and "contiguous null-termination" simultaneously is a contradiction. 
The standard decisively chose the latter three, making COW non-conforming. In reality, the transition was turbulent: due to ABI compatibility baggage, libstdc++ dragged its feet until **GCC 5 (2015)** to switch to a non-COW implementation via the `_GLIBCXX_USE_CXX11_ABI` switch (the new type lives in the inline namespace `std::__cxx11`, so its mangled name is `std::__cxx11::basic_string`); libc++ and MSVC's Dinkumware implementation, however, used SSO from the start, avoiding this historical debt entirely. + +## SSO Thresholds: Why is sizeof 32? + +With COW retired, mainstream implementations shifted uniformly to **SSO (Small String Optimization)**: reserving a small inline buffer inside the `std::string` object. Strings short enough to fit in this buffer avoid heap allocation and are stored directly within the object itself. This also answers "Why `sizeof(std::string)` is 32"—the object must simultaneously hold the inline buffer, a heap pointer, size, and capacity fields. On 64-bit targets, libstdc++ and MSVC STL pack all of this into 32 bytes, while libc++ squeezes it into 24. + +I should mention: the SSO threshold is an **implementation detail; the standard never specifies it** (it falls under QoI, Quality of Implementation). On typical 64-bit builds, libstdc++ and MSVC STL can store up to 15 `char`s inline, while libc++ can store up to 22 thanks to a more tightly packed layout. These numbers are not promises and may change across implementations or versions—so, mark my words—**don't use these thresholds as hard assumptions in your code**. It might be 15 today, but it might not be tomorrow with a different compiler. + +## resize_and_overwrite: C++23 Finally Lets You Use string as a Buffer + +C++23 added a quite handy member to `std::string`: `resize_and_overwrite`, proposed in **P1072R10** "basic_string::resize_and_overwrite". Its most typical use case is treating `std::string` as a writable buffer to interface with C APIs that "write some data, then tell you how much" (like `snprintf()`, `std::strftime()`, `getcwd()`). 
+ +The signature looks like this: `void resize_and_overwrite(size_t n, Operation op)`. It first expands the string capacity to at least `n`, then passes a pointer `p` (pointing to the first character of contiguous storage) and that `n` to the callback `op`. `op` writes the actual content in-place and then **returns an integer r as the new length** (requiring `r <= n`). What's the benefit? Unlike `resize()`, it **does not** value-initialize (zero out) the new region, saving an extra write operation. You only write the bytes you actually need in the callback, then report the actual length. + +Freedom comes with a price; `resize_and_overwrite` has several UB red lines to watch out for: `op` must return an integer within `[0, n]`; going out of bounds is undefined behavior. `op` throwing an exception is UB (so `op` is usually marked `noexcept`). `op` cannot modify the `p` or `n` parameters themselves. Finally, every character in the preserved range `[0, r)` must be a determinate value written by `op`; indeterminate values are not allowed. Also, easily overlooked—whether this call triggers reallocation or not, it invalidates all iterators, pointers, and references. To detect support, check `__cpp_lib_string_resize_and_overwrite` (C++23, value `202110L`). + +------ + +## Let's Run It + +First, let's look at SSO. Print `sizeof(std::string)` and check the `data()` address of short and long strings to see if they land inside the object. + +```cpp +#include +#include +#include + +void observe_sso() { + std::cout << "sizeof(std::string) = " << sizeof(std::string) << std::endl; + + std::string short_str = "short"; + std::string long_str = "This is a very long string that definitely exceeds the small string optimization buffer..."; + + std::cout << "Short string (" << short_str << ") data addr: " << static_cast(short_str.data()) << std::endl; + std::cout << "Long string (" << long_str.substr(0, 20) << "... 
) data addr: " << static_cast(long_str.data()) << std::endl; + + // A rough check: if the address is far from the stack address of the string object, it's likely on the heap + std::cout << "Address of short_str object: " << static_cast(&short_str) << std::endl; + std::cout << "Address of long_str object: " << static_cast(&long_str) << std::endl; +} +``` + +Now let's compare `resize_and_overwrite` with the old `resize` approach. I've crafted a "mock C API" here—it writes fixed content to a buffer and returns the actual bytes written—to make the difference between the two methods obvious. + +```cpp +#include +#include +#include + +// Mock C API: Writes "Hello" into the buffer and returns length 5 +size_t mock_c_api_write(char* buffer, size_t buffer_size) { + const char* msg = "Hello"; + size_t len = strlen(msg); + if (len > buffer_size) len = buffer_size; + memcpy(buffer, msg, len); + return len; +} + +void test_resize_and_overwrite() { + std::string s; + + // Old way (C++20): resize() initializes memory (wasteful) + s.resize(32); // Reserves space and zero-fills 32 bytes + size_t written = mock_c_api_write(s.data(), s.size()); + s.resize(written); // Trim to actual size + std::cout << "Old resize result: " << s << std::endl; + + // New way (C++23): resize_and_overwrite() avoids initialization + s.clear(); + s.resize_and_overwrite(32, [](char* p, size_t n) { + // p points to raw storage, n is 32. No zero-filling happened. 
+ return mock_c_api_write(p, n); + }); + std::cout << "New resize_and_overwrite result: " << s << std::endl; +} +``` + + + +------ + +## References + +- [std::basic_string — cppreference](https://en.cppreference.com/w/cpp/string/basic_string) +- [basic_string::data — cppreference](https://en.cppreference.com/w/cpp/string/basic_string/data) +- [basic_string::resize_and_overwrite — cppreference](https://en.cppreference.com/w/cpp/string/basic_string/resize_and_overwrite) +- [N2668 Concurrency Modifications to Basic String](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2668.htm) +- [P1072R10 basic_string::resize_and_overwrite](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p1072r10.html) diff --git a/documents/en/vol3-standard-library/03-char8-t-utf8.md b/documents/en/vol3-standard-library/03-char8-t-utf8.md new file mode 100644 index 000000000..4620357db --- /dev/null +++ b/documents/en/vol3-standard-library/03-char8-t-utf8.md @@ -0,0 +1,129 @@ +--- +chapter: 7 +cpp_standard: +- 20 +- 23 +description: Explains the rationale behind C++20's `char8_t`, the two pitfalls and + migration patterns for the `u8` literal type changes, and the relaxation of array + initialization in C++23's P2513. +difficulty: intermediate +order: 3 +platform: host +prerequisites: +- 卷一:std::string 与字符串字面量基础 +reading_time_minutes: 12 +tags: +- host +- cpp-modern +- intermediate +- 类型安全 +title: char8_t and UTF-8 Strings +translation: + source: documents/vol3-standard-library/03-char8-t-utf8.md + source_hash: bf65e1fa69d057d8e2387796ce4ed2c2c677e348f2808d359b0b024109c38afc + translated_at: '2026-06-14T00:19:58.325857+00:00' + engine: anthropic + token_count: 1220 +--- +# char8_t and UTF-8 Strings + +Before C++20, the type of the UTF-8 string literal `u8"..."` was `const char[]`—which is fundamentally no different from ordinary strings. 
This might sound trivial, but it is actually the root of many pitfalls: you cannot distinguish at the type level whether "this string is UTF-8" or "this string is the native execution character set," and the compiler cannot help you prevent errors where UTF-8 text is mistaken for text in the native encoding. C++20 introduced `char8_t` to separate UTF-8 from the ambiguous zone of `char`, giving it a dedicated type so the type system can stand guard for us. This change comes from proposal **P0482R6** "char8_t: A type for UTF-8 characters and strings". To detect support, check `__cpp_char8_t` (C++20, value `201811L`). + +However—I must issue a warning in advance—this "independent type" change is **breaking**: it altered the type of `u8""` string literals, causing a large amount of legacy code that compiled peacefully under C++17 to fail immediately when upgraded to C++20. In this article, we will clearly explain the two most common pitfalls, how to migrate code, and the fix C++23 applied later. + +------ + +## u8 Literals: The Type Transformation + +Starting with C++20, the type of the UTF-8 string literal `u8"..."` changed from `const char[]` to `const char8_t[]`; the type of the UTF-8 character literal `u8'x'` also changed from `char` to `char8_t`. This `char8_t` is a **distinct fundamental type** with an underlying type of `unsigned char`. Its size, signedness, alignment, and conversion rank are all consistent with `unsigned char`—but it **does not participate in aliasing rules** (it is not one of the types allowed for alias access in [basic.lval]), meaning you cannot use `char8_t*` to legally alias access the memory of other objects. + +Why go to such lengths to create a separate type? The reason is simple: once types are separated, the compiler can directly report errors for mistakes like "treating a UTF-8 string as a native encoding `char` string" or "printing `char8_t` as an integer," rather than waiting for runtime to output a screen full of garbage before you realize the mistake. 
C++20 decided that trading a bit of migration cost for type safety is worth it. + +## Two Classic Pitfalls + +With the type change, two migration pitfalls surface. + +**The first pitfall: `char8_t*` can no longer implicitly convert to `char*`.** In C++17, `const char* p = u8"foo";` was completely legal (back then `u8""` and `""` were still family); in C++20, `u8"foo"` decays to `const char8_t*`, and `const char8_t*` will not implicitly convert to `const char*`, making this line ill-formed. All old code that feeds `u8""` literals to interfaces expecting `const char*` (constructing `std::string`, passing to C APIs, certain overloads of `std::filesystem::path`, etc.) gets caught. + +**The second pitfall: the Standard Library intentionally deleted the `char8_t` `ostream` overloads.** You might think—then I'll just print it with `std::cout << u8"text"`? That won't work either. Starting with C++20, the Standard Library **explicitly deleted** the `operator<<` overloads for `char8_t` and `char8_t` sequences (UTF-8 characters/strings) on `std::ostream` and `std::wostream` (note, this isn't "forgot to implement," it's intentional). Consequently, `std::cout << u8'x'` and `std::cout << u8"text"` will fail to compile because they hit the deleted overload. This was done specifically to stop legacy code from blindly printing UTF-8 data as integers or pointers. + +## How to Migrate Legacy Code + +Facing these two pitfalls, how do we move C++17 code to C++20? Here are a few paths, listed from lowest to highest cost: + +1. **Compiler Flag Rollback**: The easiest is to revert via compiler options: add `-fno-char8_t` on GCC/Clang, or `/Zc:char8_t-` on MSVC. This reverts the type of `u8""` literals back to the C++17 `const char[]` semantics, so old code compiles immediately. This is only a stopgap for the transition period; don't rely on it for new code long-term. +2. 
**Explicit Byte-by-Byte Conversion**: When you truly need to feed an interface that only recognizes `char*` and you know the content is UTF-8 bytes, use `reinterpret_cast` (or a C-style cast) to switch the view—the byte content remains unchanged, just the pointer type changes, bypassing the "first pitfall." +3. **The "Politically Correct" Path: `std::u8string`**: Use `std::u8string`/`std::u8string_view` to hold UTF-8 text type-safely. When printing, write a small helper function to convert it out, maintaining type safety to the end. + +## C++23's P2513: A Partial Fix + +The scope of "cannot initialize" in the "first pitfall" was later narrowed slightly. Proposal **P2513R4** "char8_t Compatibility and Portability Fix," adopted as a Defect Report (DR) for C++20 and landing in C++23 (the value of `__cpp_char8_t` also changed to `202207L`), **re-allows using `u8""` string literals to initialize `char` or `unsigned char` arrays**—meaning `char a[] = u8"foo";` is legal again. However, note that this only relaxes "array initialization"; the implicit conversion from `char8_t*` to `char*` **remains ill-formed**, so the pointer assignment scenario in pitfall one was not let off the hook. + +------ + +## Try It Out + +The demo below places the two pitfalls (which I have "sealed" with comments—uncomment them to cause immediate compilation failure) and two correct ways of writing them side-by-side for easy comparison. 
+ +```cpp +#include +#include +#include + +// Helper to print UTF-8 safely +void print_utf8(const char8_t* str) { + // Cast is safe here because we know the platform console handles UTF-8 + // (or we are just treating it as a byte sequence for demonstration) + std::cout << reinterpret_cast(str); +} + +int main() { + // --- Pitfall 1: Implicit conversion failure --- + // In C++17: char* s = u8"Hello"; // OK + // In C++20: char* s = u8"Hello"; // ERROR: char8_t* cannot convert to char* + + // Fix A: Explicit cast (Use with caution, ensure data is actually UTF-8) + const char* s1 = reinterpret_cast(u8"Hello"); + std::cout << "Fix A (Cast): " << s1 << std::endl; + + // Fix B: Use std::u8string (Type safe) + std::u8string u8s = u8"Hello UTF-8"; + // std::cout << u8s; // ERROR: operator<< deleted + print_utf8(u8s.c_str()); + std::cout << std::endl; + + + // --- Pitfall 2: Deleted std::cout overloads --- + // std::cout << u8'x'; // ERROR: operator<< deleted + // std::cout << u8"text"; // ERROR: operator<< deleted + + // Fix: Cast to const char* for printing (assuming environment supports UTF-8) + std::cout << "Fix B (Print): " << reinterpret_cast(u8"text") << std::endl; + + + // --- C++23 Update: Array Initialization --- + // P2513R4 allows this again in C++23 + char arr[] = u8"Array Init"; // OK in C++23 (and usually in C++20 with DR) + std::cout << "Array Init: " << arr << std::endl; + + return 0; +} +``` + + + +------ + +## Reference Resources + +- [char8_t — cppreference](https://en.cppreference.com/w/cpp/keyword/char8_t) +- [String literal — cppreference](https://en.cppreference.com/w/cpp/language/string_literal) +- [operator<<(basic_ostream) — cppreference](https://en.cppreference.com/w/cpp/io/basic_ostream/operator_ltlt2) +- [P0482R6 char8_t: A type for UTF-8 characters and strings](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html) +- [P2513R4 char8_t Compatibility and 
Portability](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2513r4.html) diff --git a/documents/en/vol3-standard-library/index.md b/documents/en/vol3-standard-library/index.md index 93b0ecc1c..06cb602df 100644 --- a/documents/en/vol3-standard-library/index.md +++ b/documents/en/vol3-standard-library/index.md @@ -1,6 +1,6 @@ --- -title: 'Part 3: Deep Dive into the Standard Library' -description: In-depth guide to STL containers, iterators, and algorithms +title: 'Volume Three: Deep Dive into the Standard Library' +description: Deep dive into STL containers, iterators, and algorithms platform: host tags: - cpp-modern @@ -8,25 +8,31 @@ tags: - intermediate translation: source: documents/vol3-standard-library/index.md - source_hash: a26180b80dd7a8f375637a8f3f5da60aef0e12f691b99413bddc1bae569dc22e - translated_at: '2026-05-26T11:37:46.849491+00:00' + source_hash: 6d341685c142401849898de762ab55be39b76fa975b91a23fd8882c728446c2e + translated_at: '2026-06-14T00:20:03.639470+00:00' engine: anthropic - token_count: 186 + token_count: 207 --- # Volume 3: Deep Dive into the Standard Library -> Status: Partial content available (pending rewrite) - ## Overview -This volume provides a deep dive into the C++ standard library. +This volume provides an in-depth look at the C++ Standard Library, focusing on the implementation details of containers and strings. + + + vector Deep Dive + string Deep Dive + char8_t and UTF-8 + + +## Articles to be Rewritten -## Existing Articles (Pending Rewrite to Generic Content) +The following are early drafts, planned to be rewritten and merged into the main chapter sequence. 
- Initializer Lists - Object Size and Trivial Types - array - span - Custom Allocators + array (To be rewritten) + initializer_list (To be rewritten) + span (To be rewritten) + Object Size and Trivial Types (To be rewritten) + Custom Allocators (To be rewritten) diff --git a/documents/en/vol5-concurrency/exercises/02-atomic-spsc.md b/documents/en/vol5-concurrency/exercises/02-atomic-spsc.md index 4ad9782f1..fa0f7bdcb 100644 --- a/documents/en/vol5-concurrency/exercises/02-atomic-spsc.md +++ b/documents/en/vol5-concurrency/exercises/02-atomic-spsc.md @@ -1,7 +1,7 @@ --- title: 'Lab 2: Atomic Metrics and SPSC Ring Buffer' -description: Master `atomic`, `memory_order`, false sharing, and benchmarking methodologies - through atomic counters and single-producer single-consumer ring buffers. +description: Master atomic, memory_order, false sharing, and benchmarking methodologies + via atomic counters and single-producer single-consumer ring buffers. chapter: 10 order: 2 difficulty: intermediate @@ -20,460 +20,286 @@ prerequisites: - 'Lab 0: Thread Lifecycle Lab' translation: source: documents/vol5-concurrency/exercises/02-atomic-spsc.md - source_hash: e9637902896206d664a2352ebed7453d17944c77adf70dd54cd65b5e523eb664 - translated_at: '2026-05-26T11:47:59.471636+00:00' + source_hash: adad8f737d9d3ef0b4cce931937876d7cf38f554eb2e1aaa2041d918845dec4c + translated_at: '2026-06-14T00:20:24.057615+00:00' engine: anthropic - token_count: 3232 + token_count: 3311 --- # Lab 2: Atomic Metrics and SPSC Ring Buffer ## Objectives -In Lab 1, we relied entirely on mutex and condition_variable—locking, waiting, and waking up. While the logic is clear, the overhead is significant. Every lock/unlock operation involves a system call into kernel space (futex). In extremely high-frequency scenarios (such as millions of messages per second), this overhead becomes unacceptable. In this lab, we enter a different world: using atomic operations and memory order to implement lock-free data exchange. 
+In Lab 1, we relied entirely on mutexes and condition variables—locking, waiting, and waking up. While the logic is clear, the overhead is significant. Every lock/unlock operation involves system calls in kernel mode (futex), which is unacceptable in high-frequency scenarios (e.g., passing millions of messages per second). In this Lab, we enter a different world: using atomic operations and memory ordering to implement lock-free data exchange. -We will first implement a set of atomic metric components—counters, max value trackers, and stop flags—which will be used repeatedly for performance monitoring in subsequent labs. Then, we will implement a fixed-capacity SPSC (Single-Producer Single-Consumer) ring buffer, using acquire-release semantics to guarantee data visibility and cache line padding to eliminate false sharing. Finally, we will run a benchmark comparison against the mutex-based queue from Lab 1, using data to illustrate the applicable scenarios for each approach. +We will first implement a set of atomic metric components—counters, maximum value trackers, and stop flags—which will be used repeatedly for performance monitoring in subsequent Labs. Then, we will implement a fixed-capacity SPSC (Single-Producer Single-Consumer) ring buffer, using acquire-release semantics to guarantee data visibility and cache line padding to eliminate false sharing. Finally, we will run benchmarks against the mutex-based queue from Lab 1 to demonstrate the applicable scenarios for each approach with real data. 
## Prerequisites -Before starting, make sure you have read the following chapters: +Before starting, ensure you have read the following chapters: -- **ch03-01**: atomic operations — `atomic`, `load`/`store`/`fetch_add`, is_lock_free -- **ch03-02**: Memory order explained — semantics and overhead of relaxed, acquire-release, and seq_cst -- **ch03-03**: memory_order_fence and barriers — use cases for explicit fences -- **ch03-04**: atomic wait and reference semantics — `wait`/`notify_one`/`notify_all` -- **ch03-05**: Atomic operation patterns — common atomic usage patterns +- **ch03-01**: Atomic operations — `std::atomic`, `load`/`store`/`exchange`/`compare_exchange`, `is_lock_free` +- **ch03-02**: Memory ordering deep dive — Semantics and overhead of `relaxed`, `acquire-release`, `seq_cst` +- **ch03-03**: `memory_order_fence` and barriers — Use cases for explicit fences +- **ch03-04**: Atomic wait and reference semantics — `wait`/`notify`/`address` +- **ch03-05**: Atomic operation patterns — Common atomic usage patterns -This lab does not depend on Lab 1 components, but we recommend completing Lab 1 first to understand the baseline comparison for the mutex approach. +This Lab does not depend on components from Lab 1, but it is recommended to complete Lab 1 first to understand the baseline for benchmark comparison. ## Environment Setup -Same as Lab 1. Additionally, for the performance testing section, we recommend running on Linux (requires `perf stat` support). WSL2 users can use perf directly. +Same as Lab 1. Additionally, for performance testing, it is recommended to run on Linux (requires `perf` support). WSL2 users can use `perf` directly. 
-Disabling CPU frequency scaling can improve benchmark stability (requires sudo): +Disabling CPU frequency scaling can improve benchmark stability (requires `sudo`): ```bash -sudo cpupower frequency-set -g performance +sudo cpupower frequency-set --governor performance ``` -## Final Interfaces +## Final Interface ### `AtomicCounter` — Atomic Counter (Milestone 1) -Member variable: internally holds an `std::atomic`. +Member variable: Internally holds `std::atomic`. | Method | Signature | Description | Milestone | |------|------|------|-----------| -| Constructor | `AtomicCounter(size_t initial = 0)` | Sets the initial value | MS1 | -| increment | `void increment()` | Atomically increments (`relaxed`) | MS1 | -| decrement | `void decrement()` | Atomically decrements | MS1 | -| get | `size_t get() const` | Reads the current value | MS1 | -| exchange | `size_t exchange(size_t new_val)` | Atomically replaces and returns the old value | MS1 | +| Constructor | `AtomicCounter(T initial = 0)` | Set initial value | MS1 | +| increment | `void increment(T delta = 1)` | Atomic increment (`fetch_add`) | MS1 | +| decrement | `void decrement(T delta = 1)` | Atomic decrement | MS1 | +| get | `T get() const` | Read current value | MS1 | +| exchange | `T exchange(T desired)` | Atomically replace and return old value | MS1 | -### `AtomicMaxTracker` — Atomic Max Tracker (Milestone 1) +### `AtomicMax` — Atomic Maximum Tracker (Milestone 1) -Member variable: internally holds an `std::atomic`. +Member variable: Internally holds `std::atomic`. 
| Method | Signature | Description | Milestone | |------|------|------|-----------| -| Constructor | `AtomicMaxTracker(size_t initial = 0)` | Sets the initial max value | MS1 | -| update | `void update(size_t value)` | Updates max value via CAS loop | MS1 | -| get | `size_t get() const` | Reads the current max value | MS1 | +| Constructor | `AtomicMax(T initial = 0)` | Set initial maximum value | MS1 | +| update | `void update(T value)` | Update max via CAS loop | MS1 | +| get | `T get() const` | Read current maximum value | MS1 | -### `StopFlag` — Stop Flag (Milestone 1) +### `StopToken` — Stop Flag (Milestone 1) -Member variable: internally holds an `std::atomic`. +Member variable: Internally holds `std::atomic`. | Method | Signature | Description | Milestone | |------|------|------|-----------| -| request_stop | `void request_stop()` | Sets the stop flag (`release`) | MS1 | -| is_stop_requested | `bool is_stop_requested() const` | Checks if stop is requested (`acquire`) | MS1 | +| request_stop | `void request_stop()` | Set stop flag (`store true`) | MS1 | +| is_stop_requested | `bool is_stop_requested() const` | Check if stopped (`load`) | MS1 | -### `SpscRingBuffer` — SPSC Ring Buffer (Milestone 2–4) +### `SPSCRingBuffer` — SPSC Ring Buffer (Milestone 2–4) Member variables: | Type | Member | Semantics | |------|------|------| -| `std::array` | `buffer_` | Fixed-capacity storage (determined at compile time) | -| `alignas(64) atomic` | `head_` | Consumer read position (cache line padding added in MS4) | -| `alignas(64) atomic` | `tail_` | Producer write position (cache line padding added in MS4) | +| `std::array` | `buffer_` | Fixed capacity storage (compile-time determined) | +| `std::atomic` | `head_` | Consumer read position (MS4 add cache line padding) | +| `std::atomic` | `tail_` | Producer write position (MS4 add cache line padding) | Interface: | Method | Signature | Description | Milestone | |------|------|------|-----------| -| Constructor | 
`SpscRingBuffer()` | Initializes head/tail to 0 | MS2 | -| try_push | `bool try_push(T item)` | Non-blocking write, returns false if full | MS2 | -| try_pop | `std::optional try_pop()` | Non-blocking read, returns nullopt if empty | MS2 | -| empty | `bool empty() const` | Whether the buffer is empty | MS2 | -| full | `bool full() const` | Whether the buffer is full | MS2 | +| Constructor | `SPSCRingBuffer()` | Initialize head/tail to 0 | MS2 | +| try_push | `bool try_push(const T& value)` | Non-blocking write, return false if full | MS2 | +| try_pop | `std::optional try_pop()` | Non-blocking read, return nullopt if empty | MS2 | +| empty | `bool empty() const` | Is buffer empty? | MS2 | +| full | `bool full() const` | Is buffer full? | MS2 | ## Milestone 1: Atomic Metric Components ### Objectives -Implement three components: `AtomicCounter`, `AtomicMaxTracker`, and `StopFlag`. The key is choosing the appropriate memory order for each operation—not all operations need the default `seq_cst`. +Implement `AtomicCounter`, `AtomicMax`, and `StopToken`. The key is to choose the appropriate memory order for each operation—not all operations require the default `seq_cst`. ### Why -These three components are infrastructure tools for all subsequent labs. The thread pool needs `AtomicCounter` to count completed tasks, the echo server needs `AtomicMaxTracker` to track the maximum number of concurrent connections, and all labs need `StopFlag` to implement graceful shutdown. By implementing them correctly now, we avoid repeatedly struggling with memory order choices later. +These three components are infrastructure tools for all subsequent Labs. The thread pool needs `AtomicCounter` to count completed tasks, the echo server needs `AtomicMax` to track peak concurrent connections, and all Labs need `StopToken` for graceful shutdown. Getting them right now means we won't have to struggle with memory order choices later. 
### Implementation Guide -For `AtomicCounter`, using `fetch_add(1, std::memory_order_relaxed)` for `increment` is sufficient—we only care about the accuracy of the count and do not need to establish a synchronization relationship with other variables. The same logic applies to using `load(std::memory_order_relaxed)` for `get`. This is because a relaxed atomic guarantees atomicity (no torn reads or writes) but does not guarantee ordering with respect to other operations—for a pure counter, this is exactly what we want. +`AtomicCounter`'s `increment` can use `memory_order_relaxed`—we only care about the accuracy of the count, not establishing synchronization with other variables. `decrement` uses `relaxed` for the same reason. This is because relaxed atomics guarantee atomicity (no torn reads/writes), but not ordering with respect to other operations—which is exactly what we want for a pure counter. -`AtomicMaxTracker` is slightly more complex. `update` requires a CAS loop: read the current max value, attempt to replace it if the new value is larger, and retry if another thread beats us to it. Using `compare_exchange_weak` is fine here—the CAS loop inherently handles failure retries, so the spurious failures of the weak version are not an issue. +`AtomicMax` is slightly more complex. `update` needs a CAS loop: read the current max, if the new value is larger, try to replace it; if another thread beats us to it, retry. `compare_exchange_weak` is fine here—the CAS loop handles retries, so the spurious failure of the weak version isn't an issue. 
```cpp -void update(size_t value) { - size_t current = max_.load(relaxed); - while (value > current) { - if (max_.compare_exchange_weak(current, value, - relaxed, relaxed)) { - break; +void update(T value) { + T old = max_.load(std::memory_order_relaxed); + while (value > old) { + if (max_.compare_exchange_weak(old, value, std::memory_order_relaxed)) { + return; } } } ``` -`StopFlag` is the simplest—a `atomic`, where `request_stop` uses `store(true, release)` and `is_stop_requested` uses `load(acquire)`. The acquire-release pair is meaningful here: all write operations before `request_stop` (such as cleaning up resources or setting state) become visible to the thread that calls `is_stop_requested` and sees `true`. +`StopToken` is the simplest—one `std::atomic`, `request_stop` uses `release`, `is_stop_requested` uses `acquire`. This acquire-release pair is meaningful: all writes before `request_stop` (like cleaning up resources, setting state) become visible to the thread calling `is_stop_requested` and seeing `true`. 
-### Validation +### Verification -```cpp -TEST_CASE("Milestone 1: AtomicCounter under contention", - "[lab2][milestone1]") -{ - AtomicCounter counter; - const int kThreads = 8; - const int kIncrements = 100000; - - std::vector threads; - for (int i = 0; i < kThreads; ++i) { - threads.emplace_back([&]() { - for (int j = 0; j < kIncrements; ++j) { - counter.increment(); - } - }); - } - - REQUIRE(counter.get() == - kThreads * kIncrements); -} - -TEST_CASE("Milestone 1: AtomicMaxTracker tracks global max", - "[lab2][milestone1]") -{ - AtomicMaxTracker tracker(0); - - std::vector threads; - for (int i = 0; i < 8; ++i) { - threads.emplace_back([&tracker, i]() { - tracker.update(i * 10 + 5); - }); - } - - // 最大值应该是 75 (7*10+5) - REQUIRE(tracker.get() == 75); -} - -TEST_CASE("Milestone 1: StopFlag signals stop", - "[lab2][milestone1]") -{ - StopFlag flag; - REQUIRE_FALSE(flag.is_stop_requested()); - - flag.request_stop(); - REQUIRE(flag.is_stop_requested()); -} +```bash +make test_milestone1 ``` ## Milestone 2: SPSC Ring Buffer Basics ### Objectives -Implement `try_push` and `try_pop` for `SpscRingBuffer`. Fixed capacity N, determined at compile time, with no blocking support—returns false if full, and nullopt if empty. In this milestone, we will not worry about memory order; we will use the default `seq_cst` for everything. +Implement `try_push` and `try_pop` for `SPSCRingBuffer`. Fixed capacity N, determined at compile time, no blocking support—return false if full, nullopt if empty. For this milestone, don't worry about memory order; use the default `seq_cst` everywhere. ### Why -SPSC is the simplest lock-free data structure—there is only one producer and one consumer, so we do not need to worry about multiple threads modifying the same position simultaneously. The producer only writes to `tail_`, and the consumer only writes to `head_`. They determine the buffer state by reading each other's index. 
This "each thread only writes to its own spot" design is the core pattern of lock-free programming—eliminating write contention. +SPSC is the simplest lock-free data structure—only one producer and one consumer, so we don't have to worry about multiple threads modifying the same location simultaneously. The producer only writes `tail_`, the consumer only writes `head_`, and they check the buffer state by reading the other's index. This design of "each thread only writes its own index" is a core pattern of lock-free programming—eliminating write contention. ### Implementation Guide -The core of a ring buffer is two indexes: `head_` (consumer read position) and `tail_` (producer write position). `try_push` checks `tail_ - head_ < N` (not full), writes to `buffer_[tail_ % N]`, and finally increments `tail_`. `try_pop` checks `head_ < tail_` (not empty), reads from `buffer_[head_ % N]`, and increments `head_`. +The core of the ring buffer is two indices: `head_` (consumer read position) and `tail_` (producer write position). Both only ever increase; the slot is the index modulo `N`. `try_push` checks `tail_ - head_ < N` (not full), writes to `buffer_[tail_ % N]`, then increments `tail_`. `try_pop` checks `head_ != tail_` (not empty), reads `buffer_[head_ % N]`, then increments `head_`. 
-Pseudocode: +Pseudo-code: ```cpp - -bool try_push(T item) { - size_t tail = tail_.load(seq_cst); - size_t head = head_.load(seq_cst); - - if (tail - head >= N) return false; // 满了 - - buffer_[tail % N] = std::move(item); - tail_.store(tail + 1, seq_cst); +bool try_push(const T& value) { + size_t tail = tail_.load(); + if (tail - head_.load() >= N) return false; // full + buffer_[tail % N] = value; + tail_.store(tail + 1); return true; } -optional try_pop() { - size_t head = head_.load(seq_cst); - size_t tail = tail_.load(seq_cst); - - if (head >= tail) return nullopt; // 空了 - - T item = std::move(buffer_[head % N]); - head_.store(head + 1, seq_cst); - return item; +std::optional<T> try_pop() { + size_t head = head_.load(); + if (head == tail_.load()) return std::nullopt; // empty + T value = buffer_[head % N]; + head_.store(head + 1); + return value; } - ``` -Pitfall warning: index overflow. If `head_` and `tail_` continuously increment, they will eventually overflow `size_t`. On a 64-bit system, this is not a practical issue (2^64 operations would take billions of years), but if you change the type to `uint32_t`, you need to be careful—the calculation result of `tail - head` will be incorrect after overflow. 
- -### Validation - -```cpp -TEST_CASE("Milestone 2: SPSC transfers sequential integers", - "[lab2][milestone2]") -{ - SpscRingBuffer buf; - const int kItems = 100000; - - JoiningThread producer([&]() { - for (int i = 1; i <= kItems; ++i) { - while (!buf.try_push(i)) { - // 自旋等待 - } - } - }); - - std::vector consumed; - int expected = 1; - while (expected <= kItems) { - auto val = buf.try_pop(); - if (val) { - REQUIRE(*val == expected); - ++expected; - } - } - - REQUIRE(expected == kItems + 1); -} - -TEST_CASE("Milestone 2: full and empty states", - "[lab2][milestone2]") -{ - SpscRingBuffer buf; - - REQUIRE(buf.empty()); - REQUIRE_FALSE(buf.full()); +Pitfall warning: Index overflow. 
If `head_` and `tail_` increment continuously, they will eventually overflow `size_t`. On 64-bit systems this isn't a practical issue (2^64 operations would take billions of years), but if you change the type to `uint32_t`, be careful—the unsigned difference `tail - head` stays correct across the wraparound, but the slot index `tail % N` jumps at the wraparound unless `N` is a power of two. - REQUIRE(buf.try_push(1)); - REQUIRE(buf.try_push(2)); - REQUIRE(buf.try_push(3)); - REQUIRE(buf.try_push(4)); - REQUIRE(buf.full()); +### Verification - REQUIRE_FALSE(buf.try_push(5)); // 满了 - - REQUIRE(buf.try_pop() == 1); - REQUIRE_FALSE(buf.full()); // 有空间了 - REQUIRE(buf.try_push(5)); // 现在可以了 -} +```bash +make test_milestone2 ``` ## Milestone 3: acquire-release Optimization ### Objectives -Replace all uses of `seq_cst` memory order from Milestone 2 with the lighter acquire-release semantics. Understand which load/store operations can use `relaxed`, and which must use acquire/release. +Replace the `seq_cst` memory order used in Milestone 2 with the lighter acquire-release semantics. Understand which load/store operations can use `relaxed` and which must use acquire/release. ### Why -`seq_cst` is the strongest memory order—it guarantees that all threads see a consistent order of operations, but this requires extra synchronization instructions (the `MFENCE` or `LOCK` prefix on x86). In an SPSC scenario, we do not need global consistency—we only need to guarantee that the data written by the producer is visible to the consumer. This is exactly what acquire-release semantics do: all write operations before the producer's `store(release)` become visible after the consumer's `load(acquire)`. +`seq_cst` is the strongest memory order—it guarantees a consistent order of operations across all threads, but this requires extra synchronization instructions (like `mfence` or `lock` prefix on x86). 
In the SPSC scenario, we don't need global consistency—we only need to guarantee that data written by the producer is visible to the consumer. 
This is exactly what acquire-release semantics do: all writes before the producer's `release` store become visible to the consumer after its `acquire` load. ### Implementation Guide -Key analysis: in `try_push`, writing to `buffer_[tail % N]` must complete before `tail_.store(tail + 1, release)`—so that when the consumer sees the new `tail_`, the contents of `buffer_` are already ready. In `try_pop`, reading `buffer_[head % N]` must happen after `head_.store(head + 1, release)`—so that when the producer sees the new `head_`, the contents of `buffer_` have already been fetched and can be safely overwritten. +Key analysis: In `try_push`, writing to `buffer_` must complete before `tail_` is updated—so when the consumer sees the new `tail_`, the content of `buffer_` is ready. In `try_pop`, reading `buffer_` must complete before `head_` is updated—so when the producer sees the new `head_`, the content of `buffer_` has been taken and can be safely overwritten. Specific replacement strategy: -- Reading `head_` in `try_push` can use `relaxed`—the producer does not care about the consumer's exact position, only whether there is space, so a slight delay is acceptable -- Writing `tail_` in `try_push` must use `release`—guarantees that the buffer write completes before the tail update -- Reading `tail_` in `try_pop` can use `relaxed`—same as above -- Writing `head_` in `try_pop` must use `release`—guarantees that the buffer read completes before the head update +- Reading `head_` in `try_push` must use `acquire`—it pairs with the consumer's `release` store to `head_`, so the producer only overwrites a slot after the consumer has finished reading it. (The producer's load of its own `tail_` can use `relaxed`, because no other thread writes it.) +- Writing `tail_` in `try_push` must be a `release` store placed after the write to `buffer_`—guaranteeing the buffer write finishes before the tail update. 
+- Reading `tail_` in `try_pop` must use `acquire`—it pairs with the producer's `release` store to `tail_`, so the slot's content is visible before it is read. (The consumer's load of its own `head_` can use `relaxed`.) +- Writing `head_` in `try_pop` must be a `release` store—guaranteeing the buffer read finishes before the head update. 
-Pitfall warning: if you incorrectly change the store of `tail_` to `relaxed`, the consumer might see data that has not been fully written yet. This type of bug is almost impossible to reproduce during development (because x86's strong memory model naturally guarantees store-store ordering), but it will surface on ARM architectures. +Pitfall warning: If you mistakenly change the store to `tail_` to `relaxed`, the consumer might see data that hasn't been fully written. This bug is nearly impossible to reproduce during development (because x86's strong memory model naturally guarantees store-store order), but it will expose itself on ARM architectures. -### Validation +### Verification -```cpp -TEST_CASE("Milestone 3: acquire-release SPSC correctness", - "[lab2][milestone3]") -{ - // 跟 Milestone 2 一样的测试,但跑在 acquire-release 版本上 - SpscRingBuffer buf; - const int kItems = 500000; - - JoiningThread producer([&]() { - for (int i = 1; i <= kItems; ++i) { - while (!buf.try_push(i)) {} - } - }); - - int expected = 1; - while (expected <= kItems) { - auto val = buf.try_pop(); - if (val) { - REQUIRE(*val == expected); - ++expected; - } - } -} +```bash +make test_milestone3 ``` -## Milestone 4: Cache Line Padding and False Sharing Elimination +## Milestone 4: cache line padding and False Sharing Elimination ### Objectives -Add cache line padding to `SpscRingBuffer` to ensure that `head_` and `tail_` do not share the same cache line. Compare the performance data before and after padding. +Add cache line padding to `SPSCRingBuffer` to ensure `head_` and `tail_` do not share the same cache line. Compare performance data before and after padding. ### Why -As covered in ch00-03, false sharing occurs when two atomic variables happen to reside on the same cache line (typically 64 bytes). When one thread modifies variable A, it invalidates the cache line containing the other thread's variable B, even if B was not modified at all. 
In an SPSC scenario, `head_` and `tail_` are modified at high frequency by different threads—if they are on the same cache line, every modification will cause a cache miss for the other, potentially degrading performance by several times. +As discussed in ch00-03, false sharing occurs when two atomic variables happen to be on the same cache line (usually 64 bytes). One thread modifying variable A invalidates the cache line holding variable B for another thread, even if B wasn't modified. In the SPSC scenario, `head_` and `tail_` are modified frequently by different threads—if they are on the same cache line, every modification causes the other's cache miss, potentially degrading performance by several times. ### Implementation Guide -The solution is to insert padding between `head_` and `tail_`, forcing them onto different cache lines. C++11 provides the `alignas` specifier: +The solution is to insert padding between `head_` and `tail_` to force them onto different cache lines. C++11 provides the `alignas` specifier: ```cpp - -alignas(64) atomic head_{0}; -// 64 字节对齐,确保 head_ 独占一个 cache line - -char padding_[64 - sizeof(atomic)]; -// 填充剩余空间(如果需要) - -alignas(64) atomic tail_{0}; -// tail_ 也独占一个 cache line - +alignas(64) std::atomic head_{0}; // Force start of cache line +char padding1[64 - sizeof(std::atomic)]; +alignas(64) std::atomic tail_{0}; // Force start of new cache line ``` -A more concise approach is to use `alignas(64)` directly on class member declarations, and the compiler will automatically insert padding. In actual testing, you should observe a throughput improvement after eliminating false sharing—especially on ARM architectures, where the difference will be very pronounced. +A simpler approach is to use `alignas(64)` directly on class member declarations, and the compiler will automatically insert padding. 
In actual testing, you should see a throughput improvement after eliminating false sharing—especially on ARM architectures where the difference will be very pronounced. -Validation for this milestone is primarily a performance comparison. Use Catch2's `BENCHMARK` macro (or manual timing) to measure the time taken for the same number of push/pop operations before and after padding. The exact numbers depend on your hardware, but you should observe at least an order-of-magnitude difference. +Verification for this milestone is primarily performance comparison. Use Catch2's `BENCHMARK` macro (or manual timing) to measure the time taken for the same number of push/pop operations before and after padding. Specific numbers depend on your hardware, but you should observe at least an order of magnitude difference. -### Validation +### Verification -```cpp -TEST_CASE("Milestone 4: padded SPSC maintains correctness", - "[lab2][milestone4]") -{ - SpscRingBuffer buf; - const int kItems = 100000; - - JoiningThread producer([&]() { - for (int i = 1; i <= kItems; ++i) { - while (!buf.try_push(i)) {} - } - }); - - int expected = 1; - while (expected <= kItems) { - auto val = buf.try_pop(); - if (val) { - REQUIRE(*val == expected); - ++expected; - } - } -} - -TEST_CASE("Milestone 4: benchmark padded vs unpadded", - "[lab2][milestone4]") -{ - // 性能对比测试——不需要 REQUIRE,只需观察输出 - const int kItems = 1000000; - const int kRounds = 10; - - // 测量当前(padded)版本 - auto padded_time = benchmark_spsc>( - kItems, kRounds); - - // 你可以额外实现一个 UnpaddedSpscRingBuffer 来对比 - // auto unpadded_time = benchmark_spsc>( - // kItems, kRounds); - - // 报告结果(不做 REQUIRE,因为性能数字因环境而异) - std::cout << "Padded SPSC: " << padded_time << " us\n"; -} +```bash +make test_milestone4 ``` ## Milestone 5: Benchmark Comparison with Mutex Queue ### Objectives -Use a unified benchmark methodology to compare the throughput of `SpscRingBuffer` (lock-free) and `BoundedBlockingQueue` (mutex) in an SPSC scenario. 
+Use a unified benchmark methodology to compare the throughput of `SPSCRingBuffer` (lock-free) and `MutexQueue` (mutex) in an SPSC scenario. ### Why -Many people assume that "lock-free" is always faster, but the reality is not that simple. In low-contention scenarios, the overhead of a mutex is actually quite small (on x86, an uncontended futex is just a single atomic instruction); in high-frequency single-threaded scenarios, atomic busy-waiting can consume more CPU than mutex sleep-waiting. Only by letting the data speak can we clarify under exactly what conditions "faster" holds true. +Many people see "lock-free" and assume it must be faster, but the reality is not that simple. In low-contention scenarios, mutex overhead is actually small (on x86, a futex is just one atomic instruction when uncontended); in high-frequency single-threaded scenarios, atomic busy-waiting might consume more CPU than mutex sleep-waiting. Only by letting the data speak can we clarify under what conditions "faster" actually holds. ### Implementation Guide -Follow this unified benchmark methodology (shared by all later Labs): +Follow a unified benchmark methodology (shared across all subsequent Labs): -1. **Measurement goal** — decide whether you measure throughput (ops/s), latency, or scalability; test one at a time. -2. **Warm-up** — run 5 rounds without recording, so caches and branch predictors reach steady state. -3. **Multi-round collection** — at least 10 formal rounds; take the **median** (never a single run or a plain average). -4. **Pin CPU affinity** — use `taskset` or `pthread_setaffinity_np` to bind threads to fixed cores, avoiding OS migration noise; distinguish physical cores from hyper-threaded logical cores. -5. **Two data scales** — one dataset fits in L3, one exceeds L3, to expose cache effects. -6. 
**Don't let the result be optimized away** — use `benchmark::DoNotOptimize` or write to `volatile` so the computation isn't elided; preallocate memory to avoid allocator-lock interference. -7. **Report format** — test environment, parameters, results, conclusions, and boundaries (differences under ~5% are usually not significant; focus on order-of-magnitude differences). +1. **Measurement Target** — Clearly define what is being measured: throughput (ops/s), latency, or scalability. Measure only one at a time. +2. **Warm-up** — Run 5 rounds that don't count, allowing caches and branch prediction to reach a steady state. +3. **Multiple Runs** — Run at least 10 official rounds and take the **median** (don't just take the average or a single run). +4. **Fix CPU Affinity** — Use `taskset` or `pthread_setaffinity_np` to pin threads to fixed cores, avoiding noise from OS migration; distinguish between physical cores and hyperthreading logical cores. +5. **Two Data Scales** — One dataset size fits within L3 cache, one exceeds L3, to observe cache effects. +6. **Prevent Optimization** — Use `benchmark::DoNotOptimize` or write to `volatile` to ensure calculations aren't eliminated by the compiler; pre-allocate memory to avoid allocator lock interference. +7. **Report Format** — Test environment, parameters, results, conclusions, and boundaries (differences within 5% are usually insignificant; focus on order-of-magnitude differences). 
-Pseudocode: +Pseudo-code: ```cpp -auto benchmark = [&](auto& queue, int items) -> double { - // 热身 - for (int i = 0; i < 3; ++i) { - run_spsc_benchmark(queue, items); - } - - // 正式采集 - vector samples; - for (int i = 0; i < 10; ++i) { - auto start = steady_clock::now(); - run_spsc_benchmark(queue, items); - auto elapsed = steady_clock::now() - start; - samples.push_back(elapsed in microseconds); - } - - sort(samples); - return samples[samples.size() / 2]; // 中位数 -}; +// Benchmark Loop +for (int round = 0; round < warmup + rounds; ++round) { + auto start = now(); + // Producer/Consumer loop + producer(); + consumer(); + auto end = now(); + if (round >= warmup) record_latency(end - start); +} +report_median(latencies); ``` -Your report should include: CPU model and core count, compiler and optimization level, data scale, median latency, and an explanation of your conclusion's boundaries—"this conclusion only applies to SPSC scenarios and does not hold for MPMC scenarios." +Your report should include: CPU model and core count, compiler and optimization level, data scale, median latency, and an explanation of your conclusion boundaries—"This conclusion applies only to SPSC scenarios; it does not hold for MPMC scenarios." -### Validation +### Verification -Validation for this milestone is not a traditional `REQUIRE`, but rather a sanity check of the performance data. You need to confirm: +Verification for this milestone is not a traditional `TEST_CASE`, but a sanity check of performance data. You need to confirm: -- The lock-free version is indeed faster than the mutex version in SPSC scenarios (typically 2-10x faster) -- The trend of performance differences across data scales is reasonable -- You can explain why the mutex version might actually be faster under certain conditions (for example, when contention is extremely low, the mutex overhead is nearly zero) +- The lock-free version is indeed faster than the mutex version in SPSC scenarios (usually 2-10x faster). 
+- The trend of performance difference changing with data scale is reasonable. +- You can explain why the mutex version might be faster under certain conditions (e.g., when contention is extremely low, mutex overhead is near zero). -## Self-Check List +## Checklist -- [ ] `AtomicCounter` uses `relaxed` order, and `StopFlag` uses an acquire-release pair -- [ ] The CAS loop in `AtomicMaxTracker` correctly handles concurrent updates -- [ ] SPSC data transfer has no loss, no duplication, and correct ordering -- [ ] Tests still pass after replacing seq_cst with acquire-release +- [ ] `AtomicCounter` uses `relaxed` order, and `StopFlag` uses an acquire-release pair +- [ ] The CAS loop in `AtomicMaxTracker` correctly handles concurrent updates +- [ ] SPSC data transfer has no loss, no duplication, and correct order +- [ ] Tests pass after replacing `seq_cst` with acquire-release - [ ] After cache line padding, `head_` and `tail_` are not on the same cache line -- [ ] Benchmarks follow a unified methodology (warm-up, multiple collections, take median) -- [ ] You can explain the performance differences between relaxed, acquire-release, and seq_cst -- [ ] You can explain the principle of false sharing and how padding eliminates it -- [ ] You can describe the conditions under which the lock-free approach outperforms the mutex approach, and when it might not +- [ ] Benchmarks follow unified methodology (warm-up, multiple runs, median) +- [ ] Can explain the performance difference between relaxed, acquire-release, and seq_cst +- [ ] Can explain the principle of false sharing and how padding eliminates it +- [ ] Can articulate under what conditions the lock-free solution outperforms mutex, and when it might not - [ ] All tests pass under TSan with no data race reports diff --git a/documents/en/vol5-concurrency/exercises/06-capstone-mini-runtime.md b/documents/en/vol5-concurrency/exercises/06-capstone-mini-runtime.md index b33c0ba5e..012d4992e 100644 --- 
a/documents/en/vol5-concurrency/exercises/06-capstone-mini-runtime.md +++ b/documents/en/vol5-concurrency/exercises/06-capstone-mini-runtime.md @@ -1,7 +1,7 @@ --- title: 'Capstone: Mini Concurrent Runtime' -description: Combine components from all Labs in Volume 5 to build a mini concurrent - runtime, practicing system design, component composition, and observability. +description: Combine components from all labs in Volume V to build a mini concurrent + runtime, training system design, component composition, and observability. chapter: 10 order: 7 difficulty: advanced @@ -23,22 +23,22 @@ prerequisites: - 'Lab 5: Channel or Actor Runtime' translation: source: documents/vol5-concurrency/exercises/06-capstone-mini-runtime.md - source_hash: fcae6e1b85800920052211d645e7e4c7ca0b35c80c54a1d369cce3414dea1fcc - translated_at: '2026-05-26T11:49:01.898502+00:00' + source_hash: 9703a584a9a9805fad187494a8070d1d93eba952e9c671217c54d1fc84edf144 + translated_at: '2026-06-14T00:20:34.530410+00:00' engine: anthropic - token_count: 1674 + token_count: 1677 --- # Capstone: Mini Concurrent Runtime -## Goal +## Objectives -Volume 5 wraps up by shifting from "knowing many concurrency tools" to "being able to compose a concurrent system." This Capstone does not pursue production-grade completeness; instead, it requires you to combine the finished components from the previous seven Labs to build a runnable mini system—a mini concurrent runtime or network service framework. +Volume 5 moves from "learning many concurrency tools" to "composing concurrent systems." This Capstone does not pursue production-grade completeness, but requires you to combine the finished components from the previous 7 Labs to build a runnable mini-system—a mini concurrent runtime or network service framework. -The focus is not on implementing new components from scratch, but on answering three engineering questions: How do components connect? How does the system stop? How do errors propagate and get handled? 
+The focus is not on implementing new components from scratch, but on answering three engineering questions: How do components connect? How does the system stop? How are errors propagated and handled? ## Prerequisites -Complete all of Labs 0–5. This Capstone directly reuses components from previous Labs. +Complete all Labs 0–5. This Capstone directly reuses components from previous Labs. ## Environment Setup @@ -46,13 +46,13 @@ Same as Lab 4 (C++20, Linux/WSL2 for epoll, Catch2 v3, TSan). ## Recommended Components -Below is the recommended component list for the mini runtime. Each component comes from a previous Lab: +Below is a list of recommended components for the mini runtime. Each component comes from a previous Lab: | Component | Source Lab | Responsibility | |-----------|------------|----------------| | `JoiningThread` | Lab 0 | Thread lifecycle management | -| `BoundedBlockingQueue` | Lab 1 | Task queue / channel underlying layer | -| `ConcurrentCache` | Lab 1 | Configuration cache / connection pool | +| `BoundedBlockingQueue` | Lab 1 | Task queue / channel bottom layer | +| `ConcurrentCache` | Lab 1 | Config cache / connection pool | | `AtomicCounter` / `AtomicMaxTracker` | Lab 2 | Runtime metrics | | `StopFlag` | Lab 2 | Graceful shutdown signal | | `ThreadPool` | Lab 3 | CPU-bound task scheduling | @@ -61,17 +61,17 @@ Below is the recommended component list for the mini runtime. Each component com ## Milestone 1: Architecture Design and Interface Definition -### Goal +### Objectives -Draw a component diagram of the mini runtime, and define the interaction interfaces between components. Do not write any implementation code—this milestone is purely about design. +Draw a component diagram of the mini runtime and define the interaction interfaces between components. Do not write any implementation code—this milestone is purely about design. 
### Why -The first step in system design is not writing code, but clarifying the relationships and responsibility boundaries between components. Specifically, three questions: "Who creates whom?", "Who owns whom?", and "Who can shut down whom?". In concurrent systems, these questions are far more important than in single-threaded systems—an incorrect ownership relationship can lead to dead lock, memory leak, or crashes during shutdown. +The first step of system design is not writing code, but clarifying the relationships and responsibility boundaries between components. Specifically, the three questions: "Who creates whom?", "Who owns whom?", and "Who can shut down whom?". In concurrent systems, these issues are much more important than in single-threaded systems—an incorrect ownership relationship can lead to deadlocks, resource leaks, or crashes during shutdown. ### Implementation Guide -Use a paragraph or a diagram to describe your runtime's architecture. We recommend starting from the "complete path of a request from entry to exit": +Use a paragraph of text or a diagram to describe your runtime's architecture. It is recommended to start with "the complete path of a request from entry to exit": ```cpp 客户端请求 → epoll accept → 协程 handle_connection @@ -81,27 +81,27 @@ Use a paragraph or a diagram to describe your runtime's architecture. We recomme → 协程 write response → 客户端 ``` -Along this path, annotate the responsibility and lifecycle relationship of each component. For example: `EventLoop` owns the epoll fd and the coroutine scheduler; `ThreadPool` owns the worker threads and the task queue; `Channel` connects the coroutine layer and the thread pool layer. +On this path, mark the responsibility and lifecycle relationships of each component. For example: `EventLoop` owns the epoll fd and the coroutine scheduler; `ThreadPool` owns worker threads and the task queue; `Channel` connects the coroutine layer and the thread pool layer. 
You need to answer the following design questions: -1. Between `EventLoop` and `ThreadPool`, which is created first, and which is shut down first? +1. Between `EventLoop` and `ThreadPool`, which is created first and shut down first? 2. Who is responsible for closing `Channel`—the producer or the consumer? -3. How does an exception in one component propagate to other components? +3. How are exceptions from one component propagated to others? -### Validation +### Verification -Discuss your design with a peer or AI, and confirm there are no missing edge cases. You don't need to write code, but you must be able to answer the three design questions above. +Discuss your design with a peer or AI to ensure no edge cases are missed. You don't need to write code, but you must be able to answer the three design questions above. ## Milestone 2: Component Assembly and Startup -### Goal +### Objectives -Combine all Lab components together, and implement the runtime's startup flow. You don't need to handle network requests—just confirm that all components initialize and run correctly. +Combine components from all Labs to implement the runtime's startup process. You don't need to handle network requests—just confirm that all components are initialized and running correctly. ### Why -The startup order of components matters. `ThreadPool` must be created before `Channel` (because worker threads need to pull tasks from the channel), and `EventLoop` must be created before `ThreadPool` (because coroutine scheduling happens before I/O events). The goal of this milestone is to confirm the correct startup order and ensure there are no cyclic dependencies between components. +The startup order of components is crucial. `ThreadPool` needs to be created before `Channel` (because worker threads need to fetch tasks from the channel), and `EventLoop` needs to be created before `ThreadPool` (because coroutine scheduling happens before I/O events). 
The goal of this milestone is to confirm that the startup order is correct and that there are no circular dependencies between components. ### Implementation Guide @@ -135,9 +135,9 @@ private: }; ``` -Pitfall warning: The declaration order of members is the initialization order, and the destruction order is the reverse. Ensure that `ThreadPool` is destroyed before `BoundedBlockingQueue` (because worker threads need to pull data from the queue until the queue closes), and that `EventLoop` is destroyed before all channels. +Pitfall Warning: The order of member declaration is the order of initialization, and destruction order is the reverse. Ensure `ThreadPool` is destroyed before `BoundedBlockingQueue` (because worker threads need to fetch data from the queue until the queue is closed), and `EventLoop` is destroyed before all channels. -### Validation +### Verification ```cpp TEST_CASE("Milestone 2: runtime starts and stops cleanly", @@ -157,28 +157,28 @@ TEST_CASE("Milestone 2: runtime starts and stops cleanly", // stop 后不应该崩溃 // 所有 worker 线程应该已经退出 } -```text +``` ## Milestone 3: Failure Path Testing -### Goal +### Objectives -Test the runtime's behavior under various failure scenarios: tasks throwing exceptions, client disconnections, queue closures, and component exceptions. +Test the runtime's behavior under various failure scenarios: tasks throwing exceptions, clients disconnecting, queues closing, and component exceptions. ### Why -The correctness of a concurrent system is not only reflected in the "happy path." A production-grade system must gracefully handle various failures—a task execution failure should not crash the entire runtime, a client disconnection should not cause a memory leak, and a component exception should be caught and reported rather than silently lost. +The correctness of a concurrent system is not only reflected in the "happy path." 
A production-grade system must handle various failures gracefully—task execution failures should not crash the entire runtime, client disconnections should not leak resources, and component exceptions should be caught and reported rather than silently lost. ### Implementation Guide Test the following scenarios: -1. **Task exception**: Submit a task that throws an exception, confirm that `future::get()` can rethrow it, and that the runtime continues running normally -2. **Client disconnection**: Simulate a client disconnecting during coroutine processing, confirm that the coroutine exits correctly without leaking resources -3. **Queue closure**: Close a middle channel while the pipeline is running, confirm that both upstream and downstream handle it correctly -4. **Repeated shutdown**: Call `stop()` multiple times, confirm idempotency +1. **Task Exception**: Submit a task that throws an exception, confirm that `future::get()` can re-throw it, and that the runtime continues to run normally. +2. **Client Disconnect**: Simulate a client disconnecting during coroutine processing, confirm that the coroutine exits correctly without leaking resources. +3. **Queue Closure**: Close a middle channel while the pipeline is running, confirm that both upstream and downstream handle it correctly. +4. **Repeated Shutdown**: Call `stop()` multiple times to confirm idempotency. -### Validation +### Verification ```cpp TEST_CASE("Milestone 3: task exception doesn't crash runtime", @@ -234,28 +234,28 @@ TEST_CASE("Milestone 3: channel close propagates through pipeline", ## Milestone 4: Observability and Performance Validation -### Goal +### Objectives -Add metrics collection to the runtime (`AtomicCounter`, `AtomicMaxTracker`), implement at least one end-to-end benchmark, and validate correctness with TSan. +Add metrics collection (`AtomicCounter`, `AtomicMaxTracker`) to the runtime, implement at least one end-to-end benchmark, and verify correctness with TSan. 
### Why -A concurrent system without observability is like a black box—you don't know what it's doing, how it performs, or whether it has problems. The atomic metrics component from Lab 2 comes into play here: count completed tasks, current queue length, and maximum concurrent connections. These metrics don't need to be precise to the millisecond—their value lies in letting you see "the system is running" and "the system is degrading." +A concurrent system without observability is like a black box—you don't know what it is doing, how it performs, or if there are problems. The atomic metrics component from Lab 2 comes into play here: count completed tasks, current queue length, and maximum concurrent connections. These metrics don't need millisecond precision—their value lies in letting you see "the system is running" and "the system is degrading." ### Implementation Guide -Insert metrics collection points at key paths in the runtime: +Insert metrics collection points on the runtime's critical paths: -- When a task is submitted: `active_tasks_.increment()` -- When a task completes: `active_tasks_.decrement()` -- When a new connection is established: `max_connections_.update(current_connections)` +- When a task is submitted: `active_tasks_.increment()` +- When a task is completed: `active_tasks_.decrement()` +- When a new connection is established: `max_connections_.update(current_connections)` - Periodic sampling of queue length (optional) -Write an end-to-end benchmark: start the runtime, submit N tasks, wait for all futures to complete, and report the total time and throughput. Reuse Lab 2's benchmark methodology — warm up, take the median over multiple rounds, pin CPU affinity, and report the environment and boundaries; don't trust a single run or swings under ~5%. +Write an end-to-end benchmark: start the runtime, submit N tasks, wait for all futures to complete, and report total time and throughput. 
Follow Lab 2's benchmark methodology—warm up, take the median of multiple rounds, fix CPU affinity, report the test environment and boundaries, and don't just look at single runs or fluctuations within 5%. -Finally, run the complete test suite with TSan to confirm there are no data races. +Finally, run the full test suite with TSan to confirm there are no data races. -### Validation +### Verification ```cpp TEST_CASE("Milestone 4: metrics track runtime behavior", @@ -282,17 +282,17 @@ TEST_CASE("Milestone 4: metrics track runtime behavior", } ``` -## Self-Check List - -- [ ] All components from Labs 0–5 are correctly combined -- [ ] Component creation order and destruction order are correct (no cyclic dependencies, no dangling references) -- [ ] `stop()` is idempotent, and does not dead lock or leak -- [ ] There is a clear shutdown sequence: stop accepting new requests → drain the queue → join all threads -- [ ] Task exceptions do not crash the runtime -- [ ] Channel closure correctly propagates to all stages of the pipeline -- [ ] Metrics collection does not affect correctness (using `relaxed` atomic) -- [ ] There is at least one end-to-end benchmark that reports throughput -- [ ] The complete test suite has no data race reports under TSan -- [ ] You can answer: where do you use locks, where do you use atomics, and where do you avoid shared state through message passing -- [ ] You can explain what the benchmark results do not prove (e.g., "local testing does not represent behavior in a network environment") -- [ ] You can describe which component you would prioritize improving if you had more time +## Checklist + +- [ ] Components from all Labs 0–5 are correctly combined. +- [ ] Component creation and destruction order is correct (no circular dependencies, no dangling references). +- [ ] `stop()` is idempotent and does not deadlock or leak. +- [ ] There is a clear shutdown sequence: stop accepting new requests → drain queues → join all threads. 
+- [ ] Task exceptions do not cause the runtime to crash. +- [ ] Channel closure is correctly propagated to all stages of the pipeline. +- [ ] Metrics collection does not affect correctness (use `relaxed` atomic). +- [ ] At least one end-to-end benchmark exists, reporting throughput. +- [ ] The full test suite runs under TSan with no data race reports. +- [ ] Can answer: Where do we use locks, where do we use atomics, and where do we avoid shared state through message passing? +- [ ] Can explain what the benchmark results do not prove (e.g., "standalone tests do not represent performance in a network environment"). +- [ ] Can describe which component you would prioritize improving if you had more time. diff --git a/documents/en/vol5-concurrency/index.md b/documents/en/vol5-concurrency/index.md index 88d98bdff..0a20f3cb2 100644 --- a/documents/en/vol5-concurrency/index.md +++ b/documents/en/vol5-concurrency/index.md @@ -1,6 +1,6 @@ --- title: 'Volume 5: Concurrent Programming' -description: From thread primitives to coroutine asynchrony +description: From Thread Primitives to Coroutine Asynchrony platform: host tags: - cpp-modern @@ -8,26 +8,26 @@ tags: - intermediate translation: source: documents/vol5-concurrency/index.md - source_hash: cd80a1b1c6da3499274cc7f47f04a8f535d78b5b462ee9bd8364bc375acd7b1d - translated_at: '2026-05-26T11:49:23.141927+00:00' + source_hash: c01fb4c1ae045c817de2cbdadd809fc4370e8e5d467bf7418ad57fcd76c0c207 + translated_at: '2026-06-14T00:20:39.830231+00:00' engine: anthropic token_count: 262 --- -# Volume 5: Concurrent Programming +# Volume V: Concurrent Programming -From thread primitives to asynchronous coroutines, from locks to lock-free, from synchronous to task-based — Volume 5 helps you build complete concurrency judgment. Our principle is: **correctness first, performance second; locks first, lock-free second; synchronous first, task-based second**. 
+From thread primitives to asynchronous coroutines, from locks to lock-free, from synchronization to tasks—Volume V helps you build a complete understanding of concurrency. Our principle is: **correctness first, then performance; locks first, then lock-free; synchronization first, then tasks**. ## Chapter Navigation - ch00 · Concurrency Thinking and Fundamentals + ch00 · Concurrent Thinking and Fundamentals ch01 · Thread Lifecycle and RAII - ch02 · Mutex, Condition Variables, and Synchronization Primitives - ch03 · Atomic Operations and Memory Models + ch02 · Mutexes, Condition Variables, and Synchronization Primitives + ch03 · Atomic Operations and Memory Model ch04 · Concurrent Data Structures ch05 · Futures, Tasks, and Thread Pools ch06 · Asynchronous I/O and Coroutines - ch07 · Actor and Channel + ch07 · Actors and Channels ch08 · Debugging, Testing, and Performance ch09 · Distributed Bridging Appendix diff --git a/documents/en/vol8-domains/embedded/02-type-safe-register-access.md b/documents/en/vol8-domains/embedded/02-type-safe-register-access.md index 261f01af8..a3a2c07cc 100644 --- a/documents/en/vol8-domains/embedded/02-type-safe-register-access.md +++ b/documents/en/vol8-domains/embedded/02-type-safe-register-access.md @@ -19,29 +19,29 @@ tags: title: Type-Safe Register Access translation: source: documents/vol8-domains/embedded/02-type-safe-register-access.md - source_hash: ee7289ceef37902b9cdc6d8479414487f7cdf0f30acb7c738bf4937d04ab1834 - translated_at: '2026-05-26T11:37:19.392812+00:00' + source_hash: 01e1bfe6b9c623aff34bb3e910c4abf01ca82e1b62702ccfe41fab167c2923f9 + translated_at: '2026-06-14T00:20:46.752858+00:00' engine: anthropic - token_count: 1105 + token_count: 1107 --- # Embedded C++ Tutorial — Type-Safe Register Access -When writing register operations, our typical appetizer is a one-line tragedy like this: +When writing register operations, a common starter is this one-line tragedy: ```cpp *(volatile uint32_t*)0x40001000 |= (1 << 3); ``` -Its 
advantage is that it is short and concise; the disadvantage is that you won't understand it tomorrow, the compiler understands it but isn't fully satisfied, and you might also step on a landmine of undefined behavior (UB). +Its advantage is that it is short and concise; the downside is that you won't understand it tomorrow, the compiler understands it but isn't happy about it, and you might step on landmines of undefined behavior. -We use **compile-time constants + templates + strongly-typed enumerations** to encapsulate register addresses, bit fields, and operations. At the same time, we use **constexpr mask / static_assert** to catch errors at compile time. We must retain `volatile` (to tell the compiler not to optimize away hardware accesses) and use memory barriers when necessary to guarantee visibility and ordering. +We use **compile-time constants + templates + scoped enumerations** to encapsulate register addresses, bit fields, and operations. At the same time, we use **constexpr masks / static_assert** to catch errors at compile time. We must preserve `volatile` (telling the compiler not to optimize away hardware accesses) and use memory barriers when necessary to guarantee visibility and ordering. ------ ## A Concise Type-Safe Register Wrapper -Below is a small yet complete implementation template. It can read and write registers, safely read and write fields, and support user-defined strongly-typed enumeration types. +Below is a small yet complete implementation template. It can read and write registers, safely read and write fields, and supports user-defined scoped enumeration types. ```cpp // reg.hpp @@ -131,17 +131,17 @@ struct reg_field { ``` -> Note: The `compiler_barrier()` in `mmio_reg` above uses `asm volatile("" ::: "memory")`, which is the lightest compiler barrier. 
On ARM Cortex-M, if you need to ensure bus ordering or cache coherency, you should use `__DSB()` / `__ISB()` or equivalent functions provided by the platform SDK at critical locations. +> Note: The `compiler_barrier()` in `mmio_reg` above uses `asm volatile("" ::: "memory")`, which is the lightest compiler barrier. On ARM Cortex-M, if you need to ensure bus ordering or cache coherency, you should use `__DSB()` / `__ISB()` or equivalent functions provided by the platform SDK at critical locations. ------ ## Usage Example -Suppose we have a 32-bit UART control register `UART_CR` at address `0x40001000`, defined as: +Assume we have a 32-bit UART control register `UART_CR` at address `0x40001000`, defined as: -- `EN` bit 0 (enable), +- `EN` bit 0 (enable), - `MODE` bits 1~2 (2-bit mode), -- `BAUDDIV` bits 8~15 (8-bit baud rate divider). +- `BAUDDIV` bits 8~15 (8-bit baud rate divider). ```cpp // uart_regs.hpp @@ -174,14 +174,14 @@ void uart_init() { ``` -The advantages are immediately visible: field positions, widths, and valid values are all encoded in the type system. The code reads like documentation rather than magical bit manipulation. +The benefits are immediately visible: field positions, widths, and legal values are all encoded within the type system. The code reads like documentation rather than magical bit manipulation. ------ ## Preventing Common Errors -1. **Ensure consistent type widths**: The `uint32_t` of `mmio_reg` must match the actual width of the hardware register. `static_assert` can help you catch errors at compile time. -2. **Avoid raw `|=`/`&=` on the same register, which can cause read-modify-write timing issues**: If a register is specifically designed as "write-1-to-clear" or "write-1-to-set", use explicitly wrapped `set_bits()` / `clear_bits()` or dedicated functions to prevent misuse. -3. **Consider concurrency and interrupts**: Read-modify-write operations may not be atomic in an interrupt or multi-core environment. 
For register modifications that must be atomic, disable interrupts in a critical section or use hardware-provided atomic accesses. -4. **Memory barriers**: After initializing a peripheral or swapping control registers, if you need to guarantee that subsequent reads/writes take effect on the hardware immediately, please use appropriate DSB/ISB or `atomic_thread_fence`. -5. **Don't pass registers around like global variables**: Try to keep register wrappers as `constexpr` types/aliases to facilitate static auditing and automatic documentation generation. +1. **Ensure consistent type width**: The `uint32_t` of `mmio_reg` must match the actual width of the hardware register. `static_assert` can help you discover errors at compile time. +2. **Avoid bare `|=` / `&=` on the same register to prevent read-modify-write timing issues**: If a register is specifically designed as "write-1-to-clear" or "write-1-to-set", use explicitly encapsulated `set_bits()` / `clear_bits()` or dedicated functions to prevent misuse. +3. **Consider concurrency and interrupts**: Read-modify-write operations may not be atomic in interrupt or multi-core environments. For register modifications that must be atomic, disable interrupts in a critical section or use atomic accesses provided by the hardware. +4. **Memory barriers**: After initializing peripherals or swapping control registers, if you need to ensure that subsequent reads/writes take effect on the hardware immediately, please use appropriate DSB/ISB or `atomic_thread_fence`. +5. **Don't pass registers around like global variables**: Try to keep register encapsulations as `constexpr` types/aliases to facilitate static auditing and automatic documentation generation. 
diff --git a/documents/en/vol8-domains/embedded/03-circular-buffer.md b/documents/en/vol8-domains/embedded/03-circular-buffer.md index b68ff0373..fff3f32c1 100644 --- a/documents/en/vol8-domains/embedded/03-circular-buffer.md +++ b/documents/en/vol8-domains/embedded/03-circular-buffer.md @@ -19,38 +19,38 @@ tags: title: Circular Buffer Implementation translation: source: documents/vol8-domains/embedded/03-circular-buffer.md - source_hash: 244238468099658b6070bdd00700e98ca2c24f4e869ccb425b756eb1ff99b464 - translated_at: '2026-05-26T11:37:34.986895+00:00' + source_hash: 8c134e19ee132d94c025e8b4c70083d7d6ca8206d7b828f8d8fb6396ee391a86 + translated_at: '2026-06-14T00:20:56.830619+00:00' engine: anthropic - token_count: 980 + token_count: 981 --- # Embedded C++ Tutorial — Circular Buffer -In the embedded world, one type of problem pops up again and again: **a data source continuously produces data, a consumer processes it slowly, and we want to avoid `malloc` in between.** Enter an ancient yet timeless data structure — the **circular buffer (also known as a ring buffer)**. +In the embedded world, one problem recurs constantly: **a data source produces data continuously, a consumer processes it slowly, and we want to avoid `malloc` in between.** Thus, an ancient but timeless data structure takes the stage—the **Circular Buffer (Ring Buffer)**. -Think of it as a warehouse with a fixed size; once full, it starts over from the beginning. No resizing, no fragmentation, no "new failed" errors. It is perfectly suited for MCUs, drivers, interrupts, DMA, UARTs, audio streams, and more. +You can think of it as a warehouse with a fixed size; when it's full, we start over from the beginning. No resizing, no fragmentation, no "new failed," making it perfect for MCUs, drivers, interrupts, DMA, serial ports, audio streams, and other scenarios. ------ ## Why Does Embedded Love Circular Buffers So Much? -In the PC world, we can freely `new` and `std::vector::push_back`. 
But in embedded systems, these operations sound dangerous: +In the PC world, we can freely `malloc` and `new`. But in embedded systems, these operations sound dangerous: -- Heap memory is small and prone to fragmentation -- We cannot call `malloc` in an interrupt context -- Real-time systems cannot tolerate unpredictable latency +- Heap memory is small and prone to fragmentation. +- We cannot `malloc` within an interrupt context. +- Real-time systems cannot tolerate unpredictable latency. -The characteristics of a circular buffer, however, make it practically tailor-made for embedded: +The characteristics of a circular buffer are practically tailor-made for embedded systems: -- **Fixed size, determined at compile time or initialization** -- **O(1) enqueue / dequeue** -- **Contiguous memory, cache-friendly** -- **No dynamic allocation required** -- **Simple to implement, easy to make lock-free / interrupt-safe** +- **Fixed size, determined at compile time or initialization.** +- **O(1) enqueue / dequeue.** +- **Contiguous memory, cache-friendly.** +- **No dynamic allocation required.** +- **Simple implementation, easy to make lock-free / interrupt-safe.** -To sum it up in one sentence: +To summarize in one sentence: -> **It's not clever, but it's reliable.** +> **It isn't smart, but it is reliable.** ------ @@ -58,231 +58,200 @@ To sum it up in one sentence: A circular buffer is essentially: -- A fixed-size array +- A fixed-size array. - Two indices: - - `head`: write position - - `tail`: read position + - `write_idx`: The write position. + - `read_idx`: The read position. -When an index reaches the end of the array, it **wraps around to the beginning**, forming a circle. - -```cpp - -[ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ] - ↑ ↑ - tail head +When an index reaches the end of the array, it **wraps around to the beginning**, like a circle. 
+```mermaid +graph LR + A[Buffer Array] --> B[write_idx] + A --> C[read_idx] + B -- "Write Data" --> D[Move write_idx] + C -- "Read Data" --> E[Move read_idx] ``` -Writing data: move `head` -Reading data: move `tail` +Writing data: Move `write_idx`. +Reading data: Move `read_idx`. There is only one key question to figure out: -👉 **How do we distinguish between "full" and "empty"?** +👉 **How to distinguish "full" from "empty"?** ------ -## How to Distinguish "Empty" and "Full"? (A Classic Puzzle) +## How to Distinguish "Empty" and "Full"? (The Classic Puzzle) There are three common approaches: -1. **Waste one element (most common)** -2. Maintain an additional `count` -3. Use an additional `full` flag +1. **Waste one element (most common).** +2. Maintain an extra `count`. +3. Use an extra `bool` flag. -In embedded development, **approach 1 is the most popular**: simple, unambiguous, and logically clear. The rules are: +In embedded systems, **Approach 1 is the most popular**: simple, unambiguous, and logically clear. The rules are: -- Buffer size is `N` -- It can actually store a maximum of `N - 1` elements +- Buffer size is `Capacity + 1`. +- Actual maximum storage is `Capacity` elements. - Condition checks: - - Empty: `head == tail` - - Full: `(head + 1) % N == tail` + - Empty: `read_idx == write_idx` + - Full: `(write_idx + 1) % (Capacity + 1) == read_idx` -Yes, we sacrifice one slot in exchange for a lifetime of peace of mind. +Yes, we sacrifice one slot to buy a lifetime of peace. ------ ## A Clean C++ Circular Buffer Implementation -Below is a **no dynamic memory, templated, embedded-friendly** implementation. +Below is a **no-dynamic-memory, templated, embedded-friendly** implementation.
### Basic Interface Design ```cpp -#pragma once -#include -#include +template +class CircularBuffer { + // Actual array size = User available capacity + 1 + T data_[Capacity + 1]; + size_t read_idx_ = 0; + size_t write_idx_ = 0; -template -class RingBuffer { public: - bool push(const T& value); - bool pop(T& out); - - bool empty() const; - bool full() const; - - std::size_t size() const; - std::size_t capacity() const { return Capacity - 1; } - -private: - std::array buffer_{}; - std::size_t head_ = 0; - std::size_t tail_ = 0; + // ... methods }; - ``` Note one detail: -👉 **`Capacity` actual array size = user-usable capacity + 1** +👉 **`data_[Capacity + 1]` actual array size = user available capacity + 1** ------ -## Enqueue (push): Move Forward One Step +## Enqueue (push): Step Forward ```cpp -template -bool RingBuffer::push(const T& value) -{ +bool push(const T& item) { if (full()) { - return false; // 缓冲区满了 + return false; // Buffer full } - buffer_[head_] = value; - head_ = (head_ + 1) % Capacity; + data_[write_idx_] = item; + write_idx_ = (write_idx_ + 1) % (Capacity + 1); return true; } - ``` -There is no dark magic here: +There is no black magic here: -- First, check if it is full -- Write the data -- Move `head` -- If it reaches the end, wrap around to the beginning +- First check if full. +- Write data. +- Move `write_idx_`. +- If at the end, wrap to the beginning. -**O(1), it will never be slow.** +**O(1), never slow.** ------ -## Dequeue (pop): The Consumer Takes the Stage +## Dequeue (pop): The Consumer Enters ```cpp -template -bool RingBuffer::pop(T& out) -{ +bool pop(T& item) { if (empty()) { - return false; // 没数据 + return false; // Buffer empty } - out = buffer_[tail_]; - tail_ = (tail_ + 1) % Capacity; + item = data_[read_idx_]; + read_idx_ = (read_idx_ + 1) % (Capacity + 1); return true; } - ``` Equally simple: -- If empty, fail -- Read the data -- Move `tail` +- Fail if empty. +- Read data. +- Move `read_idx_`. 
------ -## State Check Functions +## Status Check Functions ```cpp -template -bool RingBuffer::empty() const -{ - return head_ == tail_; -} - -template -bool RingBuffer::full() const -{ - return (head_ + 1) % Capacity == tail_; +bool empty() const { + return read_idx_ == write_idx_; } -template -std::size_t RingBuffer::size() const -{ - if (head_ >= tail_) { - return head_ - tail_; - } - return Capacity - (tail_ - head_); +bool full() const { + return (write_idx_ + 1) % (Capacity + 1) == read_idx_; } - ``` -The `size()` pattern is very common in embedded development, -avoiding complex branching without using an additional counter. +The `full()` check is very common in embedded systems; it avoids complex branching and doesn't use an extra counter. ------ ## A Real-World Embedded Use Case -### UART Reception (ISR + Main Loop) +### Serial Reception (ISR + Main Loop) ```cpp -RingBuffer rx_buffer; +CircularBuffer rx_buffer; -void USART_IRQHandler() -{ - uint8_t data = UART_Read(); - rx_buffer.push(data); // 中断里只做这件事 +// UART Interrupt Service Routine +void USART1_IRQHandler() { + if (USART1->ISR & USART_ISR_RXNE) { + uint8_t data = USART1->RDR; + rx_buffer.push(data); // Non-blocking write + } } -int main() -{ +// Main Loop +int main() { while (1) { - uint8_t ch; - if (rx_buffer.pop(ch)) { - process_char(ch); + uint8_t byte; + if (rx_buffer.pop(byte)) { + process_byte(byte); // Process slowly } + // Do other tasks... } } - ``` -This approach has several deeply embedded-friendly advantages: +This approach has several very "embedded" advantages: -- The logic inside the ISR is extremely short -- No `malloc` -- The main loop processes data at its own pace -- Even if processing is a bit slow, it will not block interrupts +- The logic inside the ISR is extremely short. +- No `malloc`. +- The main loop processes data at its own pace. +- Even if processing is slow, it won't block the interrupt. 
------ -## A Practical Note on Thread Safety / Interrupt Safety +## A Reality Check on Thread Safety / Interrupt Safety The implementation above is: -- **Single producer + single consumer** -- One runs in an interrupt, the other in the main loop +- **Single Producer + Single Consumer (SPSC)** +- One runs in an interrupt, the other in the main loop. On many MCUs, this is **naturally safe** (as long as index reads and writes are atomic). -But if you encounter any of the following situations: +However, if you encounter one of the following situations: -- Multithreading -- Multiple producers -- SMP -- RTOS inter-task communication +- Multithreading. +- Multiple producers. +- SMP (Symmetric Multi-Processing). +- Communication between RTOS tasks. -Then you will need: +You will need: -- Disabling interrupts -- Atomic variables -- Or a mutex / spinlock +- Critical sections (disable interrupts). +- Atomic variables. +- Or a mutex / spinlock. ------ -## Comparing with std::queue / std::vector +## Comparison with std::queue / std::vector -| Approach | Dynamic Allocation | Deterministic | Embedded-Friendly | -| -------------- | ------------------ | ------------- | ----------------- | -| std::vector | Yes | No | ❌ | -| std::queue | Depends on underlying container | No | ❌ | -| Circular Buffer | No | Yes | ✅ | +| Approach | Dynamic Allocation | Deterministic | Embedded Friendly | +| ------------- | ------------------ | ------------- | ----------------- | +| std::vector | Yes | No | ❌ | +| std::queue | Depends on underlying container | No | ❌ | +| Circular Buffer | No | Yes | ✅ | diff --git a/documents/en/vol8-domains/embedded/04-intrusive-containers.md b/documents/en/vol8-domains/embedded/04-intrusive-containers.md index 4be0919ca..442eb5684 100644 --- a/documents/en/vol8-domains/embedded/04-intrusive-containers.md +++ b/documents/en/vol8-domains/embedded/04-intrusive-containers.md @@ -5,7 +5,7 @@ cpp_standard: - 14 - 17 - 20 -description: Intrusive container design 
+description: Intrusive Container Design difficulty: intermediate order: 4 platform: stm32f1 @@ -19,132 +19,155 @@ tags: title: Intrusive Container Design translation: source: documents/vol8-domains/embedded/04-intrusive-containers.md - source_hash: 24e237b2960b248f9f2dc4c56c20e8e807a888d72f4844b5541a5197c450bd42 - translated_at: '2026-05-26T11:37:35.486345+00:00' + source_hash: be6a1adfb9f0ecf819e11505b29abc841596da95c16afb75d38001765af4d2f5 + translated_at: '2026-06-14T00:21:18.768776+00:00' engine: anthropic - token_count: 1424 + token_count: 1425 --- -# Modern C++ for Embedded Systems Tutorial — Intrusive Container Design +# Modern C++ for Embedded: Intrusive Container Design -Do you remember what standard containers do to your data? They copy pointers, allocate nodes, maintain extra memory layouts, and at some point silently chew up your cache locality. Intrusive containers are more straightforward: data objects stick their own hands out to act as list nodes — who's paying for extra memory and indirection? Not me. +Do you remember what standard containers do with your data? They copy pointers, allocate nodes, maintain extra memory layouts, and silently devour your cache locality at some point. Intrusive containers are more straightforward: the data objects stick their own hands out to act as list nodes—who needs extra memory and indirection? Not me. ------ -## What Are Intrusive Containers, and Why Are They So Great for Embedded +## What are Intrusive Containers and Why are They Great for Embedded Systems -The key point of intrusive containers: node information (next/prev/...) lives directly inside the user object, rather than allocating a separate node to wrap the object pointer. The advantages are obvious: +The key point of intrusive containers is that node information (next/prev/...) is placed directly inside the user object, rather than allocating a separate node wrapper to hold the object pointer. 
The advantages are obvious: -- Zero extra allocation — no need to malloc/new a wrapper on every push (extremely important). -- Better cache locality — objects and metadata are together, making traversal faster. -- Smaller memory footprint and determinism — very friendly for memory-constrained or real-time systems. +- **Zero extra allocation** — No need to `malloc`/`new` a wrapper on every `push` (crucial). +- **Better cache locality** — Objects and metadata are together, making traversal faster. +- **Smaller memory footprint and determinism** — Very friendly for memory-constrained or real-time systems. -The disadvantages are equally straightforward: +The disadvantages are equally direct: -- Objects are coupled to the container interface (intrusion), requiring source modifications to the object structure. -- If an object needs to be in multiple lists simultaneously, it requires multiple "hook" members or multiple inheritance. -- Improper use can lead to dangling pointers or duplicate insertions, requiring more careful lifecycle management. +- **Objects are coupled to the container interface** (intrusive), requiring modifications to the object structure. +- **If an object needs to be in multiple lists simultaneously**, it requires multiple "hook" members or multiple inheritance. +- **Misuse can lead to dangling pointers or duplicate insertion issues**, requiring more careful lifecycle management. -Applicable scenarios: task schedulers, free-block lists, driver lists, kernel/RTOS data structures, memory pool free-lists, and more. +**Applicable scenarios:** task schedulers, free-lists for idle blocks, driver lists, kernel/RTOS data structures, memory pool free-lists, etc. ------ ## Two Common Implementation Strategies -1. **Base class hook (inheritance)**: Objects inherit a hook base class that contains next/prev. Type-safe, and easy to cast. -2. 
**Member hook**: Objects contain a hook member (more flexible, allowing multiple hook instances), but this requires the `container_of` trick to convert a hook pointer back to an object pointer. +1. **Base class hook (inheritance)**: The object inherits from a hook base class that contains `next`/`prev`. It is type-safe and easy to cast. +2. **Member hook**: The object contains a hook member (more flexible, allows multiple hook instances), but requires the `offsetof` technique to convert the hook pointer back to the object pointer. -Below, we first implement a clean, ready-to-use "base class hook" doubly linked list (suitable for tutorials and embedded), and then discuss the ideas and caveats of the member hook approach. +Below, we will first implement a clean, ready-to-use "base class hook" doubly linked list (suitable for tutorials and embedded systems), and then discuss the logic and caveats of member hooks. ------ -## Code: A Simple, Type-Safe Intrusive Doubly Linked List (Inheritance-Based) +## Code: Simple, Type-Safe Intrusive Doubly Linked List (Inheritance-based) -The goal of the following implementation: small and clear, compatible with C++11, and suitable for embedded compilers. +The goal of this implementation: small and clear, C++11 compatible, suitable for embedded compilers. ```cpp -// intrusive_list.h +// intrusive_list.hpp #pragma once -#include -#include - -// Intrusive list node base — 继承它即可成为链表节点 -template -struct IntrusiveListNode { - T* prev = nullptr; - T* next = nullptr; + +// A minimal, type-safe intrusive doubly linked list node. +// T must inherit from IntrusiveNode. +template +class IntrusiveNode { +public: + IntrusiveNode() : prev(nullptr), next(nullptr) {} + + // Check if the node is currently part of a list + bool is_linked() const { + return next != nullptr || prev != nullptr; + } + + // Remove this node from the list. + // Safe to call only if the node is actually linked. 
+ void unlink() { + if (next) { + next->prev = prev; + } + if (prev) { + prev->next = next; + } + next = prev = nullptr; + } + +private: + T* prev; + T* next; + + friend class IntrusiveList; }; -// Intrusive doubly linked list -template +// The intrusive list container itself. +// Does NOT manage memory ownership; it only manages pointers. +template class IntrusiveList { public: IntrusiveList() : head(nullptr), tail(nullptr) {} bool empty() const { return head == nullptr; } - void push_front(T* node) { - assert(node && node->prev == nullptr && node->next == nullptr && "节点必须处于未链接状态"); - node->next = head; - if (head) head->prev = node; - head = node; - if (!tail) tail = node; - } + // Push to the front of the list + void push_front(T* item) { + if (!item) return; - void push_back(T* node) { - assert(node && node->prev == nullptr && node->next == nullptr && "节点必须处于未链接状态"); - node->prev = tail; - if (tail) tail->next = node; - tail = node; - if (!head) head = node; - } + item->IntrusiveNode::next = head; + item->IntrusiveNode::prev = nullptr; - T* pop_front() { - if (!head) return nullptr; - T* n = head; - head = head->next; - if (head) head->prev = nullptr; - else tail = nullptr; - n->next = n->prev = nullptr; - return n; + if (head) { + head->IntrusiveNode::prev = item; + } else { + tail = item; // List was empty + } + head = item; } - void erase(T* node) { - assert(node && "erase null"); - if (node->prev) node->prev->next = node->next; - else head = node->next; + // Push to the back of the list + void push_back(T* item) { + if (!item) return; - if (node->next) node->next->prev = node->prev; - else tail = node->prev; + item->IntrusiveNode::prev = tail; + item->IntrusiveNode::next = nullptr; - node->prev = node->next = nullptr; + if (tail) { + tail->IntrusiveNode::next = item; + } else { + head = item; // List was empty + } + tail = item; } - void clear() { - T* cur = head; - while (cur) { - T* nxt = cur->next; - cur->prev = cur->next = nullptr; - cur = nxt; + // 
Standard iteration support + T* front() { return head; } + T* back() { return tail; } + const T* front() const { return head; } + const T* back() const { return tail; } + + // Iterator implementation for range-based for loops + class iterator { + public: + iterator(T* ptr) : current(ptr) {} + + T& operator*() { return *current; } + T* operator->() { return current; } + + // Prefix increment + iterator& operator++() { + if (current) current = current->IntrusiveNode::next; + return *this; + } + + // Postfix increment + iterator operator++(int) { + iterator temp = *this; + ++(*this); + return temp; + } + + bool operator!=(const iterator& other) const { + return current != other.current; } - head = tail = nullptr; - } - // 简单迭代器(只读/可写) - struct iterator { - using iterator_category = std::forward_iterator_tag; - using value_type = T; - using pointer = T*; - using reference = T&; - - explicit iterator(T* p) : p(p) {} - reference operator*() const { return *p; } - pointer operator->() const { return p; } - iterator& operator++() { p = p->next; return *this; } - iterator operator++(int) { iterator tmp = *this; ++*this; return tmp; } - bool operator==(const iterator& o) const { return p == o.p; } - bool operator!=(const iterator& o) const { return p != o.p; } private: - T* p; + T* current; }; iterator begin() { return iterator(head); } @@ -154,102 +177,127 @@ private: T* head; T* tail; }; - ``` -**How to use it:** +**How to use:** ```cpp -// example.cpp -#include "intrusive_list.h" -#include +#include "intrusive_list.hpp" +#include -struct Task : IntrusiveListNode { - int id; - Task(int i): id(i) {} +// Example 1: Task Control Block +class Task : public IntrusiveNode { +public: + const char* name; + int priority; + + Task(const char* n, int p) : name(n), priority(p) {} + + void run() { + printf("Running task: %s (Priority %d)\n", name, priority); + } }; int main() { - IntrusiveList runq; - Task a(1), b(2), c(3); + Task task1("Idle", 0); + Task task2("Logger", 10); + Task 
task3("Network", 5); - runq.push_back(&a); - runq.push_back(&b); - runq.push_front(&c); // 链表顺序: c, a, b + IntrusiveList ready_queue; - for (auto &t : runq) { - std::cout << "Task " << t.id << "\n"; + ready_queue.push_back(&task1); + ready_queue.push_back(&task2); + ready_queue.push_front(&task3); // Network goes to front + + printf("--- Task Queue ---\n"); + for (auto& t : ready_queue) { + t.run(); } - runq.erase(&a); + // Remove a specific task + task2.unlink(); - if (auto p = runq.pop_front()) { - std::cout << "pop " << p->id << "\n"; + printf("\n--- After removing Logger ---\n"); + for (auto& t : ready_queue) { + printf("Task: %s\n", t.name); } -} + return 0; +} ``` -This code can be compiled directly with any embedded-compatible C++ compiler (as long as it supports basic templates and `nullptr`). +This code compiles directly with embedded-compatible C++ compilers (as long as they support basic templates and `nullptr`). ------ -## Member Hook: When an Object Needs to Appear in Multiple Lists +## Member Hook: When Objects Need to Appear in Multiple Lists -The inheritance approach is simple, but if an object needs to belong to multiple lists simultaneously (for example, in both a ready_list and a wait_list), you need multiple hook members or the member hook approach. +The inheritance approach is simple, but if an object needs to belong to multiple lists simultaneously (e.g., in both a `ready_list` and a `wait_list`), you need multiple hook members or use the member hook approach. -The key to the member hook is `container_of` — given a pointer to a hook member, calculating the pointer back to the containing object (a macro commonly used in the Linux kernel). +The key to member hooks is the `container_of` technique — given a pointer to a hook member, calculate the pointer to the containing object (a macro commonly used in the Linux kernel, built on `offsetof`).
-A simple macro-based implementation (clear and widely used): +A simple macro implementation (clear and commonly used): ```cpp -#include // offsetof -#define CONTAINER_OF(ptr, type, member) \ - ((type*) ( (char*)(ptr) - offsetof(type, member) )) +#include + +// A generic hook node for member lists +struct LinkNode { + LinkNode* prev = nullptr; + LinkNode* next = nullptr; +}; +// Helper macro: container_of +// Given ptr (address of member), type (container type), and member (member name) +#define GET_CONTAINER(ptr, type, member) \ + reinterpret_cast(reinterpret_cast(ptr) - offsetof(type, member)) ``` Example structure: ```cpp -struct MyObject { - IntrusiveListNode ready_hook; // for ready list - IntrusiveListNode wait_hook; // for wait list - int data; -}; +class Device { +public: + int id; + LinkNode dev_list_hook; // Hook for global device list + LinkNode ready_hook; // Hook for ready queue + LinkNode wait_hook; // Hook for wait queue -// 操作 ready list 时,将传入 &obj->ready_hook,然后用 CONTAINER_OF 转回 MyObject* + Device(int i) : id(i) {} +}; +// Usage: +// Device* d = GET_CONTAINER(node_ptr, Device, dev_list_hook); ``` -The member hook is more flexible, but requires special attention during use: `offsetof` must match the actual member name, and it is strongly recommended to check whether the hook is already linked before insertion (to avoid duplicate insertions). +Member hooks are more flexible, but require special care when using: the `member` name in `GET_CONTAINER` must match the actual member name; and it is strongly recommended to check if the hook is already linked before insertion to avoid duplicate insertion. ------ -## Design Recommendations and Pitfall Guide +## Design Advice and Pitfall Prevention -1. **Object lifecycles must be explicit**: Nodes in a list must be removed from all lists before being destroyed. Otherwise, dangling pointers will occur, and the consequence is usually a hard-to-locate crash. -2. 
**Check state before insertion**: Add a `bool linked` field or assertion to the hook to prevent duplicate insertions. Make good use of `assert` in test code. -3. **Prefer member hooks for multi-hook requirements**: If an object switches between multiple containers frequently, member hooks are more flexible. -4. **Be careful with memory barriers/atomicity in concurrent scenarios**: If you need to manipulate lists in an ISR or on multi-core systems, you must use locks, atomic CAS, or specialized lock-free algorithms (beyond the scope of this article). -5. **Provide an RAII wrapper**: Consider providing a small `IntrusiveListGuard` or `ScopedUnlink` to ensure objects are safely unregistered when exceptions or early returns occur. Embedded code might not have exceptions, but RAII helps write safer cleanup code. -6. **Debug information**: During development, printing node states (id/address/prev/next) can quickly help locate errors. -7. **Don't overuse them**: Intrusive containers are not a silver bullet. If you don't care about per-allocation overhead, or if the object is immutable (third-party library), don't intrude into the object. Standard `std::list`/`vector` are simpler, safer, and easier to maintain. +1. **Object lifecycles must be explicit**: Nodes in a list must be removed from all lists before being destroyed. Otherwise, dangling pointers will appear, often leading to hard-to-locate crashes. +2. **Check state before insertion**: Add an `is_linked()` check or assertion to the hook to prevent duplicate insertion. Use `assert` frequently in test code. +3. **Prefer member hooks for multiple hook requirements**: If an object switches between containers frequently, member hooks are more flexible. +4. **Be careful with memory barriers/atomicity in concurrent scenarios**: If you operate on lists in an ISR or multi-core environment, you must use locks, atomic CAS, or specialized lock-free algorithms (beyond the scope of this article). +5.
**Provide RAII wrappers**: Consider providing a small `ScopeGuard` or `IntrusiveListAutoUnlink` to ensure objects are safely unlinked on exceptions or early returns. Embedded code might not use exceptions, but RAII helps write safer release code. +6. **Debug information**: During development, printing node status (id/address/prev/next) can quickly pinpoint errors. +7. **Don't abuse them**: Intrusive containers are not a silver bullet. If you don't care about per-allocation overhead or the object is immutable (third-party library), don't intrude on the object; standard `std::list`/`std::vector` are simpler, safer, and easier to maintain. ------ ## When to Choose Intrusive Containers -In embedded, kernel, and real-time systems, resources and latency are the top priorities, making intrusive data structures a very natural choice in these scenarios. They are particularly suitable for: +In embedded / kernel / real-time systems, resources and latency are top priorities. Intrusive data structures are a very natural choice in these scenarios. They are particularly suitable for: -- Systems that require determinism and avoid heap allocation (bootloaders, RTOS kernels). -- High-performance free-lists, task queues, timer wheels, and more. -- Scenarios where you want the smallest possible memory footprint. +- Systems requiring determinism and avoiding heap allocation (bootloaders, RTOS kernels). +- High-performance free-lists, task queues, timer wheels, etc. +- Scenarios where minimal memory footprint is desired. -If you are working on standard application-layer business logic, or if objects come from third-party libraries (where you cannot modify the structure), the maintenance cost of an intrusive approach may outweigh the benefits. +If you are working on general application-layer business logic, or if objects come from third-party libraries (where structure modification is impossible), the maintenance cost of intrusive solutions may outweigh the benefits. 
------ ## Conclusion -The idea behind intrusive containers is not complicated: let the data take responsibility for its own "positioning." However, this requires you to have a clearer understanding of the object's responsibilities — who inserts it, who removes it, and when it gets removed. Turn those responsibilities into code, and turn that code into conventions. For embedded systems, this is a very pragmatic engineering philosophy: save a byte of memory, gain a bit more determinism. +The idea behind intrusive containers isn't complex: let the data take responsibility for its own "position". However, this requires you to be clearer about the object's responsibilities—who inserts it, who deletes it, and when it is deleted. Codify these responsibilities into code, and then turn that code into standards. For embedded systems, this is a very "pragmatic" engineering philosophy: save a bit of memory, gain a bit of determinism. diff --git a/documents/roadmap/index.md b/documents/roadmap/index.md index 8081ab60e..024651e0c 100644 --- a/documents/roadmap/index.md +++ b/documents/roadmap/index.md @@ -1,13 +1,13 @@ --- title: "学习路线图" -description: "从零基础到工程实战的现代 C++ 学习路径——按背景选起点、十卷递进详解、学习节奏与配套资源" +description: "从零基础到嵌入式实战的现代 C++ 学习路径——按背景选起点、十卷递进详解、学习节奏与配套资源" --- # 学习路线图 -这份路线图告诉你:**这套教程该怎么学、从哪里开始、每一卷教什么**。 +这套教程是一份系统化的现代 C++ 学习材料,**十卷从入门一路走到嵌入式实战**。这份路线图只回答三个问题:该怎么学、从哪里开始、每一卷教什么。 -它面向「想系统掌握现代 C++」的人——无论你是零基础、有 C/嵌入式背景,还是已经会写 C++ 想补齐工程能力。下面先按背景选起点,再逐卷展开。 +无论你是零基础、有 C/嵌入式底子,还是已经会写 C++ 想补齐工程能力,下面都先帮你按背景选起点,再逐卷展开。 > 这里是**学习路线图**(读者怎么学)。项目本身的开发进展与规划是另一回事,见文末[内容成熟度与项目路线图](#内容成熟度与项目路线图)。 @@ -19,168 +19,150 @@ description: "从零基础到工程实战的现代 C++ 学习路径——按背 基础 → 现代特性 → 标准库 → 高级 → 并发 → 性能 → 工程 → 领域实战 ``` -- **不是语法速查**:每个关键概念都配可编译的 CMake 示例,能跑、能改、能验证。 -- **卷与卷之间有依赖**:后面的卷默认你掌握了前面的核心(尤其卷一→卷二是关键分水岭)。 -- **可以跳读**:有相关背景的读者不必从卷一第一页读起,按下面的「三条路径」选起点即可。 -- **配套资源随时查**:[C++ 特性参考卡](/cpp-reference/)(C++98→C++23 速查)、[实战项目](/projects/)、[课程笔记](/vol10-open-lecture-notes/)。 +几点先说清楚: + +- 
**不是语法速查**。每个关键概念都配可编译的 CMake 示例,能跑、能改、能验证。 +- **卷与卷之间有依赖**。后面的卷默认你掌握了前面的核心,其中**卷一 → 卷二是最关键的分水岭**——过了卷二,你才算真正进入「现代 C++」。 +- **可以跳读**。有相关背景的读者不必从卷一第一页读起,按下面的「三条路径」选起点即可。 +- **配套资源随时查**。[C++ 特性参考卡](/cpp-reference/)(按标准版本 + 功能类别双视图速查)、[实战项目](/projects/)、[课程笔记](/vol10-open-lecture-notes/)。 ## 三条学习路径(按背景选起点) ```mermaid flowchart TD - Start(["你的背景?"]) - A["零基础 / 仅会 C"] --> V1["卷一 基础入门
(含 C 速通)"] - B["有 C 或嵌入式经验"] --> V2a["卷二 现代特性
(补现代写法)"] - C["已会 C++"] --> Goal["按目标选专题"] + Start(["你的背景?"]) --> A["零基础 / 仅会 C"] + Start --> B["有 C 或嵌入式经验"] + Start --> C["已会 C++"] + + A --> V1["卷一 基础入门
含 C 速通"] + B --> V2["卷二 现代特性"] + V1 --> V2 - V1 --> V2["卷二 现代特性"] V2 --> V3["卷三 标准库"] - V3 --> V4["卷四 高级"] V2 --> V5["卷五 并发"] + V3 --> V4["卷四 高级"] + V4 --> V5 V5 --> V6["卷六 性能"] V6 --> V7["卷七 工程"] V7 --> V8["卷八 领域/嵌入式"] - V2a --> V8 - V8 --> V9["卷九 开源研读"] - V8 --> V10["卷十 课程笔记"] - - Goal --> Conc["卷五 并发"] - Goal --> Perf["卷六 性能"] - Goal --> Eng["卷七 工程"] - Goal --> Src["卷九 开源研读"] - Goal --> Adv["卷四 高级前沿"] - - Start --> A - Start --> B - Start --> C + + C --> Goal["按目标选专题"] + Goal --> V4 + Goal --> V5 + Goal --> V6 + Goal --> V7 + Goal --> V9["卷九 开源研读"] + + V8 --> V9 + V8 -.穿插.-> V10["卷十 课程笔记"] ``` -- **路径 A · 零基础 / 仅会 C**:从 [卷一](/vol1-fundamentals/) 开始(含 C 语言速通),沿主线一卷卷走。最稳,也最长。 -- **路径 B · 有 C 或嵌入式经验**:你的语法底子够,直接进 [卷二](/vol2-modern-features/) 补「现代 C++ 写法」,然后扎进 [卷八 嵌入式](/vol8-domains/) 实战,按需补并发(五)/性能(六)/工程(七)。 -- **路径 C · 已会 C++**:按目标直取专题——要并发读 [卷五](/vol5-concurrency/),要性能读 [卷六](/vol6-performance/),要工程化读 [卷七](/vol7-engineering/),想读大型源码读 [卷九](/vol9-open-source-project-learn/),追前沿读 [卷四](/vol4-advanced/)。 +**路径 A · 零基础 / 仅会 C** —— 起点 [卷一](/vol1-fundamentals/)(含完整的 C 语言速通)。沿主线一卷一卷走,最稳,也最长。跳读策略:已有编程经验的话,C 速通可快速过,重点啃值类别、OOP、模板初步。 + +**路径 B · 有 C 或嵌入式经验** —— 你的语法底子够,直接进 [卷二](/vol2-modern-features/) 补「现代 C++ 写法」,然后扎进 [卷八 嵌入式](/vol8-domains/) 实战;并发(卷五)、性能(卷六)、工程(卷七)按需补。 + +**路径 C · 已会 C++** —— 按目标直取专题:要并发/异步读 [卷五](/vol5-concurrency/),要性能读 [卷六](/vol6-performance/),要工程化读 [卷七](/vol7-engineering/),想读大型源码读 [卷九](/vol9-open-source-project-learn/),追前沿读 [卷四](/vol4-advanced/)。 ## 卷级详解 ### 卷一 · 基础入门 -- **定位**:从零建立 C++ 完整知识体系,含一份完整的 C 语言速通教程。 -- **关键主题**:环境搭建 · 类型系统与值类别 · 控制流与函数 · 指针与引用 · 数组与字符串(`std::array`/`std::string`) · 类与面向对象 · 继承与多态 · 模板初步与异常。 -- **规模**:约 102 篇(全仓最厚)。 -- **亮点**:C 语言速通复习 · 值类别深度剖析 · 智能指针预览 · STL 初见。 -- **难度 / 前置**:入门 → 中级 / 无前置。 -- **建议节奏**:零基础全读;有基础可跳过 C 速通,重点看值类别、OOP、模板初步。 +- **定位**:从零建立完整的 C++ 知识体系,是全仓的地基与起点;附一份完整的 C 语言速通(含嵌入式相关的高级 C)。 +- **关键主题**:环境搭建 · 类型系统与值类别 · 控制流与函数 · 指针与引用 · 数组与字符串 · 类与面向对象 · 运算符重载 · 继承与多态 · 模板初步 · 异常处理 · STL 初见 · 内存模型基础;C 
速通覆盖指针精髓、结构体与对齐、C 陷阱、嵌入式 C 模式。 +- **难度 · 前置**:入门 → 中级 / 无前置。 +- **建议节奏**:零基础全读;有底子的跳过 C 速通,重点看值类别、OOP、模板初步——这几块决定后面顺不顺。这卷正在重写(快速入门 → 全栈入门),章节可能微调,但核心主题稳定。 ### 卷二 · 现代特性 - **定位**:系统掌握 C++11/14/17 核心特性,是「会写 C++」与「会写现代 C++」的关键分水岭。 -- **关键主题**:移动语义与右值引用 · 智能指针与 RAII · `constexpr` 编译期计算 · Lambda 与函数式 · 类型安全(`enum class`/`variant`/`optional`) · 结构化绑定 · `auto`/`decltype`/`string_view`/`filesystem`。 -- **规模**:约 56 篇。 -- **亮点**:移动语义实践 · RAII 深度剖析 · Lambda 捕获详解 · `string_view` 性能与陷阱。 -- **难度 / 前置**:中级 / 卷一。 -- **建议节奏**:核心转折卷,务必精读;这一卷决定你后续所有卷的顺畅度。 +- **关键主题**:移动语义与右值引用 · 智能指针与 RAII · `constexpr` 编译期计算 · Lambda 与函数式 · 类型安全(`enum class`/`variant`/`optional`) · 结构化绑定 · `auto`/`decltype` · 属性 · `string_view` · `filesystem` · 现代错误处理(`optional`/`expected`) · 用户自定义字面量。 +- **难度 · 前置**:中级 / 卷一。 +- **建议节奏**:核心转折卷,务必精读;它决定你后续所有卷的顺畅度。 ### 卷三 · 标准库深入 - **定位**:STL 容器与字符串的实现细节、性能与内存底层机制。 -- **关键主题**:`vector` 动态扩容与迭代器失效 · `string` 内存模型与小字符串优化 · `char8_t` 与 UTF-8 · `span` · 自定义分配器 · 对象大小与平凡类型。 -- **规模**:约 8 篇(篇幅小但深)。 -- **亮点**:`vector` 实现与性能分析 · `string` 内存模型深度剖析 · 自定义分配器。 -- **难度 / 前置**:中级 / 卷一、卷二。 -- **建议节奏**:按需精读;做性能敏感或嵌入式开发时回头看。 +- **关键主题**:`vector` 三指针表示 / 扩容 / 迭代器失效 · `string` 内存模型与小字符串优化 · `char8_t` 与 UTF-8 · `array` · `span` · 对象大小与平凡类型 · 自定义分配器。 +- **难度 · 前置**:中级 / 卷一、卷二。 +- **建议节奏**:篇幅小而深,按需精读;做性能敏感或嵌入式开发时回头看。其中 `vector`/`string`/`char8_t` 几篇最稳,可先读,其余在重写中。 ### 卷四 · 高级主题 -- **定位**:C++20/23/26 前沿特性与元编程技术,写库、写高性能泛型代码的人必经。 -- **关键主题**:模板体系(C++11→23) · 协程与调度器 · Ranges 与管道式编程 · 三路比较 `<=>` · 空基类优化 · Modules。 -- **规模**:约 12 篇(部分章节重写中)。 -- **亮点**:协程调度器实现 · Ranges 管道实践 · 三路比较运算符 · C++ Modules(MSVC)。 -- **难度 / 前置**:高级 / 卷二、卷三。 -- **建议节奏**:先读协程、Ranges、三路比较三块;其余随用随补。 +- **定位**:C++20/23 前沿特性与元编程技术,写库、写高性能泛型代码的人必经。 +- **关键主题**:协程(基础 + 调度器实现) · Ranges(views + 管道实践) · 三路比较 `<=>` · 空基类优化 · C++ Modules(MSVC) · 指定初始化。 +- **难度 · 前置**:高级 / 卷二、卷三。 +- **建议节奏**:先读协程、Ranges、三路比较三块;模板体系(C++11→23 元编程)等内容在规划中,随用随补。 ### 卷五 · 并发编程 -- **定位**:从线程原语到协程异步,建立完整并发判断力(先正确再性能、先锁再无锁、先同步再任务)。 -- **关键主题**:线程生命周期与 RAII · 
互斥与同步原语 · `atomic` 与六种内存序 · 无锁数据结构(SPSC/MPMC) · `future` 与线程池 · 协程与事件循环 · Actor/Channel。 -- **规模**:约 44 篇 + 9 个练习项目(Lab 0–5 + Capstone)。 -- **亮点**:内存序详解 · 无锁队列 · 协程 Echo 服务器 · Mini Concurrent Runtime(Capstone)。 -- **难度 / 前置**:中高 / 卷一 ~ 卷四。 -- **建议节奏**:规模最大、配套 Lab 最多,强烈建议动手做 Lab,不要只读。 +- **定位**:从线程原语到协程异步,建立完整并发判断力——先正确再性能、先锁再无锁、先同步再任务。 +- **关键主题**:线程生命周期与 RAII · 互斥与同步原语(含 `latch`/`barrier`/`semaphore`) · `atomic` 与六种内存序 · 无锁数据结构(SPSC/MPMC 队列) · `future` 与线程池 · 协程与事件循环(Echo 服务器) · Actor/Channel。 +- **难度 · 前置**:中高 / 卷一 ~ 卷四。 +- **建议节奏**:全仓投入最重、动手项目最多的一卷,配有 Lab 0–5 + Capstone(Mini Concurrent Runtime)。强烈建议动手做 Lab,别只读。 ### 卷六 · 性能优化 -- **定位**:CPU 缓存、SIMD、汇编阅读、优化模式等 C++ 性能核心技术。 -- **关键主题**:内联与编译器优化 · 性能与代码大小评估 · AVX/AVX2。 -- **规模**:3 篇(重写扩充中)。 -- **亮点**:内联与编译器优化 · AVX/AVX2 深入。 -- **难度 / 前置**:中高 / 卷五。 -- **建议节奏**:内容正在扩充;先建立缓存层级与 SIMD 直觉,后续按专题深入。 +- **定位**:编译器优化、代码大小评估、SIMD 等 C++ 性能核心技术。 +- **关键主题**:内联与编译器优化(破除「`inline` = 性能开关」的迷思) · 性能与代码大小评估 · AVX/AVX2。 +- **难度 · 前置**:中高 / 卷五。 +- **建议节奏**:内容在扩充;先建立缓存层级与 SIMD 直觉,后续按专题深入。 ### 卷七 · 工程实践 - **定位**:C++ 软件工程落地——构建、交叉编译、链接、调试、平台开发。 -- **关键主题**:CMake 与交叉编译 · 编译器选项 · 链接器与链接脚本 · WSL 开发 · MSVC 调试 · 文件 I/O 实践。 -- **规模**:约 8 篇。 -- **亮点**:交叉编译与 CMake · 链接器与链接脚本 · 文件拷贝器(完整 I/O 项目) · MSVC 调试原理。 -- **难度 / 前置**:中级 / 建议先读「编译与链接深入」。 -- **建议节奏**:配合 [编译与链接](/compilation/) 一起学;按当前工程栈挑读。 +- **关键主题**:CMake 与交叉编译 · 编译器选项 · 链接器与链接脚本 · WSL 开发 · MSVC 调试原理 · C++ Modules(VS2026) · 文件 I/O(文件拷贝器实战)。 +- **难度 · 前置**:中级 / 建议先读「编译与链接深入」。 +- **建议节奏**:配合[编译与链接](/compilation/)一起学,按当前工程栈挑读。 ### 编译与链接深入 - **定位**:C/C++ 编译、链接、静态/动态库、符号可见性的底层机制,是工程实践的基础。 -- **关键主题**:编译链接概述 · 静态库 · 动态库设计与原则 · 符号可见性 · 运行时加载 · 库搜索逻辑。 -- **规模**:约 10 篇。 -- **亮点**:动态库设计 · 符号可见性(ABI 层控制) · 动态库作为可执行文件。 -- **难度 / 前置**:中级 / C++ 基础。 -- **建议节奏**:作为卷七的前置;做嵌入式/交叉编译前必读。 +- **关键主题**:编译链接概述 · 复用与库的概念 · 静态库 · 动态库(设计原则 / 符号可见性 / 运行时加载 / 库检索逻辑 / 动态库可执行化)。 +- **难度 · 前置**:中级 / C++ 基础。 +- **建议节奏**:作为卷七的前置;做嵌入式/交叉编译前必读。这卷内容已完整稳定。 ### 卷八 · 领域应用 -- **定位**:现代 C++ 在各垂直领域的实战,**主线是嵌入式**(STM32F1/F4)。 -- 
**关键主题**:STM32 环境搭建 · LED/按键/UART 全流程(从 C 重构到 C++23) · 零开销抽象 · 寄存器访问 · 中断安全 ·(网络/GUI/数据存储 规划中)。 -- **规模**:约 75 篇(其中嵌入式 62 篇)。 -- **亮点**:LED 点灯 13 篇系列 · UART 串口 13 篇系列(含协程/`expected`/concepts) · 中断安全的代码 · 嵌入式零开销抽象。 -- **难度 / 前置**:中级 / 卷一 ~ 卷七。 -- **建议节奏**:嵌入式是当前最完整的领域主线,按外设循序渐进;有 STM32 板子可同步实操。 +- **定位**:现代 C++ 在各垂直领域的实战,**主线是嵌入式**。 +- **关键主题**:STM32(**仅 STM32F1,如 Blue Pill;暂无 F4**)环境搭建 · LED / 按键 / UART 三条全流程(从 C 写法重构到 C++23 模板封装) · 零开销抽象 · 类型安全寄存器访问 · 循环缓冲 / 对象池 / 侵入式容器等嵌入式模式 · 中断安全;另有 C++ 深潜(指针语义系列)。网络 / GUI / 数据存储 / 算法等子域在规划中。 +- **难度 · 前置**:中级 / 卷一 ~ 卷七。 +- **建议节奏**:嵌入式是当前最完整的领域主线,按外设循序渐进;手头有 STM32F1 板子可同步实操。 ### 卷九 · 开源项目学习 -- **定位**:分析工业级开源项目源码,学真实世界的 C++ 设计与实现。 -- **关键主题**:Chromium `OnceCallback` 回调设计机制 ·(更多项目规划中)。 -- **规模**:约 20 篇。 -- **亮点**:OnceCallback——从 Chromium 学到的回调设计(完整系列)。 -- **难度 / 前置**:中高 / 卷一 ~ 卷七(尤其卷四、卷五、卷七)。 +- **定位**:拆解工业级开源项目源码,学真实世界的 C++ 设计与实现。 +- **关键主题**:目前聚焦 Chromium 的 `OnceCallback` 回调组件——从动机、API 设计、核心骨架到 `bind_once`,并穿插 C++23 `deducing this`、`move_only_function` 等前置深潜。更多项目在规划中。 +- **难度 · 前置**:中高 / 卷一 ~ 卷七(尤其卷四、卷五)。 - **建议节奏**:读源码导向;建议先掌握卷四高级特性,再来读工业级实现。 ### 卷十 · 课程与演讲笔记 -- **定位**:CppCon 等技术会议演讲与开源课程的阅读笔记和二次创作。 -- **关键主题**:CppCon 2025——概念泛型编程 · Ranges · 移动语义 · 底层汇编阅读。 -- **规模**:约 24 篇。 -- **亮点**:Concept-based Generic Programming · Back to Basics: Ranges · Back to Basics: Move Semantics。 -- **难度 / 前置**:中级 / 卷一 ~ 卷五。 -- **建议节奏**:用作「深化」——学完对应卷后看相关演讲笔记加固理解。 +- **定位**:CppCon 等技术会议演讲的阅读笔记和二次创作。 +- **关键主题**:目前是 CppCon 2025 四场——Bjarne Stroustrup《Concept-based Generic Programming》、Matt Godbolt《Some Assembly Required》(读汇编 / Compiler Explorer)、Mike Shah《Back to Basics: Ranges》、Ben Saks《Back to Basics: Move Semantics》。 +- **难度 · 前置**:中级 / 卷一 ~ 卷五。 +- **建议节奏**:用作「深化」——学完对应卷后看相关演讲笔记加固理解,穿插在读主线之间。 ## 学习节奏与建议 -- **时间预期**:零基础走完整条主线是长期工程(数百篇 + 实战),别指望速成;按卷设里程碑,每卷配示例动手敲。 -- **推荐顺序**:严格按 一→二→三→四→五→六→七→八 的依赖走最稳;有背景则按「三条路径」切入。 -- **跳读策略**:卷一可跳 C 速通;卷三/卷六/卷九篇幅小或扩充中,按需读;卷十可穿插在对应卷之后当复习。 -- **用实战串联**:每学完一块,去 [实战项目](/projects/) 
找对应项目练手(协程服务器、并发运行时、嵌入式等),把零散知识捏成完整能力。 +- **时间预期**:零基础走完整条主线是长期工程,别指望速成;按卷设里程碑,每卷配示例动手敲。 +- **推荐顺序**:严格按 一 → 二 → 三 → 四 → 五 → 六 → 七 → 八 的依赖走最稳;有背景则按「三条路径」切入。 +- **跳读策略**:卷一可跳 C 速通;卷三、卷六篇幅小或扩充中,按需读;卷九目前聚焦单一项目,等积累够了再读;卷十穿插在对应卷之后当复习。 +- **用实战串联**:每学完一块,去[实战项目](/projects/)找对应项目练手(协程服务器、并发运行时、嵌入式等),把零散知识捏成完整能力。 ## 配套资源 -- [C++ 特性参考卡](/cpp-reference/):C++98→C++23 共约 46 篇速查,按标准版本与功能类别双视图。 -- [贯穿式实战项目](/projects/):把各卷知识串成可交付项目。 +- [C++ 特性参考卡](/cpp-reference/):C++98 → C++23 速查,按标准版本与功能类别双视图组织,每张卡标注嵌入式适用性。 +- [贯穿式实战项目](/projects/):一个项目索引页,把散落在各卷的实战(协程 Echo 服务器、Mini Concurrent Runtime、Chromium OnceCallback 研读等)串起来;规划中还有手写 STL、迷你 HTTP 服务器、迷你 GUI、嵌入式迷你 OS。 - [社区文章](/community/):社区来稿与审阅收录,也欢迎你投稿。 - [卷十 课程笔记](/vol10-open-lecture-notes/):CppCon 等顶级演讲的二次创作,深化用。 ## 内容成熟度与项目路线图 -各卷的当前状态(供你判断哪部分内容最扎实): +各卷的当前状态,供你判断哪部分内容最扎实(只给定性判断,不纠结具体篇数): -- ✓ **成熟稳定**:卷一 基础、卷二 现代特性、编译与链接深入。 -- ✦ **推进中**:卷三 标准库、卷四 高级、卷五 并发、卷七 工程、卷八 嵌入式、卷十 课程笔记。 -- ◇ **扩充/规划中**:卷六 性能、卷九 开源研读、卷八的 网络/GUI/数据 子领域。 +- ✓ **成熟稳定**:卷二 现代特性、卷五 并发、编译与链接深入。 +- ✦ **推进中**:卷一 基础(正重写为全栈入门)、卷七 工程、卷八 领域(嵌入式主线已完整)、卷九 开源研读、卷十 课程笔记。 +- ◇ **扩充 / 重写中**:卷三 标准库(半数在重写)、卷四 高级(模板体系等在规划)、卷六 性能(扩充中)。 想看**项目本身的开发规划**(要做什么、发布节奏、TODO 优先级),那是另一份文档: diff --git a/site/.vitepress/config/nav.ts b/site/.vitepress/config/nav.ts index d677830c4..6b777a969 100644 --- a/site/.vitepress/config/nav.ts +++ b/site/.vitepress/config/nav.ts @@ -78,6 +78,6 @@ export const navEn: DefaultTheme.NavItem[] = [ { text: 'Reference', link: '/en/cpp-reference/' }, { text: 'Appendix', link: '/en/appendix/' }, { text: 'Community', link: '/en/community/' }, - { text: 'Roadmap', link: '/en/community/dev/' }, + { text: 'Roadmap', link: '/en/roadmap/' }, { text: 'Team', link: '/en/team/' }, ] diff --git a/site/.vitepress/theme/components/HomeRoadmap.vue b/site/.vitepress/theme/components/HomeRoadmap.vue index 6040c1b65..4aa66a7d9 100644 --- a/site/.vitepress/theme/components/HomeRoadmap.vue +++ b/site/.vitepress/theme/components/HomeRoadmap.vue @@ -1,6 +1,6 @@