sysprog21 · jserv · May 6, 2026 · May 6, 2026
diff --git a/assets/ruleset.json b/assets/ruleset.json
@@ -1267,7 +1267,8 @@
       "type": "cross_strait",
       "context": "@domain IT。tw「優化」泛用於商業；IT optimize 改「最佳化」以區分 improve",
       "english": "optimize",
-      "negative_context_clues": ["流程", "體驗", "服務"]
+      "negative_context_clues": ["流程", "體驗", "服務"],
+      "editorial_confidence": "low"
     },
     {
       "from": "優步",
@@ -1302,7 +1303,7 @@
       "from": "元數據",
       "to": [],
       "type": "cross_strait",
-      "context": "@domain 資料。源自希臘文 meta- (關於) + data (資料)，原意為「描述資料的資料」",
+      "context": "@domain 資料。源自希臘文 meta- (關於) + data (資料)，原意為「描述資料的資料」。preferred: metadata；可接受: 詮釋資料 / 後設資料；rejected: 元資料",
       "english": "metadata"
     },
     {
@@ -1319,6 +1320,13 @@
       "context": "@domain 程式設計",
       "english": "metaprogramming/meta-programming"
     },
+    {
+      "from": "元資料",
+      "to": [],
+      "type": "cross_strait",
+      "context": "@domain 資料。`元資料` 為機械式 Sinification (從 `元數據` 字面替換而來)，無 NAER / MoE 立足點。preferred: metadata；可接受替代: 詮釋資料 / 後設資料",
+      "english": "metadata"
+    },
     {
       "from": "元音",
       "to": ["母音"],
@@ -3489,7 +3497,8 @@
       "type": "confusable",
       "context": "限 IT 語境。電影/戲劇場景為正確 tw 用法",
       "english": "scenario",
-      "context_clues": ["應用", "部署", "測試", "系統", "開發", "架構", "軟件", "軟體", "程式", "行程", "核心", "記憶體", "CPU"]
+      "context_clues": ["應用", "部署", "測試", "系統", "開發", "架構", "軟件", "軟體", "程式", "行程", "核心", "記憶體", "CPU"],
+      "editorial_confidence": "low"
     },
     {
       "from": "塑料",
@@ -7416,8 +7425,9 @@
       "from": "消息",
       "to": ["訊息"],
       "type": "cross_strait",
-      "context": "@domain IT",
-      "english": "message"
+      "context": "@domain IT。`好消息`/`壞消息`/`消息來源` 為合法 zh-TW 用法",
+      "english": "message",
+      "positional_clues": ["not_after:好", "not_after:壞", "not_before:來源"]
     },
     {
       "from": "消息環",
@@ -9097,7 +9107,8 @@
       "type": "cross_strait",
       "context": "@domain 程式設計",
       "english": "algorithm",
-      "exceptions": ["演算法"]
+      "exceptions": ["演算法"],
+      "editorial_confidence": "low"
     },
     {
       "from": "箭頭操作符",

diff --git a/benches/scanner.rs b/benches/scanner.rs
@@ -480,6 +480,7 @@ fn bench_cpu_attribution_100kb(c: &mut Criterion) {
         heading_severity_boost: false,
         political_stance: PoliticalStance::RocCentric,
         offset_only: false,
+        exempt_blockquotes: false,
     };
 
     // Spelling-only config.

diff --git a/build.rs b/build.rs
@@ -33,6 +33,16 @@ struct SpellingRule {
     positional_clues: Option<Vec<String>>,
     #[serde(default)]
     tags: Option<Vec<String>>,
+    #[serde(default)]
+    editorial_confidence: Option<EditorialConfidence>,
+}
+
+#[derive(serde::Serialize, serde::Deserialize)]
+#[serde(rename_all = "lowercase")]
+enum EditorialConfidence {
+    High,
+    Medium,
+    Low,
 }
 
 #[derive(serde::Serialize, serde::Deserialize)]

diff --git a/scripts/check-ruleset.py b/scripts/check-ruleset.py
@@ -78,6 +78,7 @@ def dedup_sort(
     "negative_context_clues",
     "positional_clues",
     "tags",
+    "editorial_confidence",
 }
 
 # Field order for spelling rules (stable, human-scannable output).
@@ -93,6 +94,7 @@ def dedup_sort(
     "positional_clues",
     "exceptions",
     "tags",
+    "editorial_confidence",
 ]
 
 CASE_FIELD_ORDER = ["term", "alternatives", "disabled"]

diff --git a/src/cache.rs b/src/cache.rs
@@ -63,6 +63,10 @@ pub struct ScanParams {
     pub translationese_domain: String,
     // AI threshold level (formatted f32) — different multipliers produce different results.
     pub ai_threshold: String,
+    // Markdown blockquote-exemption flag — changes which spans get
+    // scanned, so cache hits must be invalidated when toggled.
+    #[serde(default)]
+    pub exempt_blockquotes: bool,
 }
 
 /// A single cached entry.
@@ -420,6 +424,7 @@ mod tests {
             detect_translationese: false,
             translationese_domain: "general".into(),
             ai_threshold: "1.0".into(),
+            exempt_blockquotes: false,
         }
     }
 
@@ -433,6 +438,7 @@ mod tests {
             detect_translationese: false,
             translationese_domain: "general".into(),
             ai_threshold: "1.0".into(),
+            exempt_blockquotes: false,
         }
     }
 

diff --git a/src/config.rs b/src/config.rs
@@ -26,6 +26,38 @@ pub struct ProjectConfig {
     pub suppressions: Option<String>,
     pub packs: Option<Vec<String>>,
     pub translation_memory: Option<String>,
+    pub markdown: Option<MarkdownConfig>,
+    pub glossary: Option<GlossaryConfig>,
+}
+
+/// Markdown-specific scanning options (35.7).
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub struct MarkdownConfig {
+    /// When true, treat pulldown-cmark `Tag::BlockQuote` ranges as
+    /// exclusion zones.  Useful for documents that quote mainland-Chinese
+    /// sources for illustrative purposes.  Off by default.
+    pub exempt_blockquotes: Option<bool>,
+}
+
+/// Project glossary section (35.9).  `preferred` entries are layered above
+/// the embedded ruleset and pack store but below translation memory (TM);
+/// `banned` entries take precedence over everything.  Precedence:
+/// glossary `banned` > TM > glossary `preferred` > domain pack > embedded ruleset.
+#[derive(Debug, Default, Deserialize)]
+#[serde(default)]
+pub struct GlossaryConfig {
+    /// Terms that must always be flagged regardless of context clues.
+    /// E.g. `["線程", "內存"]` forces those terms to be flagged even in
+    /// otherwise ambiguous prose.
+    pub banned: Option<Vec<String>>,
+    /// Project-preferred zh-TW forms.  Used by the consistency report
+    /// (35.1) to choose the canonical suggestion when both TW-preferred
+    /// and CN-preferred variants appear in the same document.
+    pub preferred: Option<Vec<String>>,
+    /// Names that should never be flagged (added to the suppression
+    /// list).  E.g. `["TSMC", "MediaTek"]`.
+    pub proper_nouns: Option<Vec<String>>,
 }
 
 impl ProjectConfig {