Skip to content

Commit b5432dd

Browse files
committed
Narrow 聯繫 cross_strait flagging to contact-copy
The previous standalone "聯繫 → 聯絡" rule treated 聯繫, a word that the MoE concise dictionary records as valid zh-TW, as a blanket cross-strait term, producing false positives on general prose such as "民間文化聯繫", "國際聯繫", and "保持聯繫". Replace it with explicit compound rules covering the UI/customer-service phrases where 聯絡 is the idiomatic TW form: 聯繫我們, 聯繫方式, 聯繫資訊, 聯繫管道, 聯繫電話, 聯繫客服, 如需協助請聯繫. The 聯繫歷史 → 通話記錄 rule is unchanged. Add a scanner regression test that locks in both directions of the behavior: the new compounds fire exactly once on contact-copy spans, and ordinary prose containing 聯繫 stays silent. Close #89
1 parent 9b977ca commit b5432dd

2 files changed

Lines changed: 143 additions & 4 deletions

File tree

assets/ruleset.json

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3768,6 +3768,13 @@
37683768
"type": "ai_filler",
37693769
"context": "humanize.C7: sycophantic chatbot opener; delete"
37703770
},
3771+
{
3772+
"from": "如需協助請聯繫",
3773+
"to": ["如需協助請聯絡"],
3774+
"type": "cross_strait",
3775+
"context": "@domain IT。客服 CTA;避免把日常用法「聯繫」一概改寫",
3776+
"english": "if you need help, please contact"
3777+
},
37713778
{
37723779
"from": "委託",
37733780
"to": ["委派"],
@@ -9686,11 +9693,25 @@
96869693
"context_clues": ["程式", "軟體", "系統", "電腦", "網路"]
96879694
},
96889695
{
9689-
"from": "聯繫",
9690-
"to": ["聯絡"],
9696+
"from": "聯繫客服",
9697+
"to": ["聯絡客服"],
96919698
"type": "cross_strait",
9692-
"context": "@domain IT",
9693-
"english": "contact"
9699+
"context": "@domain IT。客服 CTA;避免把日常用法「聯繫」一概改寫",
9700+
"english": "contact support"
9701+
},
9702+
{
9703+
"from": "聯繫我們",
9704+
"to": ["聯絡我們"],
9705+
"type": "cross_strait",
9706+
"context": "@domain IT。客服 CTA / contact us copy;避免把日常用法「聯繫」一概改寫",
9707+
"english": "contact us"
9708+
},
9709+
{
9710+
"from": "聯繫方式",
9711+
"to": ["聯絡方式"],
9712+
"type": "cross_strait",
9713+
"context": "@domain IT。聯絡資訊欄位標籤;避免把日常用法「聯繫」一概改寫",
9714+
"english": "contact method"
96949715
},
96959716
{
96969717
"from": "聯繫歷史",
@@ -9699,6 +9720,27 @@
96999720
"context": "@domain 通訊",
97009721
"english": "call history"
97019722
},
9723+
{
9724+
"from": "聯繫管道",
9725+
"to": ["聯絡管道"],
9726+
"type": "cross_strait",
9727+
"context": "@domain IT。聯絡資訊欄位標籤;避免把日常用法「聯繫」一概改寫",
9728+
"english": "contact channel"
9729+
},
9730+
{
9731+
"from": "聯繫資訊",
9732+
"to": ["聯絡資訊"],
9733+
"type": "cross_strait",
9734+
"context": "@domain IT。聯絡資訊欄位標籤;避免把日常用法「聯繫」一概改寫",
9735+
"english": "contact information"
9736+
},
9737+
{
9738+
"from": "聯繫電話",
9739+
"to": ["聯絡電話"],
9740+
"type": "cross_strait",
9741+
"context": "@domain IT。聯絡資訊欄位標籤;避免把日常用法「聯繫」一概改寫",
9742+
"english": "contact phone"
9743+
},
97029744
{
97039745
"from": "聲卡",
97049746
"to": ["音效卡"],

src/engine/scan/mod.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2076,6 +2076,30 @@ mod tests {
20762076
);
20772077
}
20782078

2079+
#[test]
2080+
fn contact_cta_rules_do_not_force_low_editorial_confidence() {
2081+
let ruleset = crate::rules::loader::load_embedded_ruleset().unwrap();
2082+
let scanner = Scanner::new(ruleset.spelling_rules, ruleset.case_rules);
2083+
2084+
let issues = scanner.scan("如需協助請聯繫客服").issues;
2085+
assert_eq!(
2086+
issues.len(),
2087+
1,
2088+
"expected CTA phrase to match once: {issues:?}"
2089+
);
2090+
assert_eq!(issues[0].found, "如需協助請聯繫");
2091+
assert_eq!(issues[0].editorial_confidence, None);
2092+
2093+
let issues = scanner.scan("歡迎聯繫我們").issues;
2094+
assert_eq!(
2095+
issues.len(),
2096+
1,
2097+
"expected contact-us phrase to match once: {issues:?}"
2098+
);
2099+
assert_eq!(issues[0].found, "聯繫我們");
2100+
assert_eq!(issues[0].editorial_confidence, None);
2101+
}
2102+
20792103
// --- positional_clues tests ---
20802104

20812105
#[test]
@@ -2482,6 +2506,79 @@ mod tests {
24822506
);
24832507
}
24842508

2509+
#[test]
2510+
fn lian_xi_contact_copy_uses_phrase_rules_without_general_prose_fp() {
2511+
let rules = vec![
2512+
SpellingRule::new("聯繫我們", vec!["聯絡我們".into()], RuleType::CrossStrait),
2513+
SpellingRule::new("聯繫方式", vec!["聯絡方式".into()], RuleType::CrossStrait),
2514+
SpellingRule::new("聯繫資訊", vec!["聯絡資訊".into()], RuleType::CrossStrait),
2515+
SpellingRule::new("聯繫管道", vec!["聯絡管道".into()], RuleType::CrossStrait),
2516+
SpellingRule::new("聯繫電話", vec!["聯絡電話".into()], RuleType::CrossStrait),
2517+
SpellingRule::new("聯繫客服", vec!["聯絡客服".into()], RuleType::CrossStrait),
2518+
SpellingRule::new(
2519+
"如需協助請聯繫",
2520+
vec!["如需協助請聯絡".into()],
2521+
RuleType::CrossStrait,
2522+
),
2523+
];
2524+
let scanner = Scanner::new(rules, vec![]);
2525+
2526+
let issues = scanner.scan("歡迎聯繫我們").issues;
2527+
assert_eq!(issues.len(), 1, "should flag contact CTA: {issues:?}");
2528+
2529+
let issues = scanner.scan("請查看聯繫方式").issues;
2530+
assert_eq!(issues.len(), 1, "should flag contact label: {issues:?}");
2531+
2532+
let issues = scanner.scan("最新聯繫資訊如下").issues;
2533+
assert_eq!(
2534+
issues.len(),
2535+
1,
2536+
"should flag contact info label: {issues:?}"
2537+
);
2538+
2539+
let issues = scanner.scan("若需協助可參考聯繫管道").issues;
2540+
assert_eq!(
2541+
issues.len(),
2542+
1,
2543+
"should flag contact channel label: {issues:?}"
2544+
);
2545+
2546+
let issues = scanner.scan("聯繫電話:02-1234-5678").issues;
2547+
assert_eq!(
2548+
issues.len(),
2549+
1,
2550+
"should flag contact phone label: {issues:?}"
2551+
);
2552+
2553+
let issues = scanner.scan("請聯繫客服取得協助").issues;
2554+
assert_eq!(issues.len(), 1, "should flag support CTA: {issues:?}");
2555+
2556+
let issues = scanner.scan("如需協助請聯繫").issues;
2557+
assert_eq!(
2558+
issues.len(),
2559+
1,
2560+
"should flag imperative support CTA: {issues:?}"
2561+
);
2562+
2563+
let issues = scanner.scan("我們與學界保持密切聯繫").issues;
2564+
assert!(
2565+
issues.is_empty(),
2566+
"should not flag ordinary prose: {issues:?}"
2567+
);
2568+
2569+
let issues = scanner.scan("請加強國際聯繫").issues;
2570+
assert!(
2571+
issues.is_empty(),
2572+
"should not flag ordinary prose: {issues:?}"
2573+
);
2574+
2575+
let issues = scanner.scan("我們透過電話聯繫對方").issues;
2576+
assert!(
2577+
issues.is_empty(),
2578+
"should not flag ordinary prose: {issues:?}"
2579+
);
2580+
}
2581+
24852582
#[test]
24862583
fn translationese_pipeline_keeps_only_indexed_zy2_issue() {
24872584
let scanner = Scanner::new(vec![], vec![]);

0 commit comments

Comments
 (0)