Skip to content

Commit f7cdf41

Browse files
committed
Add test
1 parent ffb64f2 commit f7cdf41

2 files changed

Lines changed: 107 additions & 4 deletions

File tree

libs/braillify/src/rules/math/rule_19.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ fn single_numeric(content: &[MathToken]) -> Option<String> {
1414
}
1515
}
1616

17+
fn prev_non_space(tokens: &[MathToken], mut idx: usize) -> Option<&MathToken> {
18+
while idx > 0 {
19+
idx -= 1;
20+
let token = tokens.get(idx)?;
21+
if !matches!(token, MathToken::Space) {
22+
return Some(token);
23+
}
24+
}
25+
None
26+
}
27+
1728
fn is_plain_numeric_subscript(content: &[MathToken]) -> bool {
1829
content
1930
.iter()
@@ -63,6 +74,17 @@ pub fn encode_subscript(
6374
return Ok(true);
6475
}
6576

77+
if let Some(base) = single_numeric(content)
78+
&& matches!(prev_non_space(tokens, *i), Some(MathToken::Number(_)))
79+
{
80+
result.push(48);
81+
result.push(38);
82+
rule_1::encode_number_literal(&base, result);
83+
result.push(52);
84+
*i += 1;
85+
return Ok(false);
86+
}
87+
6688
result.push(48);
6789
if should_group_subscript(content) {
6890
result.push(55);
@@ -85,6 +107,27 @@ pub fn encode_subscript(
85107
Ok(false)
86108
}
87109

110+
#[cfg(test)]
mod tests {
    use super::super::encoder::encode_math_expression;

    /// A subscript digit written directly after a number (`1010₂`) is encoded
    /// as the number followed by `48, 38`, the encoded subscript number, and `52`.
    #[test]
    fn encodes_number_base_notation_without_explicit_subscript_parentheses() {
        let actual = encode_math_expression("1010₂").expect("math encoding should succeed");
        let expected = vec![60, 1, 26, 1, 26, 48, 38, 60, 3, 52];
        assert_eq!(actual, expected);
    }

    /// The same base written with subscript parentheses (`1101₍₂₎`) produces the
    /// same `48, 38, …, 52` tail after the number.
    #[test]
    fn encodes_number_base_notation_with_explicit_subscript_parentheses() {
        let actual = encode_math_expression("1101₍₂₎").expect("math encoding should succeed");
        let expected = vec![60, 1, 1, 26, 1, 48, 38, 60, 3, 52];
        assert_eq!(actual, expected);
    }
}
130+
88131
pub struct SubscriptRule;
89132

90133
impl MathTokenRule for SubscriptRule {

libs/braillify/src/rules/token_rules/math_expression.rs

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ fn is_middle_dot_numeric_word(chars: &[char]) -> bool {
6868
.all(|c| c.is_ascii_digit() || matches!(*c, '\u{00B7}' | '\u{22C5}' | '\u{2212}' | '-'))
6969
}
7070

71-
fn has_adjacent_korean_word(tokens: &[Token<'_>], index: usize) -> bool {
71+
fn adjacent_korean_word_flags(tokens: &[Token<'_>], index: usize) -> (bool, bool) {
7272
let prev_has_korean = index
7373
.checked_sub(1)
7474
.and_then(|mut i| {
@@ -95,6 +95,11 @@ fn has_adjacent_korean_word(tokens: &[Token<'_>], index: usize) -> bool {
9595
}
9696
};
9797

98+
(prev_has_korean, next_has_korean)
99+
}
100+
101+
fn has_adjacent_korean_word(tokens: &[Token<'_>], index: usize) -> bool {
102+
let (prev_has_korean, next_has_korean) = adjacent_korean_word_flags(tokens, index);
98103
prev_has_korean || next_has_korean
99104
}
100105

@@ -132,6 +137,37 @@ fn is_strong_mixed_math_candidate(chars: &[char], text: &str) -> bool {
132137
|| has_combining_mark
133138
}
134139

140+
fn should_wrap_math_sentence(chars: &[char], text: &str) -> bool {
141+
if chars.len() <= 1 {
142+
return false;
143+
}
144+
145+
let has_letters = chars.iter().any(|c| c.is_ascii_alphabetic());
146+
let has_digits = chars.iter().any(|c| c.is_ascii_digit());
147+
let has_math_symbol = chars
148+
.iter()
149+
.any(|c| math_symbol_shortcut::is_math_symbol_char(*c));
150+
let has_superscript = chars.iter().any(|c| is_superscript(*c));
151+
let has_subscript = chars.iter().any(|c| is_subscript(*c));
152+
let has_combining_mark = chars.iter().any(|c| is_combining_math_mark(*c));
153+
let has_math_operator = chars.iter().any(|c| {
154+
matches!(
155+
c,
156+
'+' | '=' | '>' | '<' | '.' | ',' | '-' | '\u{2212}' | '/' | '!'
157+
)
158+
});
159+
let has_brackets = chars
160+
.iter()
161+
.any(|c| matches!(c, '(' | ')' | '[' | ']' | '{' | '}'));
162+
163+
is_strong_mixed_math_candidate(chars, text)
164+
|| (has_digits && (has_math_operator || has_math_symbol || has_brackets))
165+
|| (has_letters && has_digits)
166+
|| (has_letters && has_brackets)
167+
|| (has_letters && has_math_operator)
168+
|| (has_superscript || has_subscript || has_combining_mark)
169+
}
170+
135171
fn try_encode_math_slice(chars: &[char]) -> Option<Vec<u8>> {
136172
if chars.is_empty() || chars.iter().any(|c| is_korean_char(*c)) {
137173
return None;
@@ -247,14 +283,14 @@ fn is_math_expression(chars: &[char], text: &str) -> bool {
247283
.first()
248284
.is_some_and(|c| math_symbol_shortcut::is_math_symbol_char(*c));
249285

250-
// Number-base notation like 1010₂ should not be treated as generic math expression.
286+
// Number-base notation like 1010₂ is a math expression and should use the math engine.
251287
if chars.first().is_some_and(|c| c.is_ascii_digit())
252288
&& chars.iter().any(|c| matches!(*c, '\u{2080}'..='\u{2089}'))
253289
&& chars
254290
.iter()
255291
.all(|c| c.is_ascii_digit() || matches!(*c, '\u{2080}'..='\u{2089}'))
256292
{
257-
return false;
293+
return true;
258294
}
259295

260296
// Common phone/date/range tokens like 02-799-1000 should stay non-math.
@@ -577,7 +613,25 @@ impl TokenRule for MathExpressionTokenRule {
577613

578614
// Try to encode via math engine
579615
match math::encoder::encode_math_expression(text) {
580-
Ok(bytes) => Ok(TokenAction::Replace(Token::PreEncoded(bytes))),
616+
Ok(bytes) => {
617+
let (prev_has_korean, next_has_korean) = adjacent_korean_word_flags(tokens, index);
618+
let should_wrap = should_wrap_math_sentence(&word.chars, text);
619+
let mut wrapped = Vec::with_capacity(
620+
bytes.len()
621+
+ usize::from(prev_has_korean && should_wrap)
622+
+ usize::from(next_has_korean && should_wrap),
623+
);
624+
625+
if prev_has_korean && should_wrap {
626+
wrapped.push(0);
627+
}
628+
wrapped.extend_from_slice(&bytes);
629+
if next_has_korean && should_wrap {
630+
wrapped.push(0);
631+
}
632+
633+
Ok(TokenAction::Replace(Token::PreEncoded(wrapped)))
634+
}
581635
Err(_) => {
582636
// If math encoding fails, let the character-level rules handle it
583637
Ok(TokenAction::Noop)
@@ -679,6 +733,12 @@ mod tests {
679733
assert!(is_math_expression(&chars, "⅔"));
680734
}
681735

736+
/// A digit run followed by a subscript digit (e.g. `1010₂`) must be
/// classified as a math expression.
#[test]
fn test_is_math_base_notation() {
    let input = "1010₂";
    let chars = input.chars().collect::<Vec<char>>();
    assert!(is_math_expression(&chars, input));
}
741+
682742
#[test]
683743
fn split_mixed_math_word_extracts_math_prefix() {
684744
let chars: Vec<char> = "tan의".chars().collect();

0 commit comments

Comments
 (0)