Skip to content

Commit 816185b

Browse files
committed
Use new functions from ticcutils' Unicode.h for a small speedup and a
smaller memory footprint.
1 parent cdccf26 commit 816185b

3 files changed

Lines changed: 11 additions & 11 deletions

File tree

include/ucto/tokenize.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -381,7 +381,7 @@ namespace Tokenizer {
381381

382382
void appendText( folia::FoliaElement * ) const;
383383

384-
TiCC::UnicodeNormalizer normalizer;
384+
mutable TiCC::UnicodeNormalizer normalizer;
385385
std::string inputEncoding;
386386

387387
const UnicodeString& detect_type( UChar32 );

src/setting.cxx

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -292,7 +292,7 @@ namespace Tokenizer {
292292
}
293293
else {
294294
UnicodeString line;
295-
while ( TiCC::getline( f, line ) ){
295+
while ( TiCC::getline( f, _normalizer, line ) ){
296296
line.trim();
297297
if ((line.length() > 0) && (line[0] != '#')) {
298298
if ( tokDebug >= 5 ){
@@ -329,7 +329,7 @@ namespace Tokenizer {
329329
}
330330
else {
331331
UnicodeString line;
332-
while ( TiCC::getline( f, line ) ){
332+
while ( TiCC::getline( f, _normalizer, line ) ){
333333
line.trim();
334334
if ((line.length() > 0) && (line[0] != '#')) {
335335
if ( tokDebug >= 5 ){
@@ -370,7 +370,7 @@ namespace Tokenizer {
370370
}
371371
else {
372372
UnicodeString line;
373-
while ( TiCC::getline( f, line ) ){
373+
while ( TiCC::getline( f, _normalizer, line ) ){
374374
line.trim();
375375
if ((line.length() > 0) && (line[0] != '#')) {
376376
if ( tokDebug >= 5 ){
@@ -431,7 +431,7 @@ namespace Tokenizer {
431431
}
432432
else {
433433
UnicodeString line;
434-
while ( TiCC::getline( f, line ) ){
434+
while ( TiCC::getline( f, _normalizer, line ) ){
435435
line.trim();
436436
if ((line.length() > 0) && (line[0] != '#')) {
437437
if ( tokDebug >= 5 ){
@@ -650,7 +650,7 @@ namespace Tokenizer {
650650
}
651651
int rule_count = 0;
652652
UnicodeString line;
653-
while ( TiCC::getline( f, line ) ){
653+
while ( TiCC::getline( f, _normalizer, line ) ){
654654
if ( line.indexOf( "%include" ) != -1 ){
655655
UnicodeString file = UnicodeString(line, 9 );
656656
switch ( mode ){
@@ -850,7 +850,7 @@ namespace Tokenizer {
850850
if ( !add_tokens.empty() ){
851851
ifstream adt( add_tokens );
852852
UnicodeString line;
853-
while ( TiCC::getline( adt, line ) ){
853+
while ( TiCC::getline( adt, _normalizer, line ) ){
854854
UnicodeString entry = escape_regex( line );
855855
if ( !entry.isEmpty() ){
856856
if ( !patterns[TOKENS].isEmpty() ){

src/tokenize.cxx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -720,7 +720,7 @@ namespace Tokenizer {
720720
args["name"] = s.second->set_file;
721721
args["generate_id"] = "next()";
722722
args["type"] = "datasource";
723-
args["version"] = TiCC::UnicodeToUTF8(s.second->version);
723+
args["version"] = TiCC::UnicodeToUTF8(s.second->version,normalizer);
724724
doc->add_processor( args, data_proc );
725725
args.clear();
726726
args["processor"] = proc->id();
@@ -1368,7 +1368,7 @@ namespace Tokenizer {
13681368
if ( !ids.empty() ){
13691369
args["generate_id"] = ids;
13701370
}
1371-
args["class"] = TiCC::UnicodeToUTF8(tok.type);
1371+
args["class"] = TiCC::UnicodeToUTF8(tok.type,normalizer);
13721372
if ( tok.role & NOSPACE ){
13731373
args["space"] = "no";
13741374
}
@@ -1528,7 +1528,7 @@ namespace Tokenizer {
15281528
// New elements
15291529
folia::KWargs args;
15301530
args["xml:id"] = orig->generateId( "tokenized" );
1531-
args["class"] = TiCC::UnicodeToUTF8(tok.type);
1531+
args["class"] = TiCC::UnicodeToUTF8(tok.type,normalizer);
15321532
if ( tok.role & NOSPACE ){
15331533
args["space"] = "no";
15341534
}
@@ -3583,7 +3583,7 @@ namespace Tokenizer {
35833583
}
35843584
else {
35853585
set_file = it->second->set_file;
3586-
version = TiCC::UnicodeToUTF8(it->second->version);
3586+
version = TiCC::UnicodeToUTF8(it->second->version,normalizer);
35873587
return true;
35883588
}
35893589
}

0 commit comments

Comments
 (0)