Skip to content

Commit 5695de8

Browse files
committed
more UnicodeNormalizer optimizations
1 parent 2fa6cb2 commit 5695de8

10 files changed

Lines changed: 12 additions & 9 deletions

File tree

include/frog/mblem_mod.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ class Mblem {
6868
void makeUnique();
6969
void add_lemmas( const std::vector<folia::Word*>&,
7070
const frog_data& ) const;
71+
TiCC::UnicodeNormalizer& normalizer(){ return _normalizer; };
7172
private:
7273
icu::UnicodeString call_server( const icu::UnicodeString& );
7374
void read_transtable( const std::string& );

include/frog/mbma_mod.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class Mbma {
8080
static std::string mbma_tagset;
8181
static std::string pos_tagset;
8282
static std::string clex_tagset;
83+
TiCC::UnicodeNormalizer& normalizer(){ return _normalizer; };
8384
Mbma( const Mbma& ) = delete;
8485
Mbma& operator=( const Mbma& ) = delete;
8586
private:

include/frog/mwu_chunker_mod.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class Mwu {
9797
std::string mwu_tagset;
9898
icu::UnicodeString glue_tag;
9999
TiCC::UniFilter *filter;
100+
TiCC::UnicodeNormalizer _normalizer;
100101
Mwu( const Mwu& ) = delete; // no copies
101102
Mwu operator=( const Mwu& ) = delete; // no copies
102103
};

src/mblem_mod.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ bool Mblem::fill_ts_map( const string& file ){
9898
return false;
9999
}
100100
UnicodeString line;
101-
while ( TiCC::getline( is, line ) ){
101+
while ( TiCC::getline( is, _normalizer, line ) ){
102102
if ( line.isEmpty() || line[0] == '#' ){
103103
continue;
104104
}

src/mblem_prog.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ bool init(){
174174

175175
void Test( istream& in, ostream& os ){
176176
UnicodeString line;
177-
while ( TiCC::getline( in, line ) ){
177+
while ( TiCC::getline( in, myMblem.normalizer(),line ) ){
178178
if ( line.isEmpty() ) {
179179
os << endl;
180180
continue;

src/mbma_mod.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void Mbma::init_cgn( const string& main, const string& sub ) {
131131
ifstream tc( main );
132132
if ( tc ){
133133
UnicodeString line;
134-
while ( TiCC::getline( tc, line) ) {
134+
while ( TiCC::getline( tc, _normalizer, line) ) {
135135
vector<UnicodeString> tmp = TiCC::split_at( line, " " );
136136
if ( tmp.size() < 2 ){
137137
LOG << "splitting '" << line << "' failed" << endl;
@@ -146,7 +146,7 @@ void Mbma::init_cgn( const string& main, const string& sub ) {
146146
ifstream tc1( sub );
147147
if ( tc1 ){
148148
UnicodeString line;
149-
while( TiCC::getline( tc1, line ) ) {
149+
while( TiCC::getline( tc1, _normalizer, line ) ) {
150150
vector<UnicodeString> tmp = TiCC::split_at( line, " " );
151151
if ( tmp.size() == 2 ){
152152
TAGconv.insert( make_pair( tmp[0], tmp[1] ) );

src/mbma_prog.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ bool check_next( const UnicodeString& tag ){
183183

184184
void Test( istream& in ){
185185
UnicodeString line;
186-
while ( TiCC::getline( in, line ) ){
186+
while ( TiCC::getline( in, myMbma.normalizer(), line ) ){
187187
line = line.trim();
188188
if ( line.isEmpty() ){
189189
continue;

src/mwu_chunker_mod.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ bool Mwu::read_mwus( const string& fname) {
136136
return false;
137137
}
138138
UnicodeString line;
139-
while( TiCC::getline( mwufile, line ) ) {
139+
while( TiCC::getline( mwufile, _normalizer, line ) ) {
140140
vector<UnicodeString> res1 = TiCC::split_at(line, " ");
141141
if ( res1.size() == 2 ){
142142
vector<UnicodeString> res2 = TiCC::split_at(res1[0], "_");;
@@ -211,7 +211,7 @@ bool Mwu::init( const TiCC::Configuration& config ) {
211211
glue_tag = "SPEC(deeleigen)";
212212
}
213213
else {
214-
glue_tag = TiCC::UnicodeFromUTF8(val);
214+
glue_tag = TiCC::UnicodeFromUTF8(val,_normalizer);
215215
}
216216

217217
string cls = config.lookUp( "outputclass" );

src/ner_tagger_mod.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ bool NERTagger::fill_ners( const string& cat,
125125
int long_err_cnt = 0;
126126
size_t ner_cnt = 0;
127127
UnicodeString line;
128-
while ( TiCC::getline( is, line ) ){
128+
while ( TiCC::getline( is, _normalizer, line ) ){
129129
if ( line.isEmpty() || line[0] == '#' ){
130130
continue;
131131
}

src/tagger_base.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ bool BaseTagger::fill_map( const string& file ){
9191
return false;
9292
}
9393
UnicodeString line;
94-
while( TiCC::getline( is, line ) ){
94+
while( TiCC::getline( is, _normalizer, line ) ){
9595
if ( line.isEmpty() || line[0] == '#' ){
9696
continue;
9797
}

0 commit comments

Comments
 (0)