Skip to content

Commit e67ba5b

Browse files
committed
Merge branch 'deep'
2 parents 8337bd0 + b97a6a0 commit e67ba5b

6 files changed

Lines changed: 19 additions & 23 deletions

File tree

src/checkmbma.cxx

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,13 @@ void check_word( const UnicodeString& _word, bool doMor ){
7979
if ( uword != ls ){
8080
return;
8181
}
82-
myMbma.Classify( ls );
83-
vector<UnicodeString> anas = myMbma.getResult();
82+
myMbma.Classify( ls, "" );
83+
vector<pair<UnicodeString,string>> anas = myMbma.getResults(true);
8484
set<UnicodeString> fails;
8585
for ( const auto& ana : anas ){
86+
UnicodeString flat = flatten(ana.first);
8687
bool lem_found = false;
87-
vector<UnicodeString> mors = TiCC::split_at_first_of( ana, "[]" );
88+
vector<UnicodeString> mors = TiCC::split_at_first_of( flat, "[]" );
8889
bool first = true;
8990
for ( const auto& mor : mors ){
9091
UnicodeString mor1 = mor;

src/froggen.cxx

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -368,12 +368,12 @@ void create_mblem_trainfile( const multimap<UnicodeString, map<UnicodeString, ma
368368
exit( EXIT_FAILURE );
369369
}
370370
UnicodeString outLine;
371-
for ( const auto& it : data ){
372-
UnicodeString wordform = it.first;
371+
for ( const auto& data_it : data ){
372+
UnicodeString wordform = data_it.first;
373373
UnicodeString safeInstance;
374374
if ( !outLine.isEmpty() ){
375375
string out = UnicodeToUTF8(outLine);
376-
out.erase( out.length()-1 );
376+
out.erase( out.length()-1 ); // remove the final '|'
377377
os << out << endl;
378378
outLine.remove();
379379
}
@@ -410,7 +410,7 @@ void create_mblem_trainfile( const multimap<UnicodeString, map<UnicodeString, ma
410410
outLine = instance;
411411
}
412412
multimap<size_t, multimap<UnicodeString,UnicodeString>,std::greater<size_t>> sorted;
413-
for ( const auto& it2 : it.second ){
413+
for ( const auto& it2 : data_it.second ){
414414
for ( const auto& it3: it2.second ){
415415
multimap<UnicodeString,UnicodeString> mm;
416416
mm.insert(make_pair(it3.first,it2.first));
@@ -441,11 +441,10 @@ void create_mblem_trainfile( const multimap<UnicodeString, map<UnicodeString, ma
441441
thisform = wordform;
442442
if ( tag.indexOf(it.first.c_str()) >= 0 ){
443443
// the POS tag matches, so potentially yes
444-
int part_pos = -1;
445444
UnicodeString part;
446445
for ( const auto& p : it.second ){
447446
// loop over potential particles.
448-
part_pos = thisform.indexOf(p.c_str());
447+
int part_pos = thisform.indexOf(p.c_str());
449448
if ( part_pos != -1 ){
450449
part = p.c_str();
451450
if ( debug ){

src/makembma.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ int main(int argc, char * const argv[] ) {
164164
exit(1);
165165
}
166166
parts.erase(parts.begin());
167-
vector<Rule *> r = myMbma.execute( word, parts );
167+
vector<Rule *> r = myMbma.execute( word, "", parts );
168168
if ( r.empty() ){
169169
cerr << "problems with entry: '" << line << "'" << endl;
170170
continue;

src/morgen.cxx

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ void create_instance_file( const string& inpname, const string& outname ){
146146
exit(1);
147147
}
148148
parts.erase(parts.begin());
149-
vector<Rule *> r = myMbma.execute( word, parts );
149+
vector<Rule *> r = myMbma.execute( word, "", parts );
150150
if ( r.empty() ){
151151
cerr << "problems with entry: '" << line << "'" << endl;
152152
continue;
@@ -240,13 +240,13 @@ int main(int argc, char * const argv[] ) {
240240
TiCC::Configuration frog_config = use_config;
241241
// frog_config.clearatt( "configDir", "global" );
242242
string inpname = names[0];
243-
string outname = outputdir + base_name + ".data";
243+
string data_out_name = outputdir + base_name + ".data";
244244
string treename = use_config.lookUp( "treeFile", "mbma" );
245245
if ( treename.empty() ){
246246
treename = base_name + ".tree";
247247
}
248248

249-
string cgndir;
249+
string cgndir;
250250
bool cgn_opt = opts.extract( "cgn", cgndir );
251251
if ( cgn_opt ){
252252
use_config.setatt( "cgnDir", cgndir, "mbma" );
@@ -295,11 +295,10 @@ int main(int argc, char * const argv[] ) {
295295

296296
frog_config.setatt( "treeFile", treename, "mbma" );
297297
string full_treename = outputdir + treename;
298-
create_instance_file( inpname, outname );
299-
create_instance_base( outname, full_treename );
298+
create_instance_file( inpname, data_out_name );
299+
create_instance_base( data_out_name, full_treename );
300300

301301
frog_config.clearatt( "baseName", "mbma" );
302-
string mbma_set_name = use_config.lookUp( "set", "mbma" );
303302

304303
string cfg_out;
305304
if ( configfile.empty() ){

src/nerv.cxx

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ int main(){
1717
string ner = parts[1];
1818
string prev = "_";
1919
string cur = parts[2];
20-
string next;
2120
while ( getline( cin, line ) ){
2221
parts = TiCC::split_at( line, "\t" );
2322
if ( parts.empty() ){
@@ -37,17 +36,15 @@ int main(){
3736
ner = parts[1];
3837
prev = cur;
3938
cur = parts[2];
40-
next = "_";
4139
}
4240
}
4341
else {
44-
next = parts[2];
45-
cout << word << "\t" << "\t" << ner << "\t" << prev << "\t" << cur << "\t" << next << endl;
42+
cout << word << "\t" << "\t" << ner << "\t" << prev << "\t"
43+
<< cur << "\t" << parts[2] << endl;
4644
word = parts[0];
4745
ner = parts[1];
4846
prev = cur;
4947
cur = parts[2];
50-
next = "ERR";
5148
}
5249
}
5350
}

src/testmbma.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,13 @@ void Test( istream& in, bool deep ){
145145
UnicodeString uWord = parts[0];
146146
uWord.toLower();
147147
parts.erase(parts.begin());
148-
vector<Rule *> rules = myMbma.execute( uWord, parts );
148+
vector<Rule *> rules = myMbma.execute( uWord, "", parts );
149149
if ( rules.empty() ){
150150
cout << "no rule matched: " << line << endl;
151151
}
152152
else {
153153
for ( auto const& r : rules ){
154-
cout << uWord << "==> " << r->morpheme_string( deep )
154+
cout << uWord << "==> " << r->pretty_string( deep )
155155
<< " " << r->tag << endl;
156156
delete r;
157157
}

0 commit comments

Comments
 (0)