@@ -259,7 +259,7 @@ def join(self):
259259
260260class WhereParser (Thread ):
261261
262- def __init__ (self , phrases , tables_of_from , columns_of_values_of_where , count_keywords , sum_keywords , average_keywords , max_keywords , min_keywords , greater_keywords , less_keywords , between_keywords , negation_keywords , junction_keywords , disjunction_keywords , like_keywords , distinct_keywords , database_dico ):
262+ def __init__ (self , phrases , tables_of_from , columns_of_values_of_where , count_keywords , sum_keywords , average_keywords , max_keywords , min_keywords , greater_keywords , less_keywords , between_keywords , negation_keywords , junction_keywords , disjunction_keywords , like_keywords , distinct_keywords , database_dico , database_object ):
263263 Thread .__init__ (self )
264264 self .where_objects = []
265265 self .phrases = phrases
@@ -279,6 +279,7 @@ def __init__(self, phrases, tables_of_from, columns_of_values_of_where, count_ke
279279 self .like_keywords = like_keywords
280280 self .distinct_keywords = distinct_keywords
281281 self .database_dico = database_dico
282+ self .database_object = database_object
282283
283284 def get_tables_of_column (self , column ):
284285 tmp_table = []
@@ -359,6 +360,13 @@ def predict_junction(self, previous_column_offset, current_column_offset):
359360 else :
360361 return 'OR'
361362
def uniquify(self, list):
    """Return the items of ``list`` with duplicates dropped, in first-seen order.

    Membership is checked with ``in`` against the result built so far, so
    items only need to support equality comparison; they do not have to be
    hashable. A new list is always returned and the argument itself is
    never modified.
    """
    distinct = []
    keep = distinct.append
    for candidate in list:
        if candidate in distinct:
            # An equal item was already kept earlier; skip this repeat.
            continue
        keep(candidate)
    return distinct
369+
362370 def run (self ):
363371 number_of_where_columns = 0
364372 columns_of_where = []
@@ -379,13 +387,18 @@ def run(self):
379387
380388 for phrase in self .phrases :
381389 for i in range (0 , len (phrase )):
382- for table in self .database_dico :
383- if phrase [i ] in self .database_dico [table ]:
384- number_of_where_columns += 1
385- columns_of_where .append (phrase [i ])
386- offset_of [phrase [i ]] = i
387- column_offset .append (i )
388- break
390+ for table_name in self .database_dico :
391+ columns = self .database_object .get_table_by_name (table_name ).get_columns ()
392+ for column in columns :
393+ if (phrase [i ] == column .get_name ()) or (phrase [i ] in column .get_equivalences ()):
394+ number_of_where_columns += 1
395+ columns_of_where .append (column .get_name ())
396+ offset_of [phrase [i ]] = i
397+ column_offset .append (i )
398+ break
399+ else :
400+ continue
401+ break
389402
390403 phrase_keyword = str (phrase [i ]).lower () # for robust keyword matching
391404
@@ -416,6 +429,8 @@ def run(self):
416429 if phrase_keyword in self .like_keywords : # after the column
417430 self .like_keyword_offset .append (i )
418431
432+ print (self .columns_of_values_of_where )
433+ print (columns_of_where )
419434
420435 for table_of_from in self .tables_of_from :
421436 where_object = Where ()
@@ -437,7 +452,7 @@ def run(self):
437452 operation_type = self .predict_operation_type (previous , current )
438453
439454 if len (self .columns_of_values_of_where ) > i :
440- value = self .columns_of_values_of_where [i ]
455+ value = self .columns_of_values_of_where [len ( self . columns_of_values_of_where ) - len ( columns_of_where ) + i ]
441456 else :
442457 value = 'OOV' # Out Of Vocabulary: default value
443458
@@ -452,12 +467,13 @@ def join(self):
452467
453468class GroupByParser (Thread ):
454469
455- def __init__ (self , phrases , tables_of_from , database_dico ):
470+ def __init__ (self , phrases , tables_of_from , database_dico , database_object ):
456471 Thread .__init__ (self )
457472 self .group_by_objects = []
458473 self .phrases = phrases
459474 self .tables_of_from = tables_of_from
460475 self .database_dico = database_dico
476+ self .database_object = database_object
461477
462478 def get_tables_of_column (self , column ):
463479 tmp_table = []
@@ -479,11 +495,12 @@ def run(self):
479495 group_by_object = GroupBy ()
480496 for phrase in self .phrases :
481497 for i in range (0 , len (phrase )):
482- for table in self .database_dico :
483- if phrase [i ] in self .database_dico [table ]:
484- column = self .get_column_name_with_alias_table (
485- phrase [i ], table_of_from )
486- group_by_object .set_column (column )
498+ for table_name in self .database_dico :
499+ columns = self .database_object .get_table_by_name (table_name ).get_columns ()
500+ for column in columns :
501+ if (phrase [i ] == column .get_name ()) or (phrase [i ] in column .get_equivalences ()):
502+ column_with_alias = self .get_column_name_with_alias_table (column .get_name (), table_of_from )
503+ group_by_object .set_column (column_with_alias )
487504 self .group_by_objects .append (group_by_object )
488505
489506 def join (self ):
@@ -493,14 +510,15 @@ def join(self):
493510
494511class OrderByParser (Thread ):
495512
496- def __init__ (self , phrases , tables_of_from , asc_keywords , desc_keywords , database_dico ):
513+ def __init__ (self , phrases , tables_of_from , asc_keywords , desc_keywords , database_dico , database_object ):
497514 Thread .__init__ (self )
498515 self .order_by_objects = []
499516 self .phrases = phrases
500517 self .tables_of_from = tables_of_from
501518 self .asc_keywords = asc_keywords
502519 self .desc_keywords = desc_keywords
503520 self .database_dico = database_dico
521+ self .database_object = database_object
504522
505523 def get_tables_of_column (self , column ):
506524 tmp_table = []
@@ -531,10 +549,12 @@ def run(self):
531549 order_by_object = OrderBy ()
532550 for phrase in self .phrases :
533551 for i in range (0 , len (phrase )):
534- for table in self .database_dico :
535- if phrase [i ] in self .database_dico [table ]:
536- column = self .get_column_name_with_alias_table (phrase [i ], table_of_from )
537- order_by_object .add_column (column , self .predict_order (phrase ))
552+ for table_name in self .database_dico :
553+ columns = self .database_object .get_table_by_name (table_name ).get_columns ()
554+ for column in columns :
555+ if (phrase [i ] == column .get_name ()) or (phrase [i ] in column .get_equivalences ()):
556+ column_with_alias = self .get_column_name_with_alias_table (column .get_name (), table_of_from )
557+ order_by_object .add_column (column_with_alias , self .predict_order (phrase ))
538558 self .order_by_objects .append (order_by_object )
539559
540560 def join (self ):
@@ -624,27 +644,35 @@ def parse_sentence(self, sentence, stopwordsFilter=None):
624644 med_phrase = ''
625645 end_phrase = ''
626646
647+ ''' @todo merge this part of the algorithm (detection of values of where) in the rest of the parsing algorithm (about line 725) '''
648+
627649 for i in range (0 , len (input_word_list )):
628- if input_word_list [i ] in self .database_dico :
629- if number_of_table_temp == 0 :
630- start_phrase = input_word_list [:i ]
631- number_of_table_temp += 1
632- last_table_position_temp = i
633- for table in self .database_dico :
634- if input_word_list [i ] in self .database_dico [table ]:
635- if number_of_where_column_temp == 0 :
636- med_phrase = input_word_list [
637- len (start_phrase ):last_table_position_temp + 1 ]
638- number_of_where_column_temp += 1
639- break
650+ for table_name in self .database_dico :
651+ if (input_word_list [i ] == table_name ) or (input_word_list [i ] in self .database_object .get_table_by_name (table_name ).get_equivalences ()):
652+ if number_of_table_temp == 0 :
653+ start_phrase = input_word_list [:i ]
654+ number_of_table_temp += 1
655+ last_table_position_temp = i
656+
657+ columns = self .database_object .get_table_by_name (table_name ).get_columns ()
658+ for column in columns :
659+ if (input_word_list [i ] == column .get_name ()) or (input_word_list [i ] in column .get_equivalences ()):
660+ if number_of_where_column_temp == 0 :
661+ med_phrase = input_word_list [len (start_phrase ):last_table_position_temp + 1 ]
662+ number_of_where_column_temp += 1
663+ break
664+ else :
665+ if (number_of_table_temp != 0 ) and (number_of_where_column_temp == 0 ) and (i == (len (input_word_list ) - 1 )):
666+ med_phrase = input_word_list [len (start_phrase ):]
640667 else :
641- if ( number_of_table_temp != 0 ) and ( number_of_where_column_temp == 0 ) and ( i == ( len ( input_word_list ) - 1 )):
642- med_phrase = input_word_list [ len ( start_phrase ):]
668+ continue
669+ break
643670
644671 end_phrase = input_word_list [len (start_phrase ) + len (med_phrase ):]
672+
645673 irext = ' ' .join (end_phrase )
646674
647- ''' @todo set this part of the algorithm (detection of values of where) in the part of the phrases where parsing '''
675+ ''' @todo set this part of the algorithm (detection of values of where) in the WhereParser thread '''
648676
649677 if irext :
650678 irext = self .remove_accents (irext .lower ())
@@ -698,6 +726,8 @@ def parse_sentence(self, sentence, stopwordsFilter=None):
698726 # replace back <_> to spaces from the values assigned
699727 columns_of_values_of_where .append (str ("'" + str (irext_list [index ]).replace ('<_>' , ' ' ) + "'" ))
700728
729+ ''' ----------------------------------------------------------------------------------------------------------- '''
730+
701731 tables_of_from = []
702732 select_phrase = ''
703733 from_phrase = ''
@@ -819,9 +849,9 @@ def parse_sentence(self, sentence, stopwordsFilter=None):
819849 try :
820850 select_parser = SelectParser (columns_of_select , tables_of_from , select_phrase , self .count_keywords , self .sum_keywords , self .average_keywords , self .max_keywords , self .min_keywords , self .distinct_keywords , self .database_dico , self .database_object )
821851 from_parser = FromParser (tables_of_from , columns_of_select , columns_of_where , self .database_object )
822- where_parser = WhereParser (new_where_phrase , tables_of_from , columns_of_values_of_where , self .count_keywords , self .sum_keywords , self .average_keywords , self .max_keywords , self .min_keywords , self .greater_keywords , self .less_keywords , self .between_keywords , self .negation_keywords , self .junction_keywords , self .disjunction_keywords , self .like_keywords , self .distinct_keywords , self .database_dico )
823- group_by_parser = GroupByParser (group_by_phrase , tables_of_from , self .database_dico )
824- order_by_parser = OrderByParser (order_by_phrase , tables_of_from , self .asc_keywords , self .desc_keywords , self .database_dico )
852+ where_parser = WhereParser (new_where_phrase , tables_of_from , columns_of_values_of_where , self .count_keywords , self .sum_keywords , self .average_keywords , self .max_keywords , self .min_keywords , self .greater_keywords , self .less_keywords , self .between_keywords , self .negation_keywords , self .junction_keywords , self .disjunction_keywords , self .like_keywords , self .distinct_keywords , self .database_dico , self . database_object )
853+ group_by_parser = GroupByParser (group_by_phrase , tables_of_from , self .database_dico , self . database_object )
854+ order_by_parser = OrderByParser (order_by_phrase , tables_of_from , self .asc_keywords , self .desc_keywords , self .database_dico , self . database_object )
825855
826856 select_parser .start ()
827857 from_parser .start ()
0 commit comments