2121@dataclass
2222class PipelineStepBuilder :
2323 cls : Type [PipelineStep ]
24+ description : str
2425 # { name => (type, default value) }
2526 kwargs : dict [str , tuple [Union [Type , str ], Any ]]
2627
@@ -31,26 +32,34 @@ def make_step(self, step_kwargs: dict[str, Any]) -> PipelineStep:
3132 return self .cls (** step_kwargs )
3233
3334
34- tokenization_step_builders = [PipelineStepBuilder (NLTKTokenizer , {})]
35+ tokenization_step_builders = [PipelineStepBuilder (NLTKTokenizer , "" , {})]
3536ner_step_builders = [
36- PipelineStepBuilder (NLTKNamedEntityRecognizer , {}),
37- PipelineStepBuilder (BertNamedEntityRecognizer , {}),
37+ PipelineStepBuilder (NLTKNamedEntityRecognizer , "Fast statistical NER model" , {}),
38+ PipelineStepBuilder (
39+ BertNamedEntityRecognizer , "BERT based deep learning NER model" , {}
40+ ),
41+ ]
42+ coref_step_builders = [
43+ PipelineStepBuilder (
44+ BertCoreferenceResolver , "BERT based deep learning coreference model" , {}
45+ )
3846]
39- coref_step_builders = [PipelineStepBuilder (BertCoreferenceResolver , {})]
4047character_unification_step_builders = [
4148 PipelineStepBuilder (
4249 GraphRulesCharacterUnifier ,
50+ "Rules based character unification algorithm" ,
4351 {
4452 "min_appearances" : ("uint" , 0 ),
4553 "link_corefs_mentions" : (bool , False ),
4654 "ignore_leading_determiner" : (bool , False ),
4755 },
4856 ),
49- PipelineStepBuilder (NaiveCharacterUnifier , {}),
57+ PipelineStepBuilder (NaiveCharacterUnifier , "Baseline naive character unifier" , {}),
5058]
5159graph_extraction_step_builder = [
5260 PipelineStepBuilder (
5361 CoOccurrencesGraphExtractor ,
62+ "" ,
5463 {"co_occurrences_dist" : ("uint" , 25 )},
5564 )
5665]
@@ -180,10 +189,10 @@ def set_kwargs(kwargs: dict, name: str, value: Any):
180189 if typ == str :
181190 tbox = gr .Textbox (label = name , value = default , interactive = True , key = key )
182191 tbox .change (set_kwargs , [kwargs , gr .State (name ), tbox ], [kwargs ])
183- if typ == int :
192+ elif typ == int :
184193 nb = gr .Number (label = name , value = float (default ), interactive = True , key = key )
185194 nb .change (set_kwargs , [kwargs , gr .State (name ), nb ], [kwargs ])
186- if typ == "uint" :
195+ elif typ == "uint" :
187196 uint = gr .Number (
188197 label = name , value = float (default ), minimum = 0 , interactive = True , key = key
189198 )
@@ -199,67 +208,75 @@ def set_kwargs(kwargs: dict, name: str, value: Any):
199208 with gr .Row ():
200209 # Inputs
201210 with gr .Column ():
202- with gr .Accordion ("Tokenization" , open = False ):
203- tok_kwargs = gr .State ({})
204- tok_ddown = gr .Dropdown (
205- [s .name () for s in tokenization_step_builders ],
206- value = tokenization_step_builders [0 ].name (),
207- label = "Tokenization step" ,
208- )
209- tok_ddown .change (lambda : {}, [], [tok_kwargs ])
210-
211- @gr .render (inputs = tok_ddown )
212- def render_tok_kwargs (tok_step : str ):
213- step_builder = select_step_builder (
214- tokenization_step_builders , tok_step
211+ with gr .Group ("Tokenization" ):
212+ gr .Markdown ("## Step 1: Tokenization" )
213+ with gr .Accordion ("Click to expand" , open = False ):
214+ tok_kwargs = gr .State ({})
215+ tok_ddown = gr .Dropdown (
216+ [s .name () for s in tokenization_step_builders ],
217+ value = tokenization_step_builders [0 ].name (),
218+ label = "Tokenization step" ,
219+ )
220+ tok_ddown .change (lambda : {}, [], [tok_kwargs ])
221+
222+ @gr .render (inputs = tok_ddown )
223+ def render_tok_kwargs (tok_step : str ):
224+ step_builder = select_step_builder (
225+ tokenization_step_builders , tok_step
226+ )
227+ render_kwargs_ (step_builder , tok_kwargs )
228+
229+ with gr .Group ("NER" ):
230+ gr .Markdown ("## Step 2: Named Entity Recognition" )
231+ with gr .Accordion ("Click to expand" , open = False ):
232+ ner_kwargs = gr .State ({})
233+ ner_ddown = gr .Dropdown (
234+ [s .name () for s in ner_step_builders ],
235+ value = ner_step_builders [0 ].name ,
236+ label = "NER step" ,
215237 )
216- render_kwargs_ (step_builder , tok_kwargs )
217-
218- with gr .Accordion ("NER" , open = False ):
219- ner_kwargs = gr .State ({})
220- ner_ddown = gr .Dropdown (
221- [s .name () for s in ner_step_builders ],
222- value = ner_step_builders [0 ].name ,
223- label = "NER step" ,
224- )
225- ner_ddown .change (lambda : {}, [], [ner_kwargs ])
226-
227- @gr .render (inputs = ner_ddown )
228- def render_ner_kwargs (ner_step : str ):
229- ner_builder = select_step_builder (ner_step_builders , ner_step )
230- render_kwargs_ (ner_builder , ner_kwargs )
231-
232- with gr .Accordion ("Character Unification" , open = False ):
233- cu_kwargs = gr .State ({})
234- cu_ddown = gr .Dropdown (
235- [s .name () for s in character_unification_step_builders ],
236- value = character_unification_step_builders [0 ].name (),
237- label = "Character unification step" ,
238- )
239- cu_ddown .change (lambda : {}, [], [cu_kwargs ])
240-
241- @gr .render (inputs = cu_ddown )
242- def render_cu_kwargs (cu_step : str ):
243- step_builder = select_step_builder (
244- character_unification_step_builders , cu_step
238+ ner_ddown .change (lambda : {}, [], [ner_kwargs ])
239+
240+ @gr .render (inputs = ner_ddown )
241+ def render_ner_kwargs (ner_step : str ):
242+ ner_builder = select_step_builder (ner_step_builders , ner_step )
243+ render_kwargs_ (ner_builder , ner_kwargs )
244+
245+ with gr .Group ("Character Unification" ):
246+ gr .Markdown ("## Step 3: Character Unification" )
247+ with gr .Accordion ("Click to expand" , open = False ):
248+ cu_kwargs = gr .State ({})
249+ cu_ddown = gr .Dropdown (
250+ [s .name () for s in character_unification_step_builders ],
251+ value = character_unification_step_builders [0 ].name (),
252+ label = "Character unification step" ,
245253 )
246- render_kwargs_ (step_builder , cu_kwargs )
247-
248- with gr .Accordion ("Graph Extraction" , open = False ):
249- ge_kwargs = gr .State ({})
250- ge_ddown = gr .Dropdown (
251- [s .name () for s in graph_extraction_step_builder ],
252- value = graph_extraction_step_builder [0 ].name (),
253- label = "Graph extraction step" ,
254- )
255- ge_ddown .change (lambda : {}, [], [ge_kwargs ])
256-
257- @gr .render (inputs = ge_ddown )
258- def render_ge_kwargs (ge_step : str ):
259- step_builder = select_step_builder (
260- graph_extraction_step_builder , ge_step
254+ cu_ddown .change (lambda : {}, [], [cu_kwargs ])
255+
256+ @gr .render (inputs = cu_ddown )
257+ def render_cu_kwargs (cu_step : str ):
258+ step_builder = select_step_builder (
259+ character_unification_step_builders , cu_step
260+ )
261+ render_kwargs_ (step_builder , cu_kwargs )
262+
263+ with gr .Group ("Graph Extraction" ):
264+ gr .Markdown ("## Step 4: Graph Extraction" )
265+ with gr .Accordion ("Click to expand" , open = False ):
266+ ge_kwargs = gr .State ({})
267+ ge_ddown = gr .Dropdown (
268+ [s .name () for s in graph_extraction_step_builder ],
269+ value = graph_extraction_step_builder [0 ].name (),
270+ label = "Graph extraction step" ,
261271 )
262- render_kwargs_ (step_builder , ge_kwargs )
272+ ge_ddown .change (lambda : {}, [], [ge_kwargs ])
273+
274+ @gr .render (inputs = ge_ddown )
275+ def render_ge_kwargs (ge_step : str ):
276+ step_builder = select_step_builder (
277+ graph_extraction_step_builder , ge_step
278+ )
279+ render_kwargs_ (step_builder , ge_kwargs )
263280
264281 # TODO: pipeline level parameter like 'lang'
265282 text = gr .TextArea ()
0 commit comments