@@ -76,6 +76,32 @@ def mentions_for_blocks(
7676 return blocks_mentions
7777
7878
def quotes_for_blocks(
    block_bounds: BlockBounds,
    quotes: List[Quote],
    speakers: List[Optional[Character]],
) -> Tuple[List[List[Quote]], List[List[Optional[Character]]]]:
    """Return quotes and associated speakers for each block.

    Each quote is assigned to the first block that entirely contains
    it.  A quote that fits in no block (for example because it
    straddles two blocks) is left out, together with its speaker.

    :param block_bounds: block bounds, in tokens
    :param quotes: the quotes to dispatch into blocks
    :param speakers: the speaker of each quote, aligned with
        ``quotes`` (``speakers[i]`` goes with ``quotes[i]``)

    :return: a tuple ``(block_quotes, block_speakers)``.  Both lists
             have len ``len(block_bounds[0])``, and
             ``block_speakers[i][j]`` is the speaker paired with
             ``block_quotes[i][j]``.
    """
    assert block_bounds[1] == "tokens"

    bounds = block_bounds[0]
    block_quotes: List[List[Quote]] = [[] for _ in bounds]
    block_speakers: List[List[Optional[Character]]] = [[] for _ in bounds]

    for quote, speaker in zip(quotes, speakers):
        for block_i, (start_i, end_i) in enumerate(bounds):
            # NOTE(review): a quote whose ``end`` equals the block end
            # is rejected by the strict comparison -- confirm this
            # matches the (inclusive/exclusive) convention of
            # ``Quote.end`` and of the block bounds.
            if quote.start >= start_i and quote.end < end_i:
                block_quotes[block_i].append(quote)
                block_speakers[block_i].append(speaker)
                # a quote belongs to at most one block
                break

    return block_quotes, block_speakers
103+
104+
79105class CoOccurrencesGraphExtractor (PipelineStep ):
    """A simple character graph extractor using co-occurrences"""
81107
@@ -449,10 +475,6 @@ def optional_needs(self) -> Set[str]:
449475class ConversationalGraphExtractor (PipelineStep ):
450476 """A graph extractor using conversation between characters or
451477 mentions.
452-
453- .. note::
454-
455- Does not support dynamic networks yet.
456478 """
457479
458480 def __init__ (
@@ -462,6 +484,9 @@ def __init__(
462484 Union [int , Tuple [int , Literal ["tokens" , "sentences" ]]]
463485 ] = None ,
464486 ignore_self_mention : bool = True ,
487+ dynamic : bool = False ,
488+ dynamic_window : Optional [int ] = None ,
489+ dynamic_overlap : int = 0 ,
465490 ):
466491 """
467492 :param graph_type: either 'conversation' or 'mention'.
@@ -470,11 +495,31 @@ def __init__(
470495 occurring between characters. 'mention' extracts a
471496 directed graph where interactions are character mentions
472497 of one another in quoted speech.
498+
473499 :param conversation_dist: must be supplied if `graph_type` is
474500 'conversation'. The distance between two quotation for
475501 them to be considered as being interacting.
502+
476503 :param ignore_self_mention: if ``True``, self mentions are
477- ignore for ``graph_type=='mention'``
504+ ignored for ``graph_type=='mention'``
505+
506+ :param dynamic:
507+
            - if ``False`` (the default), a static ``nx.Graph`` is
              extracted

            - if ``True``, several ``nx.Graph`` are extracted.  In
              that case, ``dynamic_window`` and
              ``dynamic_overlap`` *can* be specified.  If
              ``dynamic_window`` is not specified, this step is
              expecting the text to be cut into 'dynamic blocks',
              and a graph will be extracted for each block.  In
              that case, ``dynamic_blocks`` must be passed to the
              pipeline as a ``List[str]`` at runtime.
519+
520+ :param dynamic_window: dynamic window, in number of quotes.
521+
522+ :param dynamic_overlap: overlap, in number of quotes.
478523 """
479524 self .graph_type = graph_type
480525
@@ -484,6 +529,10 @@ def __init__(
484529
485530 self .ignore_self_mention = ignore_self_mention
486531
532+ self .dynamic = dynamic
533+ self .dynamic_window = dynamic_window
534+ self .dynamic_overlap = dynamic_overlap
535+
487536 super ().__init__ ()
488537
489538 def _quotes_interact (
@@ -564,12 +613,12 @@ def _mention_extract(
564613 if speaker is None :
565614 continue
566615
567- # TODO: optim
568616 # find characters mentioned in quote and add a directed
569617 # edge speaker => character
570618 for character in characters :
571619 if character == speaker and self .ignore_self_mention :
572620 continue
621+ # TODO: optim
573622 for mention in character .mentions :
574623 if (
575624 mention .start_idx >= quote .start
@@ -582,22 +631,75 @@ def _mention_extract(
582631
583632 return G
584633
585- def __call__ (
634+ def _extract_static (
586635 self ,
587636 sentences : List [List [str ]],
588637 quotes : List [Quote ],
589638 speakers : List [Optional [Character ]],
590639 characters : Set [Character ],
591- ** kwargs ,
592- ) -> Dict [str , Any ]:
640+ ) -> nx .Graph :
593641 if self .graph_type == "conversation" :
594642 G = self ._conversation_extract (sentences , quotes , speakers , characters )
595643 elif self .graph_type == "mention" :
596644 G = self ._mention_extract (quotes , speakers , characters )
597645 else :
598646 raise ValueError (f"unknown graph_type: { self .graph_type } " )
647+ return G
599648
600- return {"character_network" : G }
649+ def _extract_dynamic (
650+ self ,
651+ sentences : List [List [str ]],
652+ quotes : List [Quote ],
653+ speakers : List [Optional [Character ]],
654+ characters : Set [Character ],
655+ dynamic_blocks : Optional [BlockBounds ] = None ,
656+ ) -> List [nx .Graph ]:
657+ assert self .dynamic_window is None or dynamic_blocks is None
658+
659+ if not self .dynamic_window is None :
660+ bounds = []
661+ for block_quotes in windowed (
662+ quotes ,
663+ self .dynamic_window ,
664+ step = self .dynamic_window - self .dynamic_overlap ,
665+ ):
666+ block_quotes = [q for q in block_quotes if not q is None ]
667+ bounds .append ((block_quotes [0 ].start , block_quotes [0 ].end ))
668+ dynamic_blocks = (bounds , "tokens" )
669+
670+ assert not dynamic_blocks is None
671+
672+ quotes_for_each_block , speakers_for_each_block = quotes_for_blocks (
673+ dynamic_blocks , quotes , speakers
674+ )
675+ return [
676+ self ._extract_static (sentences , block_quotes , block_speakers , characters )
677+ for block_quotes , block_speakers in zip (
678+ quotes_for_each_block , speakers_for_each_block
679+ )
680+ ]
681+
682+ def __call__ (
683+ self ,
684+ sentences : List [List [str ]],
685+ quotes : List [Quote ],
686+ speakers : List [Optional [Character ]],
687+ characters : Set [Character ],
688+ dynamic_blocks : Optional [BlockBounds ] = None ,
689+ ** kwargs ,
690+ ) -> Dict [str , Union [nx .Graph , List [nx .Graph ]]]:
691+ if self .dynamic :
692+ return {
693+ "character_network" : self ._extract_dynamic (
694+ sentences , quotes , speakers , characters , dynamic_blocks
695+ )
696+ }
697+ else :
698+ return {
699+ "character_network" : self ._extract_static (
700+ sentences , quotes , speakers , characters
701+ )
702+ }
601703
602704 def supported_langs (self ) -> Literal ["any" ]:
603705 return "any"
0 commit comments