CompNet
diff --git a/‎docs/pipeline.rst‎
Lines changed: 41 additions & 16 deletions b/‎docs/pipeline.rst‎
Lines changed: 41 additions & 16 deletions
diff --git a/‎renard/pipeline/core.py‎
Lines changed: 5 additions & 1 deletion b/‎renard/pipeline/core.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎renard/pipeline/graph_extraction.py‎
Lines changed: 68 additions & 18 deletions b/‎renard/pipeline/graph_extraction.py‎
Lines changed: 68 additions & 18 deletions
@@ -75,17 +75,22 @@ For simplicity, one can use one of the preconfigured pipelines:
 
 .. code-block:: python
 
-   from renard.pipeline.preconfigured import bert_pipeline
+   from renard.pipeline.preconfigured import co_occurrence_pipeline
 
    with open("./my_doc.txt") as f:
        text = f.read()
 
-   pipeline = bert_pipeline(
-       graph_extractor_kwargs={"co_occurrences_dist": (1, "sentences")}
-   )
+   pipeline = co_occurrence_pipeline()
    out = pipeline(text)
 
 
+The following preconfigured pipelines are available:
+
+- :func:`.co_occurrence_pipeline`
+- :func:`.conversational_pipeline`
+- :func:`.relational_pipeline`
+
+
 Pipeline Output: the Pipeline State
 ===================================
 
@@ -137,7 +142,7 @@ Tokenization
 Tokenization is the task of cutting text in *tokens*. It is usually
 the first task to apply to a text. 2 tokenizers are available:
 
-- :class:`.NLTKTokenizer`
+- :class:`.NLTKTokenizer` is the tokenizer from NLTK.
 - :class:`.StanfordCoreNLPPipeline` does contain a tokenizer as part
   of its full NLP pipeline.
 
@@ -148,16 +153,19 @@ Named Entity Recognition
 Named entity recognition (NER) detects entity occurrences in the
 text. 3 modules are available:
 
-- :class:`.NLTKNamedEntityRecognizer`
-- :class:`.BertNamedEntityRecognizer`
+- :class:`.NLTKNamedEntityRecognizer` is a lightweight NER module from
+  NLTK, based on POS tagging and rules.
+- :class:`.BertNamedEntityRecognizer` is a NER module employing a
+  finetuned BERT model.
 - :class:`.StanfordCoreNLPPipeline` contains a NER model as part of
   its full NLP pipeline.
 
 
 Coreference Resolution
 ----------------------
 
-- :class:`.SpacyCorefereeCoreferenceResolver`
+- :class:`.SpacyCorefereeCoreferenceResolver` uses the spacy coreferee
+  module.
 - :class:`.BertCoreferenceResolver`, using the Tibert library.
 - :class:`.StanfordCoreNLPPipeline` can execute a coreference
   resolution model as part of its pipeline.
@@ -166,14 +174,14 @@ Coreference Resolution
 Quote Detection
 ---------------
 
-- :class:`.QuoteDetector`
+- :class:`.QuoteDetector` detects quotes using simple logic.
 
 
 Sentiment Analysis
 ------------------
 
 - :class:`.NLTKSentimentAnalyzer` leverages NLTK's Vader for sentiment
-  analysis
+  analysis.
 
 
 Characters Extraction
@@ -183,21 +191,36 @@ Characters extraction (or alias resolution) extract characters from
 occurrences detected using NER. This is done by assigning each mention
 to a unique character.
 
-- :class:`.NaiveCharacterUnifier`
-- :class:`.GraphRulesCharacterUnifier`
+- :class:`.NaiveCharacterUnifier` assigns each mention with a unique
+  form to a character.
+- :class:`.GraphRulesCharacterUnifier` uses a set of rules to assign
+  each mention to a character.
+
+
+Relation Extraction
+-------------------
+
+- :class:`.T5RelationExtractor` extracts relations between characters
+  using a finetuned T5 model.
 
 
 Speaker Attribution
 -------------------
 
-- :class:`.BertSpeakerDetector`
+- :class:`.BertSpeakerDetector` detects speaker using a finetuned BERT
+  model.
 
 
 Graph Extraction
 ----------------
 
-- :class:`.CoOccurrencesGraphExtractor`
-- :class:`.ConversationalGraphExtractor`
+- :class:`.CoOccurrencesGraphExtractor` extracts a graph of
+  co-occurrence between characters.
+- :class:`.ConversationalGraphExtractor` extracts a conversational
+  graph: either conversation between characters, or of character
+  mentions.
+- :class:`.RelationalGraphExtractor` extracts a relational graph,
+  where the relation between each character is typed.
 
 
 Dynamic Graphs
@@ -241,7 +264,9 @@ When executing the above block of code, the output attribute
 [<networkx.classes.graph.Graph object at 0x7fd9e9115900>]
 
 See :class:`.CoOccurrencesGraphExtractor` for more details on the
-usage of the ``dynamic`` and ``dynamic_window`` arguments.
+usage of the ``dynamic`` and ``dynamic_window`` arguments. Note that,
+currently, only the co-occurrence graph extractor supports dynamic
+networks.
 
 Plot and export functions work as one would expect
 intuitively. :meth:`.PipelineState.plot_graph` allows one to visualize the
 
@@ -35,6 +35,7 @@
     from renard.pipeline.character_unification import Character
     from renard.pipeline.ner import NEREntity
     from renard.pipeline.quote_detection import Quote
+    from renard.pipeline.relation_extraction import Relation
     import matplotlib.pyplot as plt
 
 
@@ -175,7 +176,10 @@ class PipelineState:
     speakers: Optional[List[Optional[Character]]] = None
 
     #: polarity of each sentence
-    sentences_polarities: Optional[List[float]] = None
+    sentence_polarities: Optional[List[float]] = None
+
+    #: relations detected in each sentence
+    sentence_relations: Optional[List[List[Relation]]] = None
 
     #: NER entities
     entities: Optional[List[NEREntity]] = None
 
@@ -1,5 +1,6 @@
 from typing import Dict, Any, List, Set, Optional, Tuple, Literal, Union
 import itertools as it
+from collections import defaultdict
 import operator
 
 import networkx as nx
@@ -11,6 +12,7 @@
 from renard.pipeline.core import PipelineStep
 from renard.pipeline.character_unification import Character
 from renard.pipeline.quote_detection import Quote
+from renard.pipeline.relation_extraction import Relation
 
 
 def sent_index_for_token_index(token_index: int, sentences: List[List[str]]) -> int:
@@ -147,7 +149,7 @@ def __call__(
         sentences: List[List[str]],
         char2token: Optional[List[int]] = None,
         dynamic_blocks: Optional[BlockBounds] = None,
-        sentences_polarities: Optional[List[float]] = None,
+        sentence_polarities: Optional[List[float]] = None,
         entities: Optional[List[NEREntity]] = None,
         co_occurrences_blocks: Optional[BlockBounds] = None,
         **kwargs,
@@ -194,13 +196,13 @@ def __call__(
                     self.dynamic_overlap,
                     dynamic_blocks,
                     sentences,
-                    sentences_polarities,
+                    sentence_polarities,
                     co_occurrences_blocks,
                 )
             }
         return {
             "character_network": self._extract_graph(
-                mentions, sentences, sentences_polarities, co_occurrences_blocks
+                mentions, sentences, sentence_polarities, co_occurrences_blocks
             )
         }
 
@@ -257,24 +259,24 @@ def _extract_graph(
         self,
         mentions: List[Tuple[Any, NEREntity]],
         sentences: List[List[str]],
-        sentences_polarities: Optional[List[float]],
+        sentence_polarities: Optional[List[float]],
         co_occurrences_blocks: Optional[BlockBounds],
     ) -> nx.Graph:
         """
         :param mentions: A list of entity mentions, ordered by
             appearance, each of the form (KEY MENTION).  KEY
             determines the unicity of the entity.
-        :param sentences: if specified, ``sentences_polarities`` must
+        :param sentences: if specified, ``sentence_polarities`` must
             be specified as well.
-        :param sentences_polarities: if specified, ``sentences`` must
+        :param sentence_polarities: if specified, ``sentences`` must
             be specified as well.  In that case, edges are annotated
             with the ``'polarity`` attribute, indicating the polarity
             of the relationship between two characters.  Polarity
             between two interactions is computed as the strongest
             sentence polarity between those two mentions.
         :param co_occurrences_blocks: only unit 'tokens' is accepted.
         """
-        compute_polarity = not sentences_polarities is None
+        compute_polarity = not sentence_polarities is None
 
         assert co_occurrences_blocks is None or co_occurrences_blocks[1] == "tokens"
         if co_occurrences_blocks is None:
@@ -324,15 +326,15 @@ def _extract_graph(
 
                 if compute_polarity:
                     assert not sentences is None
-                    assert not sentences_polarities is None
+                    assert not sentence_polarities is None
                     # TODO: optim
                     first_sent_idx = sent_index_for_token_index(
                         mention1.start_idx, sentences
                     )
                     last_sent_idx = sent_index_for_token_index(
                         mention2.start_idx, sentences
                     )
-                    sents_polarities_between_mentions = sentences_polarities[
+                    sents_polarities_between_mentions = sentence_polarities[
                         first_sent_idx : last_sent_idx + 1
                     ]
                     polarity = max(sents_polarities_between_mentions, key=abs)
@@ -349,7 +351,7 @@ def _extract_dynamic_graph(
         overlap: int,
         dynamic_blocks: Optional[BlockBounds],
         sentences: List[List[str]],
-        sentences_polarities: Optional[List[float]],
+        sentence_polarities: Optional[List[float]],
         co_occurrences_blocks: Optional[BlockBounds],
     ) -> List[nx.Graph]:
         """
@@ -367,14 +369,14 @@ def _extract_dynamic_graph(
         """
         assert co_occurrences_blocks is None or co_occurrences_blocks[1] == "tokens"
         assert window is None or dynamic_blocks is None
-        compute_polarity = not sentences is None and not sentences_polarities is None
+        compute_polarity = not sentences is None and not sentence_polarities is None
 
         if not window is None:
             return [
                 self._extract_graph(
                     [elt for elt in ct if not elt is None],
                     sentences,
-                    sentences_polarities,
+                    sentence_polarities,
                     co_occurrences_blocks,
                 )
                 for ct in windowed(mentions, window, step=window - overlap)
@@ -391,10 +393,10 @@ def _extract_dynamic_graph(
             sent_start, sent_end = sent_indices_for_block(dynamic_block, sentences)
             block_sentences = sentences[sent_start : sent_end + 1]
 
-            block_sentences_polarities = None
+            block_sentence_polarities = None
             if compute_polarity:
-                assert not sentences_polarities is None
-                block_sentences_polarities = sentences_polarities[
+                assert not sentence_polarities is None
+                block_sentence_polarities = sentence_polarities[
                     sent_start : sent_end + 1
                 ]
 
@@ -412,7 +414,7 @@ def _extract_dynamic_graph(
                 self._extract_graph(
                     block_mentions,
                     block_sentences,
-                    block_sentences_polarities,
+                    block_sentence_polarities,
                     block_co_occ_bounds,
                 )
             )
@@ -441,7 +443,7 @@ def production(self) -> Set[str]:
         return {"character_network"}
 
     def optional_needs(self) -> Set[str]:
-        return {"sentences_polarities"}
+        return {"sentence_polarities"}
 
 
 class ConversationalGraphExtractor(PipelineStep):
@@ -588,7 +590,6 @@ def __call__(
         characters: Set[Character],
         **kwargs,
     ) -> Dict[str, Any]:
-
         if self.graph_type == "conversation":
             G = self._conversation_extract(sentences, quotes, speakers, characters)
         elif self.graph_type == "mention":
@@ -608,3 +609,52 @@ def needs(self) -> Set[str]:
     def production(self) -> Set[str]:
         """character_network"""
         return {"character_network"}
+
+
+class RelationalGraphExtractor(PipelineStep):
+    """A graph extractor using relations between characters.
+
+    .. note::
+
+        Does not support dynamic networks yet.
+    """
+
+    def __init__(self, min_rel_occurrences: int = 1):
+        self.min_rel_occurrences = min_rel_occurrences
+
+    def __call__(
+        self,
+        characters: list[Character],
+        sentence_relations: list[list[Relation]],
+        **kwargs,
+    ) -> dict[str, Any]:
+        G = nx.Graph()
+        for character in characters:
+            G.add_node(character)
+
+        # { (char1, char2) => { relation: counter } }
+        edge_relations = defaultdict(dict)
+        for relations in sentence_relations:
+            for subj, rel, obj in relations:
+                counter = edge_relations[(subj, obj)].get(rel, 0)
+                edge_relations[(subj, obj)][rel] = counter + 1
+
+        for (char1, char2), counter in edge_relations.items():
+            relations = {
+                rel
+                for rel, count in counter.items()
+                if count >= self.min_rel_occurrences
+            }
+            if len(relations) > 0:
+                G.add_edge(char1, char2, relations=relations)
+
+        return {"character_network": G}
+
+    def supported_langs(self) -> Literal["any"]:
+        return "any"
+
+    def needs(self) -> set[str]:
+        return {"characters", "sentence_relations"}
+
+    def production(self) -> set[str]:
+        return {"character_network"}