Skip to content

Commit a296b1a

Browse files
authored
MediaSum Corpus + pkg_resources Issue Fix (#332)
1 parent b243967 commit a296b1a

5 files changed

Lines changed: 21 additions & 43 deletions

File tree

convokit/coordination/coordination.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import Callable, Tuple, List, Dict, Optional, Collection, Union
33
import copy
44

5-
import pkg_resources
5+
from importlib import resources
66

77
from convokit.model import Corpus, Speaker, Utterance
88
from convokit.transformer import Transformer
@@ -376,9 +376,7 @@ def score_report(self, corpus: Corpus, scores: CoordinationScore):
376376

377377
# helper functions
378378
def _compute_liwc_reverse_dict(self) -> None:
379-
with open(
380-
pkg_resources.resource_filename("convokit", "data/coord-liwc-patterns.txt"), "r"
381-
) as f:
379+
with open(resources.files("convokit").joinpath("data/coord-liwc-patterns.txt"), "r") as f:
382380
all_words = []
383381
for line in f:
384382
cat, pat = line.strip().split("\t")

convokit/politeness_collections/politeness_api/features/politeness_strategies.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
import pkg_resources
2-
import os
1+
from importlib import resources
32

43
#####
54
# Word lists
@@ -107,17 +106,14 @@
107106
]
108107

109108
# Positive and negative words from Liu
110-
pos_filename = pkg_resources.resource_filename(
111-
"convokit", os.path.join("data", "liu-positive-words.txt")
112-
)
113-
neg_filename = pkg_resources.resource_filename(
114-
"convokit", os.path.join("data", "liu-negative-words.txt")
115-
)
109+
data_root = resources.files("convokit").joinpath("data")
110+
pos_filename = data_root.joinpath("liu-positive-words.txt")
111+
neg_filename = data_root.joinpath("liu-negative-words.txt")
116112

117113

118-
positive_words = set(map(lambda x: x.strip(), open(pos_filename).read().splitlines()))
114+
positive_words = set(map(lambda x: x.strip(), pos_filename.read_text().splitlines()))
119115
negative_words = set(
120-
map(lambda x: x.strip(), open(neg_filename, encoding="ISO-8859-1").read().splitlines())
116+
map(lambda x: x.strip(), neg_filename.read_text(encoding="ISO-8859-1").splitlines())
121117
)
122118

123119
#####

convokit/politeness_collections/politeness_cscw_zh/strategy_extractor.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import json
2-
import os
32
import re
43
from itertools import chain
5-
import pkg_resources
4+
from importlib import resources
65
from collections import defaultdict
76
from typing import Dict, List, Tuple
87

@@ -14,17 +13,10 @@
1413

1514
LEXICON_DIR = "politeness_collections/politeness_cscw_zh/lexicons"
1615

17-
ngram_path = pkg_resources.resource_filename(
18-
"convokit", os.path.join(LEXICON_DIR, "ngram_markers.json")
19-
)
20-
21-
starter_path = pkg_resources.resource_filename(
22-
"convokit", os.path.join(LEXICON_DIR, "starter_markers.json")
23-
)
24-
25-
non_starter_path = pkg_resources.resource_filename(
26-
"convokit", os.path.join(LEXICON_DIR, "non_starter_markers.json")
27-
)
16+
lexicon_root = resources.files("convokit").joinpath(LEXICON_DIR)
17+
ngram_path = lexicon_root.joinpath("ngram_markers.json")
18+
starter_path = lexicon_root.joinpath("starter_markers.json")
19+
non_starter_path = lexicon_root.joinpath("non_starter_markers.json")
2820

2921

3022
PLEASE_PATTERN = re.compile(r"([烦劳还]?\s?请)|([烦劳]您)")

convokit/politeness_collections/politeness_local/strategy_extractor.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import json
2-
import os
32
import re
43
from itertools import chain
5-
import pkg_resources
4+
from importlib import resources
65
from collections import defaultdict
76
from typing import Dict, List, Tuple, Set, Optional
87

@@ -14,17 +13,10 @@
1413

1514
LEXICON_DIR = "politeness_collections/politeness_local/lexicons"
1615

17-
ngram_path = pkg_resources.resource_filename(
18-
"convokit", os.path.join(LEXICON_DIR, "ngram_markers.json")
19-
)
20-
21-
starter_path = pkg_resources.resource_filename(
22-
"convokit", os.path.join(LEXICON_DIR, "starter_markers.json")
23-
)
24-
25-
non_starter_path = pkg_resources.resource_filename(
26-
"convokit", os.path.join(LEXICON_DIR, "non_starter_markers.json")
27-
)
16+
lexicon_root = resources.files("convokit").joinpath(LEXICON_DIR)
17+
ngram_path = lexicon_root.joinpath("ngram_markers.json")
18+
starter_path = lexicon_root.joinpath("starter_markers.json")
19+
non_starter_path = lexicon_root.joinpath("non_starter_markers.json")
2820

2921
# strategy functions
3022

docs/source/mediasum.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ To download directly with ConvoKit:
9292
For some quick stats:
9393

9494
>>> corpus.print_summary_stats()
95-
Number of Speakers: 700
96-
Number of Utterances: 67373
97-
Number of Conversations: 3107
95+
Number of Speakers: 718483
96+
Number of Utterances: 13919244
97+
Number of Conversations: 463596
9898

9999
Get all paraphrase pairs from the corpus metadata
100100

0 commit comments

Comments
 (0)