Skip to content

Commit 9db6e1d

Browse files
committed
plot adjustments
1 parent 214f62f commit 9db6e1d

3 files changed

Lines changed: 37 additions & 20 deletions

File tree

plot_dekker_books_len.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,9 @@
1515

1616
dataset = DekkerDataset()
1717

18-
doc_tokens = []
19-
for document in tqdm(dataset.documents):
20-
tokens = flattened([sent.tokens for sent in document])
21-
tokens = tokenizer.tokenize(" ".join(tokens))
22-
doc_tokens.append(tokens)
23-
24-
2518
plt.style.use("science")
26-
plt.xlabel("Number of tokens")
27-
plt.hist([len(t) for t in doc_tokens])
19+
plt.xlabel("Number of sentences")
20+
plt.hist([len(t) for t in dataset.documents])
2821
if args.output:
2922
plt.savefig(args.output)
3023
else:

plot_dist.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,18 @@
2525

2626

2727
plt.style.use("science")
28-
fig, axs = plt.subplots(1, 2)
29-
fig.set_size_inches(20, 4)
28+
plt.rc("xtick", labelsize=30) # fontsize of the tick labels
29+
plt.rc("ytick", labelsize=30) # fontsize of the tick labels
30+
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, tight_layout=True)
31+
fig.set_size_inches(20, 6)
3032
axs[0].hist(sn_dists, bins=50)
31-
axs[0].set_xlabel("Distance of retrieved sentences (in sentences)", fontsize=20)
32-
axs[0].set_title("samenoun", fontsize=20)
33+
axs[0].set_title("samenoun", fontsize=30)
3334
axs[1].hist(bm25_dists, bins=50)
34-
axs[1].set_xlabel("Distance of retrieved sentences (in sentences)", fontsize=20)
35-
axs[1].set_title("bm25", fontsize=20)
35+
fig.supxlabel(
36+
"Distance of retrieved sentences (in sentences)",
37+
fontsize=30,
38+
)
39+
axs[1].set_title("bm25", fontsize=30)
3640
if args.output:
3741
plt.savefig(args.output)
3842
else:

plot_mean_test_f1.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,25 @@
99
parser.add_argument("-r", "--oracle", action="store_true")
1010
args = parser.parse_args()
1111

12+
# from https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html
13+
linestyle_tuple = [
14+
("solid", "solid"),
15+
("dashed", "dashed"),
16+
("dashdot", "dashdot"),
17+
("densely dotted", (0, (1, 1))),
18+
("long dash with offset", (5, (10, 3))),
19+
("densely dashdotdotted", (0, (3, 1, 1, 1, 1, 1))),
20+
("densely dashdotted", (0, (3, 1, 1, 1))),
21+
("dashdotted", (0, (3, 5, 1, 5))),
22+
("densely dashed", (0, (5, 1))),
23+
("loosely dashed", (0, (5, 10))),
24+
("loosely dashdotted", (0, (3, 10, 1, 10))),
25+
("dashdotdotted", (0, (3, 5, 1, 5, 1, 5))),
26+
("loosely dashdotdotted", (0, (3, 10, 1, 10, 1, 10))),
27+
("dotted", "dotted"),
28+
("loosely dotted", (0, (1, 10))),
29+
]
30+
1231

1332
runs = ["random", "bm25", "samenoun", "left", "right", "neighbors"]
1433

@@ -17,29 +36,30 @@
1736

1837

1938
plt.style.use("science")
20-
# plt.rcParams.update({"xtick.labelsize": 18})
21-
# plt.rcParams.update({"ytick.labelsize": 18})
2239
plt.rc("xtick", labelsize=40) # fontsize of the tick labels
2340
plt.rc("ytick", labelsize=40) # fontsize of the tick labels
2441
fig, ax = plt.subplots()
2542

26-
fig.set_size_inches(16, 8)
43+
fig.set_size_inches(16, 12)
2744

28-
for run in runs:
45+
for run_i, run in enumerate(runs):
2946
if args.oracle:
3047
run = f"oracle_{run}"
3148
with open(f"./runs/short/{run}/metrics.json") as f:
3249
metrics = json.load(f)
3350
ax.plot(
3451
[int(step) for step in metrics["mean_test_f1"]["steps"]],
3552
metrics["mean_test_f1"]["values"],
53+
linestyle=linestyle_tuple[run_i][1],
54+
linewidth=4,
3655
)
3756

3857
# bare baseline
3958
ax.plot(
4059
[1, 6],
4160
[bare_metrics["mean_test_f1"]["values"][0]] * 2,
42-
linestyle="--",
61+
linestyle=linestyle_tuple[len(runs)][1],
62+
linewidth=4,
4363
)
4464

4565
ax.grid()

0 commit comments

Comments
 (0)