|
| 1 | +import argparse, json |
| 2 | +import matplotlib.pyplot as plt |
| 3 | +from tqdm import tqdm |
| 4 | +from conivel.datas.context import ( |
| 5 | + SameNounRetriever, |
| 6 | + BM25ContextRetriever, |
| 7 | + IdealNeuralContextRetriever, |
| 8 | +) |
| 9 | +from conivel.datas.dekker import DekkerDataset |
| 10 | +from conivel.utils import pretrained_bert_for_token_classification |
| 11 | +from conivel.train import train_ner_model |
| 12 | + |
| 13 | + |
# Axis label shared by both histogram panels.
_X_LABEL = "Distance of retrieved sentences (in sentences)"


def build_parser():
    """Build the command-line parser of this script.

    :return: an ``argparse.ArgumentParser`` exposing ``-i/--input``
        (mandatory) and ``-o/--output`` (optional, defaults to ``None``).
    """
    parser = argparse.ArgumentParser(
        description="Plot histograms of retrieved-sentence distances."
    )
    # The input is mandatory: without it the script used to fail later
    # with an opaque ``TypeError`` raised by ``open(None)``.
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help='JSON file with "samenoun_dists" and "bm25_dists" entries.',
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        default=None,
        help="File to save the figure to; the figure is shown if omitted.",
    )
    return parser


def plot_distance_histograms(sn_dists, bm25_dists, output=None):
    """Draw the two distance histograms side by side.

    :param sn_dists: values plotted in the left panel, titled "samenoun".
    :param bm25_dists: values plotted in the right panel, titled "bm25".
    :param output: if truthy, path the figure is saved to; otherwise the
        figure is displayed with ``plt.show()``.
    """
    # NOTE(review): "science" is not a built-in matplotlib style; it is
    # presumably provided by the SciencePlots package -- confirm it is
    # installed (recent releases also need ``import scienceplots``).
    plt.style.use("science")
    fig, axs = plt.subplots(1, 2)
    fig.set_size_inches(20, 4)

    panels = (("samenoun", sn_dists), ("bm25", bm25_dists))
    for ax, (title, values) in zip(axs, panels):
        ax.hist(values, bins=50)
        ax.set_xlabel(_X_LABEL, fontsize=20)
        ax.set_title(title, fontsize=20)

    if output:
        plt.savefig(output)
    else:
        plt.show()


def main(argv=None):
    """Parse the arguments, load the distances and plot them.

    :param argv: argument list to parse; ``None`` lets argparse read
        ``sys.argv[1:]``.
    :raises KeyError: if the input JSON lacks one of the expected keys.
    """
    args = build_parser().parse_args(argv)

    # Explicit encoding so loading does not depend on the locale.
    with open(args.input, encoding="utf-8") as f:
        dists = json.load(f)

    plot_distance_histograms(
        dists["samenoun_dists"], dists["bm25_dists"], output=args.output
    )


if __name__ == "__main__":
    main()
0 commit comments