Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions s2s/dset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from typing import Dict, Type, Union

from s2s.dset._arith import ArithDset
from s2s.dset._eng2chi import Eng2ChiDset
from s2s.dset._base import BaseDset

# Union of every concrete dataset class exposed by this package.
Dset = Union[ArithDset, Eng2ChiDset]

# Lookup table from each dataset's `dset_name` to the class implementing it.
DSET_OPTS: Dict[str, Type[Dset]] = {
    dset_cls.dset_name: dset_cls
    for dset_cls in (ArithDset, Eng2ChiDset)
}
33 changes: 33 additions & 0 deletions s2s/dset/_eng2chi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

from typing import Sequence

import pandas as pd
from sklearn.metrics import accuracy_score

from s2s.dset._base import BaseDset
from s2s.path import DATA_PATH

class Eng2ChiDset(BaseDset):
    """Parallel-text dataset registered under the name ``'eng2chi'``.

    Source/target pairs are read from ``eng2chi.csv`` under ``DATA_PATH``
    (columns ``src`` and ``tgt``).  Presumably English source sentences
    paired with Chinese targets, judging by the name -- confirm.

    TODO: cite the origin of ``eng2chi.csv`` (dataset source and licence).
    """
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a docstring for the Eng2ChiDset class, including a reference to the source of the dataset.

# Registry key for this dataset; `DSET_OPTS` in `s2s/dset/__init__.py` maps it to the class.
dset_name = 'eng2chi'

def __init__(self):
    """Load ``eng2chi.csv`` and append its samples to ``src`` and ``tgt``.

    The CSV is read from ``DATA_PATH``.  Every value in the ``src`` and
    ``tgt`` columns is cast to ``str``, passed through the class's
    ``preprocess`` callable, and appended to ``self.src`` / ``self.tgt``
    respectively (both presumably created by ``BaseDset.__init__`` --
    confirm).
    """
    super().__init__()
    csv_path = os.path.join(DATA_PATH, 'eng2chi.csv')
    frame = pd.read_csv(csv_path)
    # The CSV column names double as the names of the destination attributes.
    for column in ('src', 'tgt'):
        extend = getattr(self, column).extend
        as_text = frame[column].apply(str)
        extend(as_text.apply(self.__class__.preprocess).to_list())

@staticmethod
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For translation, one should use an evaluation metric other than exact match (i.e., the generated words must be "the same words" and "in the same order" as the target sequence, which is what accuracy_score checks).
Why is exact match bad?
Consider the example translation pair "I like apple" and "我喜歡蘋果": "蘋果我喜歡" should also be an acceptable answer.
Thus, one should use an evaluation metric with fuzzy matching, i.e., it is okay for the output not to match exactly as long as it has the same meaning (swapped word order, synonyms, etc.).
Nowadays, people mostly use the BLEU score as the evaluation metric for translation tasks.
Find a Python package that calculates the BLEU score for you.

def eval(tgt: str, pred: str) -> float:
    """Score a single prediction by exact string match.

    Args:
        tgt: Reference (target) string.
        pred: Predicted string.

    Returns:
        ``1.0`` when ``pred`` equals ``tgt`` exactly, otherwise ``0.0``.
    """
    # NOTE(review): exact match gives no credit to valid re-orderings or
    # synonyms; a fuzzy translation metric such as BLEU may be a better fit.
    is_exact_match = tgt == pred
    return float(is_exact_match)

@staticmethod
def batch_eval(
    batch_tgt: Sequence[str],
    batch_pred: Sequence[str],
) -> float:
    """Score a batch of predictions with ``accuracy_score``.

    Args:
        batch_tgt: Reference (target) strings.
        batch_pred: Predicted strings, aligned position-by-position with
            ``batch_tgt``.

    Returns:
        The value returned by ``sklearn.metrics.accuracy_score`` for the
        batch, i.e. the fraction of predictions equal to their target.
    """
    score = accuracy_score(y_true=batch_tgt, y_pred=batch_pred)
    return score