ProFatXuanAll · fdsa654hg · Nov 22, 2020 · Nov 26, 2020 · Nov 27, 2020 · Nov 29, 2020
diff --git a/s2s/dset/__init__.py b/s2s/dset/__init__.py
@@ -1,12 +1,16 @@
 from typing import Dict, Type, Union
 
 from s2s.dset._arith import ArithDset
 from s2s.dset._base import BaseDset
+from s2s.dset._eng2chi import Eng2ChiDset
 
 Dset = Union[
     ArithDset,
+    Eng2ChiDset,
 ]
 
+# Registry keyed by each dataset class's `dset_name`.
 DSET_OPTS: Dict[str, Type[Dset]] = {
     ArithDset.dset_name: ArithDset,
+    Eng2ChiDset.dset_name: Eng2ChiDset,
 }
diff --git a/s2s/dset/_eng2chi.py b/s2s/dset/_eng2chi.py
@@ -0,0 +1,55 @@
+import os
+
+from typing import Sequence
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+from s2s.dset._base import BaseDset
+from s2s.path import DATA_PATH
+
+
+class Eng2ChiDset(BaseDset):
+    """Dataset loaded from ``eng2chi.csv`` under ``DATA_PATH``.
+
+    The ``src`` and ``tgt`` CSV columns are preprocessed and appended to
+    ``self.src`` and ``self.tgt``. Judging by the name this is an
+    English-to-Chinese corpus -- confirm against the data file.
+    """
+
+    dset_name = 'eng2chi'
+
+    def __init__(self):
+        """Read the CSV file and fill ``self.src`` / ``self.tgt``."""
+        super().__init__()
+        # Parse every cell as text. With pandas' defaults, cells such as
+        # "NA", "null" or "" are read as NaN (then stringified to 'nan'),
+        # and numeric-looking columns may lose their original spelling.
+        df = pd.read_csv(
+            os.path.join(DATA_PATH, 'eng2chi.csv'),
+            dtype=str,
+            keep_default_na=False,
+        )
+        preprocess = self.__class__.preprocess
+        self.src.extend(df['src'].apply(preprocess).to_list())
+        self.tgt.extend(df['tgt'].apply(preprocess).to_list())
+
+    @staticmethod
+    def eval(tgt: str, pred: str) -> float:
+        """Return 1.0 when ``pred`` equals ``tgt`` exactly, else 0.0."""
+        return float(tgt == pred)
+
+    @staticmethod
+    def batch_eval(
+            batch_tgt: Sequence[str],
+            batch_pred: Sequence[str],
+    ) -> float:
+        """Return the fraction of predictions that exactly match targets.
+
+        Args:
+            batch_tgt: Reference strings.
+            batch_pred: Predicted strings, aligned with ``batch_tgt``.
+        """
+        # ``float`` keeps the annotated return type even when
+        # scikit-learn hands back a NumPy scalar.
+        return float(accuracy_score(batch_tgt, batch_pred))