Skip to content

Commit 470a3a6

Browse files
1 parent c476822 commit 470a3a6

123 files changed

Lines changed: 343 additions & 326 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file records the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: f64218c7c9b9e542af182e17b985c5c6
3+
config: cbf5709ef81b50e205370d5d9942f63f
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

_modules/index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<head>
66
<meta charset="utf-8" />
77
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
8-
<title>Overview: module code &mdash; PyThaiNLP 8a188f2 documentation</title>
8+
<title>Overview: module code &mdash; PyThaiNLP f22c110 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=03e43079" />
1010
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css?v=9edc463e" />
1111
<link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
@@ -14,7 +14,7 @@
1414

1515
<script src="../_static/jquery.js?v=5d32c60e"></script>
1616
<script src="../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
17-
<script src="../_static/documentation_options.js?v=214d532b"></script>
17+
<script src="../_static/documentation_options.js?v=472f9f16"></script>
1818
<script src="../_static/doctools.js?v=fd6eb6e6"></script>
1919
<script src="../_static/sphinx_highlight.js?v=6ffebe34"></script>
2020
<script src="../_static/clipboard.min.js?v=a7894cd8"></script>

_modules/pythainlp/ancient/aksonhan.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<head>
66
<meta charset="utf-8" />
77
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
8-
<title>pythainlp.ancient.aksonhan &mdash; PyThaiNLP 8a188f2 documentation</title>
8+
<title>pythainlp.ancient.aksonhan &mdash; PyThaiNLP f22c110 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" />
1010
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=9edc463e" />
1111
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
@@ -14,7 +14,7 @@
1414

1515
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
1616
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
17-
<script src="../../../_static/documentation_options.js?v=214d532b"></script>
17+
<script src="../../../_static/documentation_options.js?v=472f9f16"></script>
1818
<script src="../../../_static/doctools.js?v=fd6eb6e6"></script>
1919
<script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script>
2020
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>

_modules/pythainlp/ancient/currency.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<head>
66
<meta charset="utf-8" />
77
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
8-
<title>pythainlp.ancient.currency &mdash; PyThaiNLP 8a188f2 documentation</title>
8+
<title>pythainlp.ancient.currency &mdash; PyThaiNLP f22c110 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" />
1010
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=9edc463e" />
1111
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
@@ -14,7 +14,7 @@
1414

1515
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
1616
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
17-
<script src="../../../_static/documentation_options.js?v=214d532b"></script>
17+
<script src="../../../_static/documentation_options.js?v=472f9f16"></script>
1818
<script src="../../../_static/doctools.js?v=fd6eb6e6"></script>
1919
<script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script>
2020
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>

_modules/pythainlp/benchmarks/word_tokenization.html

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<head>
66
<meta charset="utf-8" />
77
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
8-
<title>pythainlp.benchmarks.word_tokenization &mdash; PyThaiNLP 8a188f2 documentation</title>
8+
<title>pythainlp.benchmarks.word_tokenization &mdash; PyThaiNLP f22c110 documentation</title>
99
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" />
1010
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=9edc463e" />
1111
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
@@ -14,7 +14,7 @@
1414

1515
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
1616
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
17-
<script src="../../../_static/documentation_options.js?v=214d532b"></script>
17+
<script src="../../../_static/documentation_options.js?v=472f9f16"></script>
1818
<script src="../../../_static/doctools.js?v=fd6eb6e6"></script>
1919
<script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script>
2020
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
@@ -116,7 +116,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
116116

117117
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
118118
<span class="kn">import</span><span class="w"> </span><span class="nn">sys</span>
119-
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">Any</span>
119+
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">TYPE_CHECKING</span><span class="p">,</span> <span class="n">Union</span>
120120

121121
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
122122
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
@@ -153,7 +153,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
153153
<span class="k">return</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">precision</span> <span class="o">*</span> <span class="n">recall</span> <span class="o">/</span> <span class="p">(</span><span class="n">precision</span> <span class="o">+</span> <span class="n">recall</span><span class="p">)</span>
154154

155155

156-
<span class="k">def</span><span class="w"> </span><span class="nf">_flatten_result</span><span class="p">(</span><span class="n">my_dict</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;:&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
156+
<span class="k">def</span><span class="w"> </span><span class="nf">_flatten_result</span><span class="p">(</span><span class="n">my_dict</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;:&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
157157
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Flatten two-dimension dictionary.</span>
158158

159159
<span class="sd"> Use keys in the first dimension as a prefix for keys in the second dimension.</span>
@@ -167,7 +167,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
167167
<span class="sd"> :param str sep: separator between the two keys (default: &quot;:&quot;)</span>
168168

169169
<span class="sd"> :return: a one-dimension dictionary with keys combined</span>
170-
<span class="sd"> :rtype: dict[str, Any]</span>
170+
<span class="sd"> :rtype: dict[str, Union[int, str]]</span>
171171
<span class="sd"> &quot;&quot;&quot;</span>
172172
<span class="k">return</span> <span class="p">{</span>
173173
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">k1</span><span class="si">}{</span><span class="n">sep</span><span class="si">}{</span><span class="n">k2</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">:</span> <span class="n">v</span>
@@ -252,7 +252,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
252252

253253
<div class="viewcode-block" id="compute_stats">
254254
<a class="viewcode-back" href="../../../api/benchmarks.html#pythainlp.benchmarks.word_tokenization.compute_stats">[docs]</a>
255-
<span class="k">def</span><span class="w"> </span><span class="nf">compute_stats</span><span class="p">(</span><span class="n">ref_sample</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">raw_sample</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
255+
<span class="k">def</span><span class="w"> </span><span class="nf">compute_stats</span><span class="p">(</span><span class="n">ref_sample</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">raw_sample</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]]:</span>
256256
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Compute statistics for tokenization quality</span>
257257

258258
<span class="sd"> These statistics include:</span>
@@ -269,7 +269,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
269269
<span class="sd"> :param str samples: samples that we want to evaluate</span>
270270

271271
<span class="sd"> :return: metrics at character- and word-level and indicators of correctly tokenized words</span>
272-
<span class="sd"> :rtype: dict[str, Any]</span>
272+
<span class="sd"> :rtype: dict[str, dict[str, Union[int, str]]]</span>
273273
<span class="sd"> &quot;&quot;&quot;</span>
274274
<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
275275

@@ -285,11 +285,11 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
285285
<span class="n">c_pos_pred</span> <span class="o">=</span> <span class="n">c_pos_pred</span><span class="p">[</span><span class="n">c_pos_pred</span> <span class="o">&lt;</span> <span class="n">ref_sample_arr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
286286
<span class="n">c_neg_pred</span> <span class="o">=</span> <span class="n">c_neg_pred</span><span class="p">[</span><span class="n">c_neg_pred</span> <span class="o">&lt;</span> <span class="n">ref_sample_arr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
287287

288-
<span class="n">c_tp</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">intp</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_pos_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>
289-
<span class="n">c_fp</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">intp</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_pos_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span>
288+
<span class="n">c_tp</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_pos_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">))</span>
289+
<span class="n">c_fp</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_pos_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">))</span>
290290

291-
<span class="n">c_tn</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">intp</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_neg_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">)</span>
292-
<span class="n">c_fn</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">intp</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_neg_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>
291+
<span class="n">c_tn</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_neg_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">0</span><span class="p">))</span>
292+
<span class="n">c_fn</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">[</span><span class="n">c_neg_pred</span><span class="p">]</span> <span class="o">==</span> <span class="mi">1</span><span class="p">))</span>
293293

294294
<span class="c1"># Compute word-level statistics</span>
295295

@@ -302,7 +302,7 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
302302
<span class="n">word_boundaries</span><span class="p">,</span> <span class="n">ss_boundaries</span>
303303
<span class="p">)</span>
304304

305-
<span class="n">correctly_tokenised_words</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">intp</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">tokenization_indicators</span><span class="p">)</span>
305+
<span class="n">correctly_tokenised_words</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">tokenization_indicators</span><span class="p">))</span>
306306

307307
<span class="n">tokenization_indicators_str</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">tokenization_indicators</span><span class="p">))</span>
308308

@@ -315,8 +315,8 @@ <h1>Source code for pythainlp.benchmarks.word_tokenization</h1><div class="highl
315315
<span class="p">},</span>
316316
<span class="s2">&quot;word_level&quot;</span><span class="p">:</span> <span class="p">{</span>
317317
<span class="s2">&quot;correctly_tokenised_words&quot;</span><span class="p">:</span> <span class="n">correctly_tokenised_words</span><span class="p">,</span>
318-
<span class="s2">&quot;total_words_in_sample&quot;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sample_arr</span><span class="p">),</span>
319-
<span class="s2">&quot;total_words_in_ref_sample&quot;</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">),</span>
318+
<span class="s2">&quot;total_words_in_sample&quot;</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">sample_arr</span><span class="p">)),</span>
319+
<span class="s2">&quot;total_words_in_ref_sample&quot;</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">ref_sample_arr</span><span class="p">)),</span>
320320
<span class="p">},</span>
321321
<span class="s2">&quot;global&quot;</span><span class="p">:</span> <span class="p">{</span>
322322
<span class="s2">&quot;tokenisation_indicators&quot;</span><span class="p">:</span> <span class="s2">&quot;&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">tokenization_indicators_str</span><span class="p">)</span>

0 commit comments

Comments
 (0)