-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathz_6c5f9b70fbd6dea2_jsonlines_py.html
More file actions
174 lines (174 loc) · 24.8 KB
/
z_6c5f9b70fbd6dea2_jsonlines_py.html
File metadata and controls
174 lines (174 loc) · 24.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Document metadata for the coverage report of muutils/jsonlines.py. -->
  <!-- Encoding is declared first, in the short form, so the parser sees it before any text. -->
  <meta charset="utf-8">
  <title>Coverage for muutils/jsonlines.py: 100%</title>
  <link rel="icon" sizes="32x32" href="favicon_32_cb_c827f16f.png">
  <!-- text/css is the default type for rel="stylesheet", so no type attribute is needed. -->
  <link rel="stylesheet" href="style_cb_9ff733b0.css">
  <!-- defer: the script is fetched in parallel and executed after the document is parsed. -->
  <script src="coverage_html_cb_dd2e7eb5.js" defer></script>
</head>
<body class="pyfile">
<header>
  <!-- Page banner: file name, coverage percentage, statement counts, and file navigation. -->
  <div class="content">
    <h1>
      <span class="text">Coverage for </span><b>muutils / jsonlines.py</b>:
      <span class="pc_cov">100%</span>
    </h1>
    <!-- Keyboard-shortcut help. The label wraps the keyboard icon and is bound to the
         checkbox through for/id, so activating the icon toggles the checkbox. -->
    <aside id="help_panel_wrapper">
      <input id="help_panel_state" type="checkbox">
      <label for="help_panel_state">
        <img id="keyboard_icon" src="keybd_closed_cb_900cfef5.png" alt="Show/hide keyboard shortcuts">
      </label>
      <div id="help_panel">
        <p class="legend">Shortcuts on this page</p>
        <div class="keyhelp">
          <p>
            <kbd>r</kbd>
            <kbd>m</kbd>
            <kbd>x</kbd>
            toggle line displays
          </p>
          <p>
            <kbd>j</kbd>
            <kbd>k</kbd>
            next/prev highlighted chunk
          </p>
          <p>
            <kbd>0</kbd> (zero) top of page
          </p>
          <p>
            <kbd>1</kbd> (one) first highlighted chunk
          </p>
          <p>
            <kbd>[</kbd>
            <kbd>]</kbd>
            prev/next file
          </p>
          <p>
            <kbd>u</kbd> up to the index
          </p>
          <p>
            <kbd>?</kbd> show/hide this help
          </p>
        </div>
      </div>
    </aside>
    <!-- Statement totals; each button carries the shortcut key listed in the help panel above. -->
    <h2>
      <span class="text">32 statements </span>
      <button type="button" class="run button_toggle_run" value="run" data-shortcut="r" title="Toggle lines run">32<span class="text"> run</span></button>
      <button type="button" class="mis show_mis button_toggle_mis" value="mis" data-shortcut="m" title="Toggle lines missing">0<span class="text"> missing</span></button>
      <button type="button" class="exc show_exc button_toggle_exc" value="exc" data-shortcut="x" title="Toggle lines excluded">0<span class="text"> excluded</span></button>
    </h2>
    <!-- Previous/next report pages, the index, and the generator version with build time. -->
    <p class="text">
      <a id="prevFileLink" class="nav" href="z_d2e926c6535458c2_util_py.html">« prev</a>
      <a id="indexLink" class="nav" href="index.html">^ index</a>
      <a id="nextFileLink" class="nav" href="z_6c5f9b70fbd6dea2_kappa_py.html">» next</a>
      <a class="nav" href="https://coverage.readthedocs.io/en/7.13.5">coverage.py v7.13.5</a>,
      created at <time datetime="2026-03-18T21:32-06:00">2026-03-18 21:32 -0600</time>
    </p>
    <!-- Label-less buttons, one per remaining shortcut key (see data-shortcut).
         NOTE(review): presumably bound by coverage_html_cb_dd2e7eb5.js and hidden by the
         "hidden" class in the stylesheet; neither file is visible here, so confirm there. -->
    <aside class="hidden">
      <button type="button" class="button_next_chunk" data-shortcut="j"></button>
      <button type="button" class="button_prev_chunk" data-shortcut="k"></button>
      <button type="button" class="button_top_of_page" data-shortcut="0"></button>
      <button type="button" class="button_first_chunk" data-shortcut="1"></button>
      <button type="button" class="button_prev_file" data-shortcut="["></button>
      <button type="button" class="button_next_file" data-shortcut="]"></button>
      <button type="button" class="button_to_index" data-shortcut="u"></button>
      <button type="button" class="button_show_hide_help" data-shortcut="?"></button>
    </aside>
  </div>
</header>
<main id="source">
<!-- Annotated listing of muutils/jsonlines.py: one <p> per source line. The <p> class is the line's coverage state as used here ("run" for executed statements, "pln" for the rest, "run2" on continuation lines of a multi-line statement); span.n holds the line-number anchor, span.t the highlighted source text. Leading spaces inside span.t are the Python indentation (four per level), so they must be kept as written. NOTE(review): assumes the report stylesheet preserves whitespace in these lines - confirm against style_cb_9ff733b0.css. -->
<p class="pln"><span class="n"><a id="t1" href="#t1">1</a></span><span class="t"><span class="str">"utilities for reading and writing jsonlines files, including gzip support"</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t2" href="#t2">2</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t3" href="#t3">3</a></span><span class="t"><span class="key">from</span> <span class="nam">__future__</span> <span class="key">import</span> <span class="nam">annotations</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t4" href="#t4">4</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t5" href="#t5">5</a></span><span class="t"><span class="key">import</span> <span class="nam">gzip</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t6" href="#t6">6</a></span><span class="t"><span class="key">import</span> <span class="nam">json</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t7" href="#t7">7</a></span><span class="t"><span class="key">from</span> <span class="nam">typing</span> <span class="key">import</span> <span class="nam">Callable</span><span class="op">,</span> <span class="nam">Sequence</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t8" href="#t8">8</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t9" href="#t9">9</a></span><span class="t"><span class="key">from</span> <span class="nam">muutils</span><span class="op">.</span><span class="nam">json_serialize</span> <span class="key">import</span> <span class="nam">JSONitem</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t10" href="#t10">10</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t11" href="#t11">11</a></span><span class="t"><span class="nam">_GZIP_EXTENSIONS</span><span class="op">:</span> <span class="nam">tuple</span> <span class="op">=</span> <span class="op">(</span><span class="str">".gz"</span><span class="op">,</span> <span class="str">".gzip"</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t12" href="#t12">12</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t13" href="#t13">13</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t14" href="#t14">14</a></span><span class="t"><span class="key">def</span> <span class="nam">_file_is_gzip</span><span class="op">(</span><span class="nam">path</span><span class="op">:</span> <span class="nam">str</span><span class="op">)</span> <span class="op">-&gt;</span> <span class="nam">bool</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t15" href="#t15">15</a></span><span class="t">    <span class="key">return</span> <span class="nam">any</span><span class="op">(</span><span class="nam">str</span><span class="op">(</span><span class="nam">path</span><span class="op">)</span><span class="op">.</span><span class="nam">endswith</span><span class="op">(</span><span class="nam">ext</span><span class="op">)</span> <span class="key">for</span> <span class="nam">ext</span> <span class="key">in</span> <span class="nam">_GZIP_EXTENSIONS</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t16" href="#t16">16</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t17" href="#t17">17</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t18" href="#t18">18</a></span><span class="t"><span class="key">def</span> <span class="nam">_get_opener</span><span class="op">(</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t19" href="#t19">19</a></span><span class="t">    <span class="nam">path</span><span class="op">:</span> <span class="nam">str</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t20" href="#t20">20</a></span><span class="t">    <span class="nam">use_gzip</span><span class="op">:</span> <span class="nam">bool</span> <span class="op">|</span> <span class="key">None</span> <span class="op">=</span> <span class="key">None</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t21" href="#t21">21</a></span><span class="t"><span class="op">)</span> <span class="op">-&gt;</span> <span class="nam">Callable</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t22" href="#t22">22</a></span><span class="t">    <span class="key">if</span> <span class="nam">use_gzip</span> <span class="key">is</span> <span class="key">None</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t23" href="#t23">23</a></span><span class="t">        <span class="nam">use_gzip</span> <span class="op">=</span> <span class="nam">_file_is_gzip</span><span class="op">(</span><span class="nam">path</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t24" href="#t24">24</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t25" href="#t25">25</a></span><span class="t">    <span class="com"># appears to be another mypy bug</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t26" href="#t26">26</a></span><span class="t">    <span class="com"># https://github.com/python/mypy/issues/10740</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t27" href="#t27">27</a></span><span class="t">    <span class="key">return</span> <span class="nam">open</span> <span class="key">if</span> <span class="key">not</span> <span class="nam">use_gzip</span> <span class="key">else</span> <span class="nam">gzip</span><span class="op">.</span><span class="nam">open</span> <span class="com"># type: ignore</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t28" href="#t28">28</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t29" href="#t29">29</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t30" href="#t30">30</a></span><span class="t"><span class="key">def</span> <span class="nam">jsonl_load</span><span class="op">(</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t31" href="#t31">31</a></span><span class="t">    <span class="nam">path</span><span class="op">:</span> <span class="nam">str</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t32" href="#t32">32</a></span><span class="t">    <span class="op">/</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t33" href="#t33">33</a></span><span class="t">    <span class="op">*</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t34" href="#t34">34</a></span><span class="t">    <span class="nam">use_gzip</span><span class="op">:</span> <span class="nam">bool</span> <span class="op">|</span> <span class="key">None</span> <span class="op">=</span> <span class="key">None</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t35" href="#t35">35</a></span><span class="t"><span class="op">)</span> <span class="op">-&gt;</span> <span class="nam">list</span><span class="op">[</span><span class="nam">JSONitem</span><span class="op">]</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t36" href="#t36">36</a></span><span class="t">    <span class="nam">opener</span><span class="op">:</span> <span class="nam">Callable</span> <span class="op">=</span> <span class="nam">_get_opener</span><span class="op">(</span><span class="nam">path</span><span class="op">,</span> <span class="nam">use_gzip</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t37" href="#t37">37</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t38" href="#t38">38</a></span><span class="t">    <span class="nam">data</span><span class="op">:</span> <span class="nam">list</span><span class="op">[</span><span class="nam">JSONitem</span><span class="op">]</span> <span class="op">=</span> <span class="nam">list</span><span class="op">(</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t39" href="#t39">39</a></span><span class="t">    <span class="key">with</span> <span class="nam">opener</span><span class="op">(</span><span class="nam">path</span><span class="op">,</span> <span class="str">"rt"</span><span class="op">,</span> <span class="nam">encoding</span><span class="op">=</span><span class="str">"UTF-8"</span><span class="op">)</span> <span class="key">as</span> <span class="nam">f</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t40" href="#t40">40</a></span><span class="t">        <span class="key">for</span> <span class="nam">line</span> <span class="key">in</span> <span class="nam">f</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t41" href="#t41">41</a></span><span class="t">            <span class="nam">data</span><span class="op">.</span><span class="nam">append</span><span class="op">(</span><span class="nam">json</span><span class="op">.</span><span class="nam">loads</span><span class="op">(</span><span class="nam">line</span><span class="op">)</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t42" href="#t42">42</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t43" href="#t43">43</a></span><span class="t">    <span class="key">return</span> <span class="nam">data</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t44" href="#t44">44</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t45" href="#t45">45</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t46" href="#t46">46</a></span><span class="t"><span class="key">def</span> <span class="nam">jsonl_load_log</span><span class="op">(</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t47" href="#t47">47</a></span><span class="t">    <span class="nam">path</span><span class="op">:</span> <span class="nam">str</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t48" href="#t48">48</a></span><span class="t">    <span class="op">/</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t49" href="#t49">49</a></span><span class="t">    <span class="op">*</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t50" href="#t50">50</a></span><span class="t">    <span class="nam">use_gzip</span><span class="op">:</span> <span class="nam">bool</span> <span class="op">|</span> <span class="key">None</span> <span class="op">=</span> <span class="key">None</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t51" href="#t51">51</a></span><span class="t"><span class="op">)</span> <span class="op">-&gt;</span> <span class="nam">list</span><span class="op">[</span><span class="nam">dict</span><span class="op">]</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t52" href="#t52">52</a></span><span class="t">    <span class="nam">data</span><span class="op">:</span> <span class="nam">list</span><span class="op">[</span><span class="nam">JSONitem</span><span class="op">]</span> <span class="op">=</span> <span class="nam">jsonl_load</span><span class="op">(</span><span class="nam">path</span><span class="op">,</span> <span class="nam">use_gzip</span><span class="op">=</span><span class="nam">use_gzip</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t53" href="#t53">53</a></span><span class="t">    <span class="key">for</span> <span class="nam">idx</span><span class="op">,</span> <span class="nam">item</span> <span class="key">in</span> <span class="nam">enumerate</span><span class="op">(</span><span class="nam">data</span><span class="op">)</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t54" href="#t54">54</a></span><span class="t">        <span class="key">assert</span> <span class="nam">isinstance</span><span class="op">(</span><span class="nam">item</span><span class="op">,</span> <span class="nam">dict</span><span class="op">)</span><span class="op">,</span> <span class="op">(</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t55" href="#t55">55</a></span><span class="t">            <span class="fst">f"</span><span class="fst">item </span><span class="op">{</span><span class="nam">idx</span> <span class="op">=</span> <span class="op">}</span><span class="fst"> from file </span><span class="op">{</span><span class="nam">path</span><span class="op">}</span><span class="fst"> is not a dict: </span><span class="op">{</span><span class="nam">type</span><span class="op">(</span><span class="nam">item</span><span class="op">)</span> <span class="op">=</span> <span class="op">}</span><span class="fst">\t</span><span class="op">{</span><span class="nam">item</span> <span class="op">=</span> <span class="op">}</span><span class="fst">"</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t56" href="#t56">56</a></span><span class="t">        <span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t57" href="#t57">57</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t58" href="#t58">58</a></span><span class="t">    <span class="com"># mypy complains that we are returning a list[JSONitem] but the function signature says list[dict]</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t59" href="#t59">59</a></span><span class="t">    <span class="com"># it can't figure out that we are asserting that all items are dicts</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t60" href="#t60">60</a></span><span class="t">    <span class="key">return</span> <span class="nam">data</span> <span class="com"># type: ignore</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t61" href="#t61">61</a></span><span class="t"> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t62" href="#t62">62</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t63" href="#t63">63</a></span><span class="t"><span class="key">def</span> <span class="nam">jsonl_write</span><span class="op">(</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t64" href="#t64">64</a></span><span class="t">    <span class="nam">path</span><span class="op">:</span> <span class="nam">str</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t65" href="#t65">65</a></span><span class="t">    <span class="nam">items</span><span class="op">:</span> <span class="nam">Sequence</span><span class="op">[</span><span class="nam">JSONitem</span><span class="op">]</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t66" href="#t66">66</a></span><span class="t">    <span class="nam">use_gzip</span><span class="op">:</span> <span class="nam">bool</span> <span class="op">|</span> <span class="key">None</span> <span class="op">=</span> <span class="key">None</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t67" href="#t67">67</a></span><span class="t">    <span class="nam">gzip_compresslevel</span><span class="op">:</span> <span class="nam">int</span> <span class="op">=</span> <span class="num">2</span><span class="op">,</span> </span><span class="r"></span></p>
<p class="run run2"><span class="n"><a id="t68" href="#t68">68</a></span><span class="t"><span class="op">)</span> <span class="op">-&gt;</span> <span class="key">None</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t69" href="#t69">69</a></span><span class="t">    <span class="nam">opener</span><span class="op">:</span> <span class="nam">Callable</span> <span class="op">=</span> <span class="nam">_get_opener</span><span class="op">(</span><span class="nam">path</span><span class="op">,</span> <span class="nam">use_gzip</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t70" href="#t70">70</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t71" href="#t71">71</a></span><span class="t">    <span class="nam">opener_kwargs</span><span class="op">:</span> <span class="nam">dict</span> <span class="op">=</span> <span class="nam">dict</span><span class="op">(</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t72" href="#t72">72</a></span><span class="t">    <span class="key">if</span> <span class="nam">use_gzip</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t73" href="#t73">73</a></span><span class="t">        <span class="nam">opener_kwargs</span> <span class="op">=</span> <span class="nam">dict</span><span class="op">(</span><span class="nam">compresslevel</span><span class="op">=</span><span class="nam">gzip_compresslevel</span><span class="op">)</span> </span><span class="r"></span></p>
<p class="pln"><span class="n"><a id="t74" href="#t74">74</a></span><span class="t"> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t75" href="#t75">75</a></span><span class="t">    <span class="key">with</span> <span class="nam">opener</span><span class="op">(</span><span class="nam">path</span><span class="op">,</span> <span class="str">"wt"</span><span class="op">,</span> <span class="nam">encoding</span><span class="op">=</span><span class="str">"UTF-8"</span><span class="op">,</span> <span class="op">**</span><span class="nam">opener_kwargs</span><span class="op">)</span> <span class="key">as</span> <span class="nam">f</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t76" href="#t76">76</a></span><span class="t">        <span class="key">for</span> <span class="nam">item</span> <span class="key">in</span> <span class="nam">items</span><span class="op">:</span> </span><span class="r"></span></p>
<p class="run"><span class="n"><a id="t77" href="#t77">77</a></span><span class="t">            <span class="nam">f</span><span class="op">.</span><span class="nam">write</span><span class="op">(</span><span class="nam">json</span><span class="op">.</span><span class="nam">dumps</span><span class="op">(</span><span class="nam">item</span><span class="op">)</span> <span class="op">+</span> <span class="str">"\n"</span><span class="op">)</span> </span><span class="r"></span></p>
</main>
<footer>
  <!-- Bottom-of-page navigation: previous/next report pages, the index, and the
       generator version with the report's creation time. -->
  <div class="content">
    <p>
      <a class="nav" href="z_d2e926c6535458c2_util_py.html">« prev</a>
      <a class="nav" href="index.html">^ index</a>
      <a class="nav" href="z_6c5f9b70fbd6dea2_kappa_py.html">» next</a>
      <a class="nav" href="https://coverage.readthedocs.io/en/7.13.5">coverage.py v7.13.5</a>,
      created at <time datetime="2026-03-18T21:32-06:00">2026-03-18 21:32 -0600</time>
    </p>
  </div>
</footer>
</body>
</html>