Skip to content

Commit d2a80ee

Browse files
authored
Merge pull request #267 from oree-xx/data
Move data_to_csv function to shared.py
2 parents dae9e83 + 39e692e commit d2a80ee

File tree

4 files changed

+22
-44
lines changed

4 files changed

+22
-44
lines changed

scripts/2-process/gcs_process.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
"""
55
# Standard library
66
import argparse
7-
import csv
87
import os
98
import sys
109
import textwrap
@@ -82,16 +81,6 @@ def parse_arguments():
8281
return args
8382

8483

85-
def data_to_csv(args, data, file_path):
86-
if not args.enable_save:
87-
return
88-
os.makedirs(PATHS["data_phase"], exist_ok=True)
89-
# emulate csv.unix_dialect
90-
data.to_csv(
91-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
92-
)
93-
94-
9584
def process_product_totals(args, count_data):
9685
"""
9786
Processing count data: totals by product
@@ -131,7 +120,7 @@ def process_product_totals(args, count_data):
131120
data.items(), columns=["CC legal tool product", "Count"]
132121
)
133122
file_path = shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv")
134-
data_to_csv(args, data, file_path)
123+
shared.data_to_csv(args, data, file_path)
135124

136125

137126
def process_latest_prior_retired_totals(args, count_data):
@@ -212,7 +201,7 @@ def process_latest_prior_retired_totals(args, count_data):
212201
file_path = shared.path_join(
213202
PATHS["data_phase"], f"gcs_status_{key}_totals.csv"
214203
)
215-
data_to_csv(args, dataframe, file_path)
204+
shared.data_to_csv(args, dataframe, file_path)
216205

217206

218207
def process_totals_by_free_cultural(args, count_data):
@@ -245,7 +234,7 @@ def process_totals_by_free_cultural(args, count_data):
245234
file_path = shared.path_join(
246235
PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"
247236
)
248-
data_to_csv(args, data, file_path)
237+
shared.data_to_csv(args, data, file_path)
249238

250239

251240
def process_totals_by_restrictions(args, count_data):
@@ -279,7 +268,7 @@ def process_totals_by_restrictions(args, count_data):
279268
file_path = shared.path_join(
280269
PATHS["data_phase"], "gcs_totals_by_restrictions.csv"
281270
)
282-
data_to_csv(args, data, file_path)
271+
shared.data_to_csv(args, data, file_path)
283272

284273

285274
def process_totals_by_language(args, data):
@@ -300,7 +289,7 @@ def process_totals_by_language(args, data):
300289
file_path = shared.path_join(
301290
PATHS["data_phase"], "gcs_totals_by_language.csv"
302291
)
303-
data_to_csv(args, data, file_path)
292+
shared.data_to_csv(args, data, file_path)
304293

305294

306295
def process_totals_by_country(args, data):
@@ -321,7 +310,7 @@ def process_totals_by_country(args, data):
321310
file_path = shared.path_join(
322311
PATHS["data_phase"], "gcs_totals_by_country.csv"
323312
)
324-
data_to_csv(args, data, file_path)
313+
shared.data_to_csv(args, data, file_path)
325314

326315

327316
def main():

scripts/2-process/github_process.py

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
"""
66
# Standard library
77
import argparse
8-
import csv
98
import os
109
import sys
1110
import traceback
@@ -74,16 +73,6 @@ def parse_arguments():
7473
return args
7574

7675

77-
def data_to_csv(args, data, file_path):
78-
if not args.enable_save:
79-
return
80-
os.makedirs(PATHS["data_phase"], exist_ok=True)
81-
# emulate csv.unix_dialect
82-
data.to_csv(
83-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
84-
)
85-
86-
8776
def process_totals_by_license(args, count_data):
8877
"""
8978
Processing count data: totals by License
@@ -106,7 +95,7 @@ def process_totals_by_license(args, count_data):
10695
file_path = shared.path_join(
10796
PATHS["data_phase"], "github_totals_by_license.csv"
10897
)
109-
data_to_csv(args, data, file_path)
98+
shared.data_to_csv(args, data, file_path)
11099

111100

112101
def process_totals_by_restriction(args, count_data):
@@ -140,7 +129,7 @@ def process_totals_by_restriction(args, count_data):
140129
file_path = shared.path_join(
141130
PATHS["data_phase"], "github_totals_by_restriction.csv"
142131
)
143-
data_to_csv(args, data, file_path)
132+
shared.data_to_csv(args, data, file_path)
144133

145134

146135
def main():

scripts/2-process/wikipedia_process.py

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
"""
66
# Standard library
77
import argparse
8-
import csv
98
import os
109
import sys
1110
import textwrap
@@ -85,16 +84,6 @@ def parse_arguments():
8584
return args
8685

8786

88-
def data_to_csv(args, data, file_path):
89-
if not args.enable_save:
90-
return
91-
os.makedirs(PATHS["data_phase"], exist_ok=True)
92-
# emulate csv.unix_dialect
93-
data.to_csv(
94-
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
95-
)
96-
97-
9887
def process_highest_language_usage(args, count_data):
9988
"""
10089
Processing count data: Most represented languages
@@ -113,7 +102,7 @@ def process_highest_language_usage(args, count_data):
113102
file_path = shared.path_join(
114103
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
115104
)
116-
data_to_csv(args, top_10, file_path)
105+
shared.data_to_csv(args, top_10, file_path)
117106

118107

119108
def process_least_language_usage(args, count_data):
@@ -136,7 +125,7 @@ def process_least_language_usage(args, count_data):
136125
file_path = shared.path_join(
137126
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
138127
)
139-
data_to_csv(args, bottom_10, file_path)
128+
shared.data_to_csv(args, bottom_10, file_path)
140129

141130

142131
def process_language_representation(args, count_data):
@@ -162,7 +151,7 @@ def process_language_representation(args, count_data):
162151
file_path = shared.path_join(
163152
PATHS["data_phase"], "wikipedia_language_representation.csv"
164153
)
165-
data_to_csv(args, language_counts, file_path)
154+
shared.data_to_csv(args, language_counts, file_path)
166155

167156

168157
def main():

scripts/shared.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
# Standard library
2+
import csv
23
import logging
34
import os
45
import sys
@@ -36,6 +37,16 @@ def __init__(self, message, exit_code=None):
3637
super().__init__(self.message)
3738

3839

40+
def data_to_csv(args, data, file_path):
41+
if not args.enable_save:
42+
return
43+
os.makedirs(args.paths["data_phase"], exist_ok=True)
44+
# emulate csv.unix_dialect
45+
data.to_csv(
46+
file_path, index=False, quoting=csv.QUOTE_ALL, lineterminator="\n"
47+
)
48+
49+
3950
def check_for_data_files(args, file_paths, QUARTER):
4051
if args.force:
4152
return

0 commit comments

Comments
 (0)