@@ -125,31 +125,6 @@ def parse_arguments():
125125 return args
126126
127127
128- def initialize_data_file (file_path , headers ):
129- """Initialize CSV file with headers if it doesn't exist."""
130- if not os .path .isfile (file_path ):
131- with open (file_path , "w" , encoding = "utf-8" , newline = "\n " ) as file_obj :
132- writer = csv .DictWriter (
133- file_obj , fieldnames = headers , dialect = "unix"
134- )
135- writer .writeheader ()
136-
137-
138- def initialize_all_data_files (args ):
139- """Initialize all data files used by this script.
140-
141- Creates the data directory and initializes empty CSVs with headers.
142- """
143- if not args .enable_save :
144- return
145-
146- os .makedirs (PATHS ["data_1-fetch" ], exist_ok = True )
147- initialize_data_file (FILE_ARXIV_COUNT , HEADER_COUNT )
148- initialize_data_file (FILE_ARXIV_CATEGORY_REPORT , HEADER_CATEGORY_REPORT )
149- initialize_data_file (FILE_ARXIV_YEAR , HEADER_YEAR )
150- initialize_data_file (FILE_ARXIV_AUTHOR_BUCKET , HEADER_AUTHOR_BUCKET )
151-
152-
153128def get_identifier_mapping ():
154129 global IDENTIER_MAPPING
155130 LOGGER .info ("Loading CC Legal Tool metadata for CC identifer mapping" )
@@ -472,19 +447,6 @@ def query_arxiv(args, session):
472447 return data , cc_articles_found
473448
474449
475- def rows_to_csv (args , fieldnames , rows , file_path ):
476- if not args .enable_save :
477- return args
478-
479- with open (file_path , "w" , encoding = "utf-8" , newline = "\n " ) as file_handle :
480- writer = csv .DictWriter (
481- file_handle , fieldnames = fieldnames , dialect = "unix"
482- )
483- writer .writeheader ()
484- for row in rows :
485- writer .writerow (row )
486-
487-
488450def write_data (args , data ):
489451 """
490452 Write fetched data to CSV files.
@@ -508,7 +470,9 @@ def write_data(args, data):
508470 }
509471 )
510472 rows .sort (key = itemgetter ("TOOL_IDENTIFIER" , "AUTHOR_BUCKET" ))
511- rows_to_csv (args , HEADER_AUTHOR_BUCKET , rows , FILE_ARXIV_AUTHOR_BUCKET )
473+ shared .rows_to_csv (
474+ args , HEADER_AUTHOR_BUCKET , rows , FILE_ARXIV_AUTHOR_BUCKET
475+ )
512476
513477 # Save category report
514478 # fetched_data["category_counts"]: {identifer: {category_code: count}}
@@ -527,15 +491,17 @@ def write_data(args, data):
527491 }
528492 )
529493 rows .sort (key = itemgetter ("TOOL_IDENTIFIER" , "CATEGORY_CODE" ))
530- rows_to_csv (args , HEADER_CATEGORY_REPORT , rows , FILE_ARXIV_CATEGORY_REPORT )
494+ shared .rows_to_csv (
495+ args , HEADER_CATEGORY_REPORT , rows , FILE_ARXIV_CATEGORY_REPORT
496+ )
531497
532498 # Save tool counts report
533499 # fetched_data["tool_counts"]: {identfier: count}
534500 rows = []
535501 for identifier , count in data ["tool_counts" ].items ():
536502 rows .append ({"TOOL_IDENTIFIER" : identifier , "COUNT" : count })
537503 rows .sort (key = itemgetter ("TOOL_IDENTIFIER" ))
538- rows_to_csv (args , HEADER_COUNT , rows , FILE_ARXIV_COUNT )
504+ shared . rows_to_csv (args , HEADER_COUNT , rows , FILE_ARXIV_COUNT )
539505
540506 # Save year count report
541507 # fetched_data["year_counts"]: {identifer: {year: count}}
@@ -546,7 +512,7 @@ def write_data(args, data):
546512 {"TOOL_IDENTIFIER" : identifier , "YEAR" : year , "COUNT" : count }
547513 )
548514 rows .sort (key = itemgetter ("TOOL_IDENTIFIER" , "YEAR" ))
549- rows_to_csv (args , HEADER_YEAR , rows , FILE_ARXIV_YEAR )
515+ shared . rows_to_csv (args , HEADER_YEAR , rows , FILE_ARXIV_YEAR )
550516
551517
552518def write_provence (args , cc_articles_found ):
@@ -584,7 +550,6 @@ def main():
584550 args = parse_arguments ()
585551 shared .paths_log (LOGGER , PATHS )
586552 shared .git_fetch_and_merge (args , PATHS ["repo" ])
587- initialize_all_data_files (args )
588553 get_identifier_mapping ()
589554 session = shared .get_session ()
590555 query_category_mapping (args , session )