33"""
44
55# Standard library
6- import logging
7- import os .path
6+ import os
87import re
98import sys
109import traceback
1615import pandas as pd
1716import plotly .express as px
1817import seaborn as sns
19-
20- warnings .filterwarnings ("ignore" )
21-
22- # Third-party
2318from wordcloud import STOPWORDS , WordCloud # noqa: E402
2419
25- # Set the current working directory
26- PATH_WORK_DIR = os .path .dirname (os .path .abspath (__file__ ))
27-
28- # Set the current working directory
29- CWD = os .path .dirname (os .path .abspath (__file__ ))
30-
31- # Set up the logger
32- LOG = logging .getLogger (__name__ )
33- LOG .setLevel (logging .INFO )
20+ sys .path .append ("." )
21+ # First-party/Local
22+ import quantify # noqa: E402
3423
35- # Define both the handler and the formatter
36- handler = logging .StreamHandler ()
37- formatter = logging .Formatter (
38- "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
39- )
40-
41- # Add formatter to the handler
42- handler .setFormatter (formatter )
43-
44- # Add handler to the logger
45- LOG .addHandler (handler )
24+ # Warning suppression /!\ Caution /!\
25+ warnings .filterwarnings ("ignore" )
4626
47- # Log the start of the script execution
48- LOG . info ( "Script execution started." )
27+ # Setup PATH_WORK_DIR, and LOGGER using quantify.setup()
28+ _ , PATH_WORK_DIR , _ , _ , LOGGER = quantify . setup ( __file__ )
4929
5030
5131def tags_frequency (csv_path , column_names ):
@@ -59,7 +39,7 @@ def tags_frequency(csv_path, column_names):
5939 Example: ["tags", "description"]
6040
6141 """
62- LOG .info ("Generating word cloud based on tags." )
42+ LOGGER .info ("Generating word cloud based on tags." )
6343
6444 df = pd .read_csv (csv_path )
6545 # Process each column containing tags
@@ -79,7 +59,7 @@ def tags_frequency(csv_path, column_names):
7959 and str (row ) != ""
8060 and str (row ) != "nan"
8161 ):
82- LOG .debug (f"Processing row: { row } " )
62+ LOGGER .debug (f"Processing row: { row } " )
8363 if "ChineseinUS.org" in str (row ):
8464 row = "ChineseinUS"
8565 list2 += re .split (r"\s|(?<!\d)[,.](?!\d)" , str (row ))
@@ -168,7 +148,7 @@ def time_trend_helper(df):
168148 Returns:
169149 - DataFrame: DataFrame with counts of entries per year.
170150 """
171- LOG .info ("Extracting year-wise count of entries." )
151+ LOGGER .info ("Extracting year-wise count of entries." )
172152
173153 year_list = []
174154 for date_row in df ["dates" ][0 :]:
@@ -196,7 +176,7 @@ def time_trend(csv_path):
196176 Args:
197177 - csv_path (str): Path to the CSV file.
198178 """
199- LOG .info ("Generating time trend line graph." )
179+ LOGGER .info ("Generating time trend line graph." )
200180
201181 df = pd .read_csv (csv_path )
202182 count_df = time_trend_helper (df )
@@ -239,7 +219,7 @@ def time_trend_compile_helper(yearly_count):
239219 Returns:
240220 - DataFrame: Filtered yearly count data.
241221 """
242- LOG .info ("Filtering yearly trend data." )
222+ LOGGER .info ("Filtering yearly trend data." )
243223
244224 Years = np .arange (2018 , 2023 )
245225 yearly_count ["year" ] = list (yearly_count .index )
@@ -249,7 +229,7 @@ def time_trend_compile_helper(yearly_count):
249229 int (yearly_count ["year" ][num ]) >= 2018
250230 ):
251231 counts .append (yearly_count ["Counts" ][num ])
252- LOG .info (f"{ counts } " )
232+ LOGGER .info (f"{ counts } " )
253233 final_yearly_count = pd .DataFrame (
254234 list (zip (Years , counts )), columns = ["Years" , "Yearly_counts" ]
255235 )
@@ -260,7 +240,7 @@ def time_trend_compile():
260240 """
261241 Compile yearly trends for different licenses and plot them.
262242 """
263- LOG .info ("Compiling yearly trends for different licenses." )
243+ LOGGER .info ("Compiling yearly trends for different licenses." )
264244
265245 license1 = pd .read_csv ("../flickr/dataset/cleaned_license1.csv" )
266246 license2 = pd .read_csv ("../flickr/dataset/cleaned_license2.csv" )
@@ -319,7 +299,7 @@ def time_trend_compile():
319299 yearly_count6 = time_trend_compile_helper (yearly_count6 )
320300 yearly_count9 = time_trend_compile_helper (yearly_count9 )
321301 yearly_count10 = time_trend_compile_helper (yearly_count10 )
322- LOG .info (f"{ yearly_count1 } " )
302+ LOGGER .info (f"{ yearly_count1 } " )
323303
324304 # Plot yearly trend for all licenses
325305 plt .plot (
@@ -408,20 +388,22 @@ def view_compare_helper(df):
408388 Returns:
409389 - int: Maximum views.
410390 """
411- LOG .info ("Calculating maximum views of pictures under a license." )
391+ LOGGER .info ("Calculating maximum views of pictures under a license." )
412392
413393 highest_view = int (max (df ["views" ]))
414394 df = df .sort_values ("views" , ascending = False )
415- LOG .info (f"DataFrame sorted by views in descending order: { df } " )
416- LOG .info (f"Maximum views found: { highest_view } " )
395+ LOGGER .info (f"DataFrame sorted by views in descending order: { df } " )
396+ LOGGER .info (f"Maximum views found: { highest_view } " )
417397 return highest_view
418398
419399
420400def view_compare ():
421401 """
422402 Compare maximum views of pictures under different licenses.
423403 """
424- LOG .info ("Comparing maximum views of pictures under different licenses." )
404+ LOGGER .info (
405+ "Comparing maximum views of pictures under different licenses."
406+ )
425407
426408 license1 = pd .read_csv (
427409 os .path .join (PATH_WORK_DIR , "../flickr/dataset/cleaned_license1.csv" )
@@ -461,7 +443,7 @@ def view_compare():
461443 maxs = []
462444 for lic in licenses :
463445 maxs .append (view_compare_helper (lic ))
464- LOG .info (f"{ maxs } " )
446+ LOGGER .info (f"{ maxs } " )
465447 # Create DataFrame to store license and their maximum views
466448 temp_data = pd .DataFrame ()
467449 temp_data ["Licenses" ] = [
@@ -517,7 +499,9 @@ def total_usage():
517499 """
518500 Generate a bar plot showing the total usage of different licenses.
519501 """
520- LOG .info ("Generating bar plot showing total usage of different licenses." )
502+ LOGGER .info (
503+ "Generating bar plot showing total usage of different licenses."
504+ )
521505
522506 # Reads the license total file as the input dataset
523507 df = pd .read_csv (
@@ -538,15 +522,14 @@ def main():
538522
539523
540524if __name__ == "__main__" :
541- # Exception Handling
542525 try :
543526 main ()
544527 except SystemExit as e :
545- LOG .error (f"System exit with code: { e .code } " )
528+ LOGGER .error (f"System exit with code: { e .code } " )
546529 sys .exit (e .code )
547530 except KeyboardInterrupt :
548- LOG .info ("(130) Halted via KeyboardInterrupt." )
531+ LOGGER .info ("(130) Halted via KeyboardInterrupt." )
549532 sys .exit (130 )
550533 except Exception :
551- LOG . error (f"(1) Unhandled exception: { traceback .format_exc ()} " )
534+ LOGGER . exception (f"(1) Unhandled exception: { traceback .format_exc ()} " )
552535 sys .exit (1 )