Merge pull request #17 from Matth-L/main

filipesmg · web-flow · commit 036c530bd5ce · 2026-01-09T14:31:29.000+01:00
Added the possibility to pass folders to JuRepTool; added an option to set the Font Awesome location for JuRepTool.
diff --git a/configs/jureptool/config.yml b/configs/jureptool/config.yml
@@ -24,10 +24,13 @@ hostname: 'https://llview.fz-juelich.de/#system(-dc)[ ,_]#'
 # JURI/js/ext, (https://github.com/FZJ-JSC/JURI/tree/main/js/ext)
 # Moreover, Content Delivery Network (CDN) can also be used
 # ex :
+# plotly_location: 'https://cdn.plot.ly/plotly-3.3.0.min.js'
 # jquery_location: 'https://code.jquery.com/jquery-3.6.0.min.js'
-# plotly_location: 'https://cdn.plot.ly/plotly-latest.min.js'
+# fontawesome_location: 'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/7.0.1/css/fontawesome.min.css'
+
 plotly_location: null
 jquery_location: null
+fontawesome_location: null
 
 #Folder prefix to be added to items in plotlists file
 folder_prefix: '$LLVIEW_DATA/$LLVIEW_SYSTEMNAME/tmp/jobreport/data/'
diff --git a/jureptool/src/GenerateHTML.py b/jureptool/src/GenerateHTML.py
@@ -29,8 +29,13 @@ def CreateHTML( config,
   <head>
   <meta charset="UTF-8">
 """
+  if loc := config['appearance'].get('fontawesome_location'):
+    fontawesome_location = loc
+  else:
+    fontawesome_location = f"{replace_vars(config['appearance']['hostname'],config['appearance'])}/css/ext/font-awesome.min.css"
+
   html += f"""
-  <link rel="stylesheet" href='{replace_vars(config['appearance']['hostname'],config['appearance'])}/css/ext/font-awesome.min.css'>
+  <link rel="stylesheet" href='{fontawesome_location}'>
   <link rel="icon" type="image/svg+xml"
       href="data:image/svg+xml,%3Csvg height='100%25' stroke-miterlimit='10' style='fill-rule:nonzero;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;' version='1.1' viewBox='0 0 32 32' width='100%25' xml:space='preserve' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink'%3E%3Cpath d='M8.02154 13.6133L8.03331 23.6475L10.2411 23.6597L12.4489 23.6718L12.4489 25.7506L12.4489 27.8294L18.7334 27.8294L25.018 27.8294L25.018 26.6379L25.018 25.4464L20.0885 25.4464L15.1589 25.4464L15.1589 24.5587L15.1589 23.6709L17.869 23.6709L20.5791 23.6709L20.5791 22.456L20.5791 21.2412L17.869 21.2412L15.1589 21.2412L15.1589 14.4894L15.1589 7.73754L13.8039 7.73754L12.4489 7.73754L12.4489 14.4894L12.4489 21.2412L11.5844 21.2412L10.72 21.2412L10.72 12.4101L10.72 3.57898L9.36489 3.57898L8.00972 3.57898L8.02154 13.6133' fill='%23023d6b' fill-rule='evenodd' opacity='1' stroke='none'/%3E%3Cpath d='M15.0868 0.0309399C9.2877 0.347224 4.09586 3.83135 1.56139 9.10753C-0.520462 13.4413-0.520462 18.5745 1.56139 22.9083C5.1584 30.3963 13.8239 33.894 21.607 30.9994C25.9088 29.3995 29.3916 25.9168 30.9915 21.615C32.5077 17.538 32.307 12.997 30.4386 9.10753C28.097 4.233 23.5169 0.89078 18.1603 0.147847C17.6781 0.080936 16.1368-0.0254576 15.8598-0.0109727C15.7956-0.0076085 15.4477 0.0112218 15.0868 0.0309399M8.02154 13.6133L8.03331 23.6475L10.2411 23.6597L12.4489 23.6718L12.4489 25.7506L12.4489 27.8294L18.7334 27.8294L25.018 27.8294L25.018 26.6379L25.018 25.4464L20.0885 25.4464L15.1589 25.4464L15.1589 24.5587L15.1589 23.6709L17.869 23.6709L20.5791 23.6709L20.5791 22.456L20.5791 21.2412L17.869 21.2412L15.1589 21.2412L15.1589 14.4894L15.1589 7.73754L13.8039 7.73754L12.4489 7.73754L12.4489 14.4894L12.4489 21.2412L11.5844 21.2412L10.72 21.2412L10.72 12.4101L10.72 3.57898L9.36489 3.57898L8.00972 3.57898L8.02154 13.6133' fill='%23ffffff' fill-rule='evenodd' opacity='1' stroke='none'/%3E%3C/svg%3E" />
 """
diff --git a/jureptool/src/main.py b/jureptool/src/main.py
@@ -1,12 +1,13 @@
 # Copyright (c) 2023 Forschungszentrum Juelich GmbH.
-# This file is part of LLview. 
+# This file is part of LLview.
 #
 # This is an open source software distributed under the GPLv3 license. More information see the LICENSE file at the top level.
 #
 # Contributions must follow the Contributor License Agreement. More information see the CONTRIBUTING.md file at the top level.
 #
 # Contributors:
-#    Filipe Guimarães (Forschungszentrum Juelich GmbH) 
+#    Filipe Guimarães (Forschungszentrum Juelich GmbH)
+#    Matthias Lapu (CEA)
 
 import os                                                  # OS library (files and folders operations)
 import sys                                                 # system variables for stdout and stderr
@@ -30,7 +31,7 @@
 import itertools
 import shutil
 import yaml
-import datetime 
+import datetime
 from pytz import timezone
 
 
@@ -57,7 +58,7 @@ def check_shutdown_callback(self):
   """
   Callback used after every job to check if shutdown file exists
   to terminate the jobs
-  """  
+  """
   if check_shutdown():
     if email: msg.send_email(semail,remail,"Shutdown file found, stopping generation of PDF-job reports")
     log.warning("Shutdown file found, stopping jobs")
@@ -68,7 +69,7 @@ def error_handler(e):
   """
   Error callback to catch any raised exception raised by some
   of the child processes, and send email
-  """  
+  """
   if email: msg.send_email(semail,remail,f"Error in PDF-Job report:\n {' '.join(traceback.format_exception(type(e), e, e.__traceback__))}")
   log.error(f"Error:\n {' '.join(traceback.format_exception(type(e), e, e.__traceback__))}")
   global nerrors
@@ -98,7 +99,7 @@ def ProcessReport(njob,total_jobs,job,config):
   Wrapper to catch eventual errors in _ProcessReport
   """
   log = logging.getLogger('logger')
-  
+
   try:
     _ProcessReport(njob,total_jobs,job,config)
   except Exception as e:
@@ -125,7 +126,7 @@ def _ProcessReport(njob,total_jobs,job,config):
 
   # Getting timezonegap
   config['appearance']['timezonegap'] = timezone(config['appearance']['timezone']).localize(datetime.datetime.strptime(data["job"]["starttime"],'%Y-%m-%d %H:%M:%S')).utcoffset().seconds
-  
+
   # Removing sensitive data in demo mode
   if config['demo']:
     # folder = "."
@@ -154,7 +155,7 @@ def _ProcessReport(njob,total_jobs,job,config):
   except (ValueError,KeyError):
     data['job']['numgpus'] = 0
     num_gpus = 0
-  
+
   # Escaping job name
   data['job']['name'] = re.escape(data['job']['name'])
 
@@ -287,7 +288,7 @@ def _ProcessReport(njob,total_jobs,job,config):
   ##################################### Reading data from files #####################################
   for fh,fh_info in files.items():
     # If number of points is less than 2 or if the filename is not given, don't read the file
-    if (int(fh_info['datapoints'])<2) or (fh not in data['files']) or (data['files'][fh] in ["",0,"-"]): 
+    if (int(fh_info['datapoints'])<2) or (fh not in data['files']) or (data['files'][fh] in ["",0,"-"]):
       continue
 
     # Reading file with information for all nodes and all times
@@ -313,11 +314,11 @@ def _ProcessReport(njob,total_jobs,job,config):
     y_x_keys = [key for key in {**y_headers, **x_headers}.keys()]
 
     # Dropping duplicated lines
-    df_temp.drop_duplicates(subset=y_x_keys, keep='first', inplace=True) 
+    df_temp.drop_duplicates(subset=y_x_keys, keep='first', inplace=True)
 
     # Dropping rows above ts range
     if config['appearance']['maxsec']:
-      df_temp.drop(df_temp[df_temp[config['plots']['_x']['header']] > df_temp[config['plots']['_x']['header']].min()+config['appearance']['maxsec']].index, inplace=True) 
+      df_temp.drop(df_temp[df_temp[config['plots']['_x']['header']] > df_temp[config['plots']['_x']['header']].min()+config['appearance']['maxsec']].index, inplace=True)
 
     # Dropping rows with infinity values
     df_temp = df_temp[~df_temp.isin([np.inf, -np.inf]).any(axis=1)]
@@ -379,7 +380,7 @@ def _ProcessReport(njob,total_jobs,job,config):
       # If there are no graphs to plot in this section, skip
       if not graphs_to_plot: continue
 
-      # Getting the file headers in this section 
+      # Getting the file headers in this section
       files_in_section = [config_section[_]['_file_header'] for _ in graphs_to_plot]
 
       ################################# Setting up TOC and graphs_to_plot ####################################
@@ -487,7 +488,7 @@ def _ProcessReport(njob,total_jobs,job,config):
       to_plot_extra[section]['colorplot'] = []
       to_plot_extra[section]['unified'] = []
       to_plot_extra[section]['description'] = []
-      # Looping over graphs defined in the custom section 
+      # Looping over graphs defined in the custom section
       for idx,_ in enumerate(graphs_to_plot):
         # The header on the dat file uses a generic name, and not the real "name" of the graph
         to_plot_extra[section]['headers'].append(f"value{idx}")
@@ -548,7 +549,7 @@ def _ProcessReport(njob,total_jobs,job,config):
         df_overview[side]['legend'] = []
         for (fh,graphs),legend in zip(cols.items(),legends):
           # Skipping plot if no data is present
-          if files[fh]['data'] is None: continue 
+          if files[fh]['data'] is None: continue
           if x_header_overview == 'ts': x_header = 'datetime'
           df_temp = files[fh]['data'][list(graphs)].groupby([x_header], as_index=False).mean()
           # # Transforming timestamps (with timezone) to datetime
@@ -694,8 +695,8 @@ def _ProcessReport(njob,total_jobs,job,config):
     #   proj_end = timeline_df['end'].max()
     proj_end = datetime.datetime.timestamp(datetime.datetime.strptime(data['job']['updatetime'], '%Y-%m-%d %H:%M:%S'))
     timeline_df.loc[timeline_df['end']<0,'end'] = proj_end
-    timeline_df['start_time'] = timeline_df['beg'].apply(lambda x: datetime.datetime.fromtimestamp(int(x+config['appearance']['timezonegap']),datetime.timezone.utc)) 
-    timeline_df['end_time'] = timeline_df['end'].apply(lambda x: datetime.datetime.fromtimestamp(int(x+config['appearance']['timezonegap']),datetime.timezone.utc)) 
+    timeline_df['start_time'] = timeline_df['beg'].apply(lambda x: datetime.datetime.fromtimestamp(int(x+config['appearance']['timezonegap']),datetime.timezone.utc))
+    timeline_df['end_time'] = timeline_df['end'].apply(lambda x: datetime.datetime.fromtimestamp(int(x+config['appearance']['timezonegap']),datetime.timezone.utc))
     timeline_df['duration'] = timeline_df['end_time']-timeline_df['start_time']
     timeline_df[['color','edgecolor','colorhtml','edgecolorhtml']] = timeline_df['st'].apply(lambda x: add_color(x))
     # Escaping job names
@@ -764,7 +765,7 @@ def _ProcessReport(njob,total_jobs,job,config):
   # Output files:
   # output = f"{folder}/python_{data['files']['pdffile']}"
   output_pdf = f"{config['outfolder']}/{data['files']['pdffile']}"
-  if config['html'] or config['gzip']: 
+  if config['html'] or config['gzip']:
     output_html = f"{config['outfolder']}/{data['files']['htmlfile']}"
 
   # Getting time range of the job:
@@ -802,17 +803,17 @@ def _ProcessReport(njob,total_jobs,job,config):
     timeline_html,system_report_html = LastPages.LastPages(pdf,data,config,page_num,timeline_df,time_range,error_lines)
 
   ############################################################################
-  if config['html'] or config['gzip']: 
+  if config['html'] or config['gzip']:
     config['appearance']['jobid'] = data['job']['jobid'] # Job ID for title and filename
     config['appearance']['system'] = data['job']['system'].lower().replace('_',' ') # System for filename
-    GenerateHTML.CreateHTML(config, 
-                            figs, 
-                            navbar=navbar, 
-                            first=first_page_html, 
-                            overview=overview_fig, 
-                            nodelist=nodelist_html, 
+    GenerateHTML.CreateHTML(config,
+                            figs,
+                            navbar=navbar,
+                            first=first_page_html,
+                            overview=overview_fig,
+                            nodelist=nodelist_html,
                             timeline=timeline_html,
-                            system_report=system_report_html, 
+                            system_report=system_report_html,
                             filename=output_html)
   # Moving files to final folder
   if config['move']:
@@ -850,7 +851,7 @@ def process_plotlist(config,q):
     counter += 1
 
   # Getting list of json files with all running jobs (and finished in the last 30 min) to process
-  # If config['json']=True, all files are already json 
+  # If config['json']=True, all files are already json
   if config['json']:
     jobs = config['file']
   else:
@@ -868,21 +869,21 @@ def process_plotlist(config,q):
   njobs = len(jobs)
   total_jobs = min(njobs,config['maxjobs'])
 
-  if total_jobs==0: 
+  if total_jobs==0:
     log.warning(f"No jobs in plotlist file!")
     return
 
-  # Create pool for dispatching work 
+  # Create pool for dispatching work
   global pool
   pool = mp.Pool(config['nprocs'], worker_init, [q,config['logging']['level']])
 
   log.info(f"Generating report of {total_jobs} jobs")
 
   njob = 0
   for job in jobs:
-    njob += 1                      # FOR DEBUG 
-    if njob > config['maxjobs']:   # FOR DEBUG 
-      break                        # FOR DEBUG 
+    njob += 1                      # FOR DEBUG
+    if njob > config['maxjobs']:   # FOR DEBUG
+      break                        # FOR DEBUG
     pool.apply_async(ProcessReport, [njob,total_jobs,job,config], callback=check_shutdown_callback, error_callback=error_handler)
 
   pool.close()
@@ -946,12 +947,12 @@ def __init__(self,fmt,datefmt=""):
                     logging.ERROR: self.red + self.fmt + self.reset,
                     logging.CRITICAL: self.bold_red + self.fmt + self.reset
                   }
-    
+
   def format(self, record):
     log_fmt = self.FORMATS.get(record.levelno)
     formatter = logging.Formatter(fmt=log_fmt,datefmt=self.datefmt)
     return formatter.format(record)
-    
+
 # Adapted from: https://stackoverflow.com/a/53257669/3142385
 class _ExcludeErrorsFilter(logging.Filter):
     def filter(self, record):
@@ -967,7 +968,7 @@ def log_init(config):
   log = logging.getLogger('logger')
   log.setLevel(config['level'])
 
-  # Setup handler: file (when configured) or stdout, stderr 
+  # Setup handler: file (when configured) or stdout, stderr
   if 'file' in config:
     fh = logging.FileHandler(config['file'], mode=config['filemode'])
     fh.setLevel(config['level'])
@@ -1024,7 +1025,7 @@ def main():
 
   # Parse arguments
   parser = argparse.ArgumentParser(description="JuRepTool")
-  parser.add_argument("file", nargs="+", help="File including list of running and recently-finished jobs or JSON file of a job")
+  parser.add_argument("file", nargs="+", default="", help="File including list of running and recently-finished jobs or JSON file of a job")
   parser.add_argument("--daemon", default=False, action="store_true" , help="Run as a 'daemon', i.e., in an infinite loop")
   parser.add_argument("--demo", default=False, action="store_true" , help="Run in 'demo' mode (hide usernames, project id and job names)")
   parser.add_argument("--nomove", default=False, action="store_true" , help="Don't copy files to final location")
@@ -1056,12 +1057,23 @@ def main():
   #   config['appearance']['plotly_js'] = args.plotlyjs
 
   # Configuration
-  config['file'] = []
-  for file in args.file:
-    config['file']+=glob.glob(file)
-  config['json'] = False
-  if all([_.endswith('json') for _ in config['file']]):
-    config['json'] = True
+  config['file'] = set()
+  # A set is used to drop duplicates, e.g. when a file is given both directly and via its folder:
+  # input: fileA folderB(fileA,fileB) -> a list would hold fileA, fileA, fileB
+
+  if args.file:
+      for element in args.file:
+        if os.path.isfile(element):
+          config['file'].update(glob.glob(element))
+        elif os.path.isdir(element):
+          config['file'].update(os.path.join(element, fname) for fname in os.listdir(element) if fname.endswith('.json'))
+        else:
+          raise FileNotFoundError(f"File not found: {element}")
+      config['json'] = all([_.endswith('json') for _ in config['file']])
+  else:
+    parser.print_help()
+    raise FileNotFoundError(f"Config {args.config} does not exist")
+
   config['demo'] = args.demo
   config['html'] = not args.nohtml
   config['gzip'] = args.gzip