diff --git a/README.md b/README.md index 1a319e7..2228e3e 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ * General configuration: [config/config.yaml](config/config.yaml) * Plot configuration: [config/plot_config.yaml](config/plot_config.yaml) -* Meta information: [workflow/scripts/bokeh_plot/components/plot_css_html.py](workflow/scripts/bokeh_plot/components/plot_css_html.py) +* Meta information: [workflow/scripts/plot/components/plot_css_html.py](workflow/scripts/plot/components/plot_css_html.py) ### Setup diff --git a/config/config.yaml b/config/config.yaml index f4ea4b2..808a22b 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -22,6 +22,7 @@ DISPLAY_LAYOUT_BINARY: /srv/data/seiler/hibf_benchmark/display_layout FILENAMES_FILE: /srv/public/leonard/hibf_benchmarks/data/1024/1024.filenames QUERY_FILE: /srv/public/leonard/hibf_benchmarks/data/1024/reads_e2_250/all.fastq +# All the parameters to run with raptor PARAMS: ALPHA: [60.0 ,40.0, 20.0, 10.0, 5.0, 2.0, 1.5, 1.3, 1.0, 0.7, 0.5] T_MAX: ?sorted([192, 768, 8192] + inclusive_range_mult(64, 2048, 2)) @@ -30,10 +31,11 @@ PARAMS: RELAXED_FPR: ?[0.05] + inclusive_range_add(0.1, 0.9, 0.1) MODE: [none, U, U+R] # [none, U, U+R] - +# Necessary data parameters to run raptor DATA_PARAMETERS: QUERY_ERRORS: 2 +# Default parameters to run raptor DEFAULT_PARAMS: ALPHA: 1.2 T_MAX: 192 @@ -43,4 +45,5 @@ DEFAULT_PARAMS: M_FPR: 0.05 MODE: U+R -NUM_THREADS: 1 # Number of threads to use per task, not for parallel execution +# Number of threads to use per task, not for parallel execution +NUM_THREADS: 1 diff --git a/config/plot_config.yaml b/config/plot_config.yaml index b540011..1c1f736 100644 --- a/config/plot_config.yaml +++ b/config/plot_config.yaml @@ -1,7 +1,8 @@ -PLOT_NAME: "plot1" -EXTRA_FILE_PLOTTING: True +PLOT_NAME: "plot_name" +EXTRA_FILE_PLOTTING: True # if True, ALL htmls in the results/html folder will be plotted +# Names for the different keys in the plot KEYS: alpha: alpha hash: hash @@ -12,6 +13,8 
@@ KEYS: U+R: U+R +# Format: all benchmark-times to plot +# Names: names for the different keys in the plot TIME: FORMAT: - determine_query_length_in_seconds @@ -42,6 +45,8 @@ TIME: generate_results_max_percentage: "Generate results (max) percentage" +# Format: all benchmark-sizes to plot +# Names: names for the different keys in the plot SIZE: FORMAT: - LEVEL_0_GB_SIZE diff --git a/workflow/envs/landingpage.yml b/workflow/envs/landingpage.yml index 4687a16..4d259bc 100644 --- a/workflow/envs/landingpage.yml +++ b/workflow/envs/landingpage.yml @@ -3,3 +3,4 @@ channels: - conda-forge dependencies: - html2image=2.0.4.3 + - beautifulsoup4=4.12.3 diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index fd0d52a..d1bf299 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -5,7 +5,7 @@ rule plot_data: output: PLOT_FILE=f"results/html/{config['PLOT_NAME']}.html", params: - THEME="workflow/scripts/bokeh_plot/plot_theme.yaml", + THEME="workflow/scripts/plot/plot_theme.yaml", KEYS=config["KEYS"], TIME=config["TIME"], SIZE=config["SIZE"], @@ -14,7 +14,7 @@ rule plot_data: conda: "../envs/bokeh.yml" script: - "../scripts/bokeh_plot/plot.py" + "../scripts/plot/plot.py" rule plot_landingpage: @@ -22,7 +22,6 @@ rule plot_landingpage: PLOT_FILE=f"results/html/{config['PLOT_NAME']}.html", output: OUTPUT_FILE="results/html/index.html", - PNG_FILE=f"results/html/{config['PLOT_NAME']}.png", params: EXTRA_FILE_PLOTTING=config["EXTRA_FILE_PLOTTING"], HTML_DIR="results/html", @@ -31,4 +30,4 @@ rule plot_landingpage: conda: "../envs/landingpage.yml" script: - "../scripts/landingpage.py" + "../scripts/plot/landingpage.py" diff --git a/workflow/scripts/bokeh_plot/components/helpers.py b/workflow/scripts/bokeh_plot/components/helpers.py deleted file mode 100644 index 6b3e44d..0000000 --- a/workflow/scripts/bokeh_plot/components/helpers.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Helper functions.""" - - -def convert_dic_to_list(data): - """Returns a list containing the 
values of the given list as strings.""" - return [str(i) for i in data] - - -def add_arrays(data): - """Returns a list containing the sum of the given lists.""" - return [sum(float(i) for i in sublist) for sublist in zip(*data)] - - -def get_max_result(data, factor): - """Returns the maximum value of the given list multiplied by the given factor.""" - return round(max(data) * factor, 3) diff --git a/workflow/scripts/landingpage.py b/workflow/scripts/landingpage.py deleted file mode 100644 index 93d4fdb..0000000 --- a/workflow/scripts/landingpage.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -Creates a landing page gallery with screenshots of Bokeh plot HTML files. -""" - -import os -import re - -from bokeh_plot.components.log_init import log_init -from html2image import Html2Image - -html_files = snakemake.input["PLOT_FILE"] -output_file = snakemake.output["OUTPUT_FILE"] -extra_file_plotting = snakemake.params["EXTRA_FILE_PLOTTING"] -html_dir = snakemake.params["HTML_DIR"] - -log_init(snakemake.log[0]) - -# get html files -if extra_file_plotting: - html_files = [os.path.join(html_dir, f) for f in os.listdir(html_dir) if f.endswith(".html")] - -# get html names -html_names = [re.sub(".html", "", html_file) for html_file in html_files] - -# create png for each html -hti = Html2Image(output_path=html_dir, custom_flags=["--headless", "--disable-gpu"]) - -for html_file in html_files: - hti.screenshot(html_file=html_file, save_as=re.sub(".html", ".png", os.path.basename(html_file))) - -# all parts of the landing page -LIST_OF_PARTS = "\n".join( - [ - f""" - - """ - for (ihtml_name, html_name) in enumerate(html_names) - ] -) - -# create landing page -CSS_TEXT = """ - body { - font-family: Arial, sans-serif; - background-color: #15191c; - margin: 0; - padding: 20px; - } - - .header { - text-align: center; - padding: 20px; - } - - .header h1 { - margin: 0; - font-size: 2em; - color: #ffffff; - } - - .gallery { - display: grid; - grid-template-columns: repeat(auto-fill, 
minmax(300px, 1fr)); - gap: 20px; - padding: 20px; - } - - .gallery-item { - background-color: #000000; - border-radius: 8px; - box-shadow: 0 0 10px rgba(255, 255, 255, 0.1); - overflow: hidden; - text-align: center; - transform: scale(0.97); - transition: 300ms ease-in-out; - } - - .gallery-item:hover { - box-shadow: 0 0 20px rgba(255, 255, 255, 0.134); - transform: scale(1); - } - - .gallery-item img { - max-width: 100%; - height: auto; - display: block; - margin-bottom: 10px; - } - - .gallery-item a { - text-decoration: none; - color: #ffffff; - display: block; - padding: 10px; - font-size: 1em; - transition: 400ms ease-in-out; - } - - .gallery-item a:hover { - color: #c2c2c2; - } - """ - -HTML_TEXT = ( - """ - - - - - - - Landing Page for Bokeh Plots - - - - -
-

Plot Gallery

-
- - - - """ -) - -# save landing page -with open(os.path.join(os.path.dirname(output_file), "style.css"), "w", encoding="utf-8") as f: - f.write(CSS_TEXT) - -with open(output_file, "w", encoding="utf-8") as f: - f.write(HTML_TEXT) diff --git a/workflow/scripts/bokeh_plot/components/convert_data.py b/workflow/scripts/plot/components/convert_data.py similarity index 100% rename from workflow/scripts/bokeh_plot/components/convert_data.py rename to workflow/scripts/plot/components/convert_data.py diff --git a/workflow/scripts/bokeh_plot/components/log_init.py b/workflow/scripts/plot/components/log_init.py similarity index 100% rename from workflow/scripts/bokeh_plot/components/log_init.py rename to workflow/scripts/plot/components/log_init.py diff --git a/workflow/scripts/bokeh_plot/components/plot_css_html.py b/workflow/scripts/plot/components/plot_css_html.py similarity index 62% rename from workflow/scripts/bokeh_plot/components/plot_css_html.py rename to workflow/scripts/plot/components/plot_css_html.py index e4feef2..de7f639 100644 --- a/workflow/scripts/bokeh_plot/components/plot_css_html.py +++ b/workflow/scripts/plot/components/plot_css_html.py @@ -33,13 +33,16 @@ } dataset = { - "Type": "Simulated", - "Sequence size": "512 MiB", - "Number of bins": 1024, - "Number of haplotypes": 1, - "Number of reads": 1048576, - "Read length": 250, - "Read errors": 2, + "Description": "Simulated dataset with 1 haplotype and 1M reads", + "Parameters": { + "Type": "Simulated", + "Sequence size": "512 MiB", + "Number of bins": 1024, + "Number of haplotypes": 1, + "Number of reads": 1048576, + "Read length": 250, + "Read errors": 2, + }, } @@ -62,8 +65,8 @@ def create_latex_text(): def create_dataset_text(): """Creates a div containing the description of the plot.""" - desc = '

Dataset:

' - for key, value in dataset.items(): + desc = f'

Dataset:

{dataset["Description"]}

' + for key, value in dataset["Parameters"].items(): desc += f'
{key}:
{value}
' desc += "
" return desc @@ -163,3 +166,119 @@ def get_hover_code(): size_plot_hovers[i].tooltips = size_description[i]; } """ + + +def landing_page_css(): + """Returns the CSS style for the landing page.""" + return """ + body { + font-family: Arial, sans-serif; + background-color: #15191c; + margin: 0; + padding: 20px; + } + + .header { + text-align: center; + padding: 20px; + } + + .header h1 { + margin: 0; + font-size: 2em; + color: #ffffff; + } + + .gallery { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(350px, 1fr)); + gap: 20px; + padding: 20px; + } + + .gallery-item { + height: 200px; + background-color: #000000; + border-radius: 8px; + box-shadow: 0 0 10px rgba(255, 255, 255, 0.1); + overflow: hidden; + position: relative; + text-align: center; + display: flex; + transform: scale(0.97); + transition: 300ms ease-in-out; + } + + .gallery-item:hover { + box-shadow: 0 0 20px rgba(255, 255, 255, 0.134); + transform: scale(1); + } + + .gallery-item img { + max-width: 100%; + height: auto; + display: block; + } + + .gallery-item h4 { + margin: 0; + padding: 20px; + text-align: center; + display: flex; + align-items: center; + justify-content: center; + color: #ffffff; + } + + + .description-box { + position: absolute; + height: calc(100% - 20px); + bottom: 0; + left: 0; + right: 0; + background: rgba(0, 0, 0, 0.8); + color: white; + padding: 10px; + transform: translateY(100%); + transition: transform 0.3s ease-in-out; + display: flex; + flex-direction: column; + justify-content: center; + } + + .gallery-item:hover .description-box { + transform: translateY(0); + } + + .description { + max-height: 100%; + overflow: auto; + scrollbar-width: none; + -ms-overflow-style: none; + -webkit-overflow-scrolling: touch; + } + + .description::-webkit-scrollbar { + display: none; + } + + .gallery-item a { + text-decoration: none; + color: #ffffff; + display: block; + padding: 10px; + font-size: 1em; + transition: 400ms ease-in-out; + } + + .gallery-item a:hover { + 
color: #c2c2c2; + } + + .gallery-item h4 { + font-size: 1.3em; + margin-bottom: 5px; + width: 100%; + } + """ diff --git a/workflow/scripts/bokeh_plot/components/plot_style.py b/workflow/scripts/plot/components/plot_style.py similarity index 100% rename from workflow/scripts/bokeh_plot/components/plot_style.py rename to workflow/scripts/plot/components/plot_style.py diff --git a/workflow/scripts/plot/landingpage.py b/workflow/scripts/plot/landingpage.py new file mode 100644 index 0000000..5d427ee --- /dev/null +++ b/workflow/scripts/plot/landingpage.py @@ -0,0 +1,139 @@ +""" +Creates a landing page gallery with screenshots of Bokeh plot HTML files. +""" + +import json +import os + +from bs4 import BeautifulSoup + +from components.log_init import log_init +from components.plot_css_html import landing_page_css + +html_files = snakemake.input["PLOT_FILE"] # type: ignore +output_file = snakemake.output["OUTPUT_FILE"] # type: ignore +extra_file_plotting = snakemake.params["EXTRA_FILE_PLOTTING"] # type: ignore +html_dir = snakemake.params["HTML_DIR"] # type: ignore + +log_init(snakemake.log[0]) # type: ignore + + +def find_texts(obj, texts): + """recursive text extraction from json""" + if isinstance(obj, dict): + for key, value in obj.items(): + if key == "text": + texts.append(value) + elif isinstance(value, (dict, list)): + find_texts(value, texts) + elif isinstance(obj, list): + for item in obj: + if isinstance(item, (dict, list)): + find_texts(item, texts) + + +def get_html_name(html_file): + """get the html name from the html file""" + return html_file.split("/")[-1].replace(".html", "") + + +def clean_html(html_file): + """prepare html for extraction""" + with open(html_file, "r", encoding="utf-8") as file: + html = str(file.read()) + soup = BeautifulSoup(html, "html.parser") + extracted_script = soup.find("script", {"type": "application/json"}) + converted_html = extracted_script.get_text().replace("&lt;", "<").replace("&gt;", ">").replace("&quot;", '"') + json_data = 
json.loads(converted_html) + texts = [] + find_texts(json_data, texts) + text_string = "\n".join(texts) + converted_soup = BeautifulSoup(text_string, "html.parser") + dataset = converted_soup.find("div", {"id": "dataset"}) + return dataset + + +def extract_description(html_file): + """extract dataset details from html file""" + cleaned_html = clean_html(html_file) + if cleaned_html: + for headline in cleaned_html.find_all("h2") + cleaned_html.find_all("h4"): + headline.decompose() + filename = get_html_name(html_file) + return cleaned_html if cleaned_html else filename + + +def extract_headline(html_file): + """extract headline from html file""" + cleaned_html = clean_html(html_file) + if cleaned_html: + for headline in cleaned_html.find_all("h4"): + return headline.get_text() if headline else html_file + filename = get_html_name(html_file) + return filename + + +# if extra_file_plotting is True, get all html files from the results/html directory +if extra_file_plotting: + html_files = [os.path.join(html_dir, f) for f in os.listdir(html_dir) if f.endswith(".html")] + + +# get html names +html_names = [get_html_name(html_file) for html_file in html_files] + + +# all gallery items for the landing page, headline of the data-description will become the title of the gallery-item, +# the description of the data-set will become the hover box +LIST_OF_PARTS = "\n".join( + [ + f""" + + """ + for html_name in html_names + ] +) + + +# html template +HTML_TEXT = ( + """ + + + + + + + Landing Page for Bokeh Plots + + + + +
+

Plot Gallery

+
+ + + + """ +) + + +# save landing page and css +with open(os.path.join(os.path.dirname(output_file), "style.css"), "w", encoding="utf-8") as f: + f.write(landing_page_css()) + +with open(output_file, "w", encoding="utf-8") as f: + f.write(HTML_TEXT) diff --git a/workflow/scripts/bokeh_plot/plot.py b/workflow/scripts/plot/plot.py similarity index 84% rename from workflow/scripts/bokeh_plot/plot.py rename to workflow/scripts/plot/plot.py index 7ae36da..0529a64 100755 --- a/workflow/scripts/bokeh_plot/plot.py +++ b/workflow/scripts/plot/plot.py @@ -8,21 +8,20 @@ from bokeh.themes import Theme from components.convert_data import prepare_size_data, prepare_time_data -from components.helpers import convert_dic_to_list from components.log_init import log_init from components.plot_style import add_legend, add_second_y_axis, configure_size_plot, configure_time_plot, save_tabs -log_init(snakemake.log[0]) +log_init(snakemake.log[0]) # type: ignore -SIZE_INPUT = snakemake.input["SIZE_INPUT"] -TIME_INPUT = snakemake.input["TIME_INPUT"] +SIZE_INPUT = snakemake.input["SIZE_INPUT"] # type: ignore +TIME_INPUT = snakemake.input["TIME_INPUT"] # type: ignore -PLOT_FILE = snakemake.output["PLOT_FILE"] +PLOT_FILE = snakemake.output["PLOT_FILE"] # type: ignore -THEME = snakemake.params["THEME"] -KEYS = snakemake.params["KEYS"] -TIME = snakemake.params["TIME"] -SIZE = snakemake.params["SIZE"] +THEME = snakemake.params["THEME"] # type: ignore +KEYS = snakemake.params["KEYS"] # type: ignore +TIME = snakemake.params["TIME"] # type: ignore +SIZE = snakemake.params["SIZE"] # type: ignore TIME_NAMES = [TIME["NAMES"].get(key, key) for key in TIME["FORMAT"]] SIZE_NAMES = [SIZE["NAMES"].get(key, key) for key in SIZE["FORMAT"]] @@ -74,8 +73,8 @@ def create_plot(): time_x_range = round(max(time_dic["TOTAL_TIME"]) * 1.03, 3) size_x_range = round(max(size_dic["GB_TOTAL_SIZE"]) * 1.03, 3) - size_y_range = convert_dic_to_list(size_dic["SUBKEY"]) - time_y_range = convert_dic_to_list(time_dic["SUBKEY"]) 
+ size_y_range = [str(i) for i in size_dic["SUBKEY"]] + time_y_range = [str(i) for i in time_dic["SUBKEY"]] y_range = size_y_range if len(size_y_range) > len(time_y_range) else time_y_range plot1 = create_time_plot(time_dic, y_range, time_x_range, key) diff --git a/workflow/scripts/bokeh_plot/plot_theme.yaml b/workflow/scripts/plot/plot_theme.yaml similarity index 100% rename from workflow/scripts/bokeh_plot/plot_theme.yaml rename to workflow/scripts/plot/plot_theme.yaml