From e92f7bc59a482cac50ac16b5b8e1ffd6ce23085b Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 10:52:37 -0400 Subject: [PATCH 01/43] telemetry_chargeback role task file additions - these task file are needed for loki data download - adds looping for multiple scenarios - runs rating command --- roles/telemetry_chargeback/defaults/main.yml | 27 +++++++ .../telemetry_chargeback/tasks/cleanup_ck.yml | 5 ++ .../tasks/flush_loki_data.yml | 52 ++++++++++++++ .../tasks/ingest_loki_data.yml | 42 +++++++++++ .../tasks/load_loki_data.yml | 12 ++++ .../telemetry_chargeback/tasks/loki_rate.yml | 28 ++++++++ roles/telemetry_chargeback/tasks/main.yml | 57 ++++++++++++++- .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ .../tasks/run_test_scenarios.yml | 53 ++++++++++++++ .../tasks/setup_loki_env.yml | 56 +++++++++++++++ 10 files changed, 400 insertions(+), 3 deletions(-) create mode 100644 roles/telemetry_chargeback/tasks/cleanup_ck.yml create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/run_test_scenarios.yml create mode 100644 roles/telemetry_chargeback/tasks/setup_loki_env.yml diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..9cc04c8c7 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,29 @@ --- +# OpenStack CLI command openstack_cmd: "openstack" + +# Debug mode +cloudkitty_debug: false + +# Directory paths +logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" +artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +cert_dir: "{{ ansible_user_dir }}/ck-certs" +local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +remote_cert_dir: "osp-certs" + +# Cloudkitty certificates and secrets +cert_secret_name: "cert-cloudkitty-client-internal" +client_secret: "secret/cloudkitty-lokistack-gateway-client-http" +ca_configmap: "cm/cloudkitty-lokistack-ca-bundle" + +# LogQL Query +logql_query: "{{ loki_query | default('{service=\"cloudkitty\"}') }}" + +# OpenShift/Kubernetes settings +cloudkitty_namespace: "openstack" +openstackpod: "openstackclient" + +# Time window settings +lookback: 6 +limit: 50 diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml new file mode 100644 index 000000000..01407d155 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -0,0 +1,5 @@ +--- +- name: "Cleanup local certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..a53751f3f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" + + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..a2a1e129f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,12 @@ +--- +- name: "Ingest CloudKitty data log for {{ item }}" + ansible.builtin.include_tasks: + file: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ item }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: + file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..822585336 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,28 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 + +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..e2f264834 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,6 +1,57 @@ --- -- name: "Validate Chargeback Feature" +- name: "Validate Chargeback Feature deployed correctly" ansible.builtin.include_tasks: "chargeback_tests.yml" -- name: "Generate Synthetic Data" - ansible.builtin.include_tasks: "gen_synth_loki_data.yml" +- name: "Setup Loki Environment" + ansible.builtin.include_tasks: "setup_loki_env.yml" + +- name: "CloudKitty debug ON/OFF" + ansible.builtin.set_fact: + cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" + +- name: "Get admin project ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} project show admin -f value -c id" + register: get_admin_project_id + changed_when: false + failed_when: false + +- name: "Set admin project ID for CI" + ansible.builtin.set_fact: + cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" + +- name: "Get admin user ID for CI" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} user show admin -f value -c id" + register: get_admin_user_id + changed_when: false + failed_when: false + +- name: "Set admin user ID for CI" + ansible.builtin.set_fact: + cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" + +- name: "Find test files" + ansible.builtin.find: + paths: "{{ cloudkitty_scenario_dir }}" + patterns: "test_*.yml" + register: found_files_raw + +- name: "Extract only the filenames into a clean list" + ansible.builtin.set_fact: + found_files: "{{ found_files_raw.files | map(attribute='path') | map('basename') | map('regex_replace', '\\.yml$', '') | list }}" + +- name: "Run scenario file through workflow" + block: + - name: "Process and Loop if files exist" + ansible.builtin.include_tasks: run_test_scenarios.yml + loop: "{{ found_files }}" + when: found_files | length > 0 + + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml + + rescue: + - name: "Log failure" + ansible.builtin.debug: + msg: "Running test scenarios loop failed." diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..adaa2b34a --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: loki_retrieved_summary_info + changed_when: loki_retrieved_summary_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml new file mode 100644 index 000000000..5addb4a22 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -0,0 +1,53 @@ +--- +- name: "Generate Synthetic Data for each file: {{ item }}" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ item }}" + ansible.builtin.include_tasks: "load_loki_data.yml" + +- name: "Get total rate from Loki: {{ item }}" + ansible.builtin.include_tasks: "loki_rate.yml" + +#### diff uploaded data totals vs download data totals +- name: "Check synthetic totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + register: synth_totals_stat + +- name: "Check Loki totals file exists" + ansible.builtin.stat: + path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + register: loki_totals_stat + +- name: "TEST Totals files exist {{ item }}" + ansible.builtin.assert: + that: + - synth_totals_stat.stat.exists | default(false) + - loki_totals_stat.stat.exists | default(false) + fail_msg: | + FAILED! Required file(s) missing for scenario {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + success_msg: | + PASSED! Required file(s) exist {{ item }}: + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + +- name: "Diff synthetic totals vs Loki totals {{ item }}" + ansible.builtin.command: + cmd: > + diff + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} + {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} + register: yaml_diff + failed_when: false + changed_when: false + +- name: "TEST Compare synthetic data vs Loki data results {{ item }}" + ansible.builtin.assert: + that: + - yaml_diff.rc == 0 + fail_msg: | + FAILED! {{ item }} - Files differ: + {{ yaml_diff.stdout }} + success_msg: "PASSED! {{ item }} - Data totals are identical." diff --git a/roles/telemetry_chargeback/tasks/setup_loki_env.yml b/roles/telemetry_chargeback/tasks/setup_loki_env.yml new file mode 100644 index 000000000..e4a80250f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/setup_loki_env.yml @@ -0,0 +1,56 @@ +--- +# Setup Loki Environment + +# Dynamic URL's +- name: "Get Loki Public Route Host" + ansible.builtin.command: + cmd: | + oc get route cloudkitty-lokistack -n {{ cloudkitty_namespace }} -o "jsonpath={.spec.host}" + register: loki_route + changed_when: false + +- name: "Set Loki URLs" + ansible.builtin.set_fact: + # Base URL + loki_base_url: "https://{{ loki_route.stdout }}" + + # Internal Flush URL (Service DNS: https://..svc:3100/flush) + ingester_flush_url: "https://cloudkitty-lokistack-ingester-http.{{ cloudkitty_namespace }}.svc:3100/flush" + +- name: "Set Derived Loki URLs" + ansible.builtin.set_fact: + loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" + loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +- name: "Debug URLs" + ansible.builtin.debug: + msg: + - "Loki Route: {{ loki_base_url }}" + - "Push URL: {{ loki_push_url }}" + - "Flush URL: {{ ingester_flush_url }}" + - "Query URL: {{ loki_query_url }}" + +# Certs to Ingest & Retrieve data to/from Loki +- name: "Ensure Local Certificate Directory Exists" + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +- name: "Extract Certificates from OpenShift Secret" + ansible.builtin.command: + cmd: | + oc extract secret/{{ cert_secret_name }} --to={{ cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +- name: "Extract Client Certificates" + ansible.builtin.command: + cmd: | + oc extract {{ client_secret }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true + +- name: "Extract CA Bundle" + ansible.builtin.command: + cmd: | + oc extract {{ ca_configmap }} --to={{ local_cert_dir }} --confirm -n {{ cloudkitty_namespace }} + changed_when: true From 28a064fd3cf2912f825f43fe0bd010d9e29d4d9e Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 02/43] From 96610851fea4687b635f21da4c89d2277c3dd718 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Wed, 22 Apr 2026 09:12:33 -0400 Subject: [PATCH 03/43] remove loki_rate file --- .../files/gen_synth_loki_data.py | 157 +++++++++++++++--- .../tasks/flush_loki_data.yml | 52 ------ .../tasks/gen_synth_loki_data.yml | 55 +++--- .../tasks/ingest_loki_data.yml | 42 ----- .../tasks/load_loki_data.yml | 12 -- .../telemetry_chargeback/tasks/loki_rate.yml | 28 ---- .../tasks/retrieve_loki_data.yml | 71 -------- .../tasks/run_test_scenarios.yml | 50 ------ 8 files changed, 161 insertions(+), 306 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml delete mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index f05796e29..263554dc6 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -2,13 +2,48 @@ import logging import argparse import json +import sys import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Union from jinja2 import Environment +def _get_value_for_step( + values: List[Union[int, float]], + step_idx: int, + num_steps: int +) -> Union[int, float]: + """ + Get the appropriate value from a list based on the current step index. + + Values are distributed evenly across all steps. For example, if there are + 12 steps and 4 values, each value covers 3 steps: + - Steps 0-2: values[0] + - Steps 3-5: values[1] + - Steps 6-8: values[2] + - Steps 9-11: values[3] + + Args: + values: List of values to choose from. + step_idx: Current step index (0-based). + num_steps: Total number of steps. + + Returns: + The value corresponding to the current step. + """ + num_values = len(values) + if num_values == 1: + return values[0] + + # Calculate how many steps each value covers + steps_per_value = num_steps / num_values + # Determine which value index to use, clamping to valid range + value_idx = min(int(step_idx // steps_per_value), num_values - 1) + return values[value_idx] + + # --- Configure logging with a default level that can be changed --- logging.basicConfig( level=logging.INFO, @@ -73,7 +108,10 @@ def generate_loki_data( start_time: datetime, end_time: datetime, time_step_seconds: int, - config: Dict[str, Any] + config: Dict[str, Any], + project: Union[str, int, None] = None, + user: Union[str, int, None] = None, + reverse_timestamps: bool = False, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -85,6 +123,12 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. + project: Optional value to inject as groupby.project in every + log entry in the output (overrides test_* file value when set). + user: Optional value to inject as groupby.user in every + log entry in the output (overrides test_* file value when set). + reverse_timestamps (bool): If True, reverse the order of timestamps + in the JSON output (newest first, oldest last). """ # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -175,37 +219,49 @@ def generate_loki_data( logger.error(f"Invalid log type configuration: {log_type_config}") raise ValueError("Each log type in log_types must be a dictionary") - log_type_name = log_type_config.get("name") - if not log_type_name: - logger.error("Each log type must have a 'name' field") - raise ValueError("Each log type must have a 'name' field") + # "type" is log-type identifier (dict key) and output value + type_key = log_type_config.get("type") + if not type_key: + logger.error("Each log type must have a 'type' field") + raise ValueError("Each log type must have a 'type' field") # Validate required fields - missing = [f for f in required_fields if f not in log_type_config] + # metadata is optional for generation; name is not a log-type field + required_for_item = [ + f for f in required_fields + if f not in ("name", "metadata") + ] + missing = [f for f in required_for_item if f not in log_type_config] if missing: logger.error( - f"Missing required fields in {log_type_name} config: {missing}" + f"Missing required fields in {type_key!r} config: {missing}" ) raise ValueError( - f"Missing required fields in {log_type_name}: {missing}" + f"Missing required fields in {type_key!r}: {missing}" ) # Build groupby from config groupby = log_type_config.get("groupby", {}) if not isinstance(groupby, dict): logger.error( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) raise ValueError( - f"groupby must be a dictionary for {log_type_name}" + f"groupby must be a dictionary for {type_key!r}" ) - log_types[log_type_name] = { - "type": log_type_config["type"], + # Ensure qty and price are lists for step-based distribution + qty_val = log_type_config["qty"] + price_val = log_type_config["price"] + qty_list = qty_val if isinstance(qty_val, list) else [qty_val] + price_list = price_val if isinstance(price_val, list) else [price_val] + + log_types[type_key] = { + "type": type_key, "unit": log_type_config["unit"], "description": log_type_config.get("description"), - "qty": log_type_config["qty"], - "price": log_type_config["price"], + "qty": qty_list, + "price": price_list, "groupby": groupby.copy(), "metadata": log_type_config.get("metadata", {}) } @@ -231,15 +287,21 @@ def tojson_preserve_order(obj): # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") + if reverse_timestamps: + log_data_list.reverse() + logger.debug( + "Reversed timestamp order (newest first, oldest last)." + ) + + # Calculate total number of steps for value distribution + num_steps = len(log_data_list) + logger.debug(f"Total number of time steps: {num_steps}") + # Pre-calculate log types with date fields for each time step log_types_list = [] for idx, item in enumerate(log_data_list): - # For the last entry, use end_time to ensure it shows today's date - if idx == len(log_data_list) - 1: - dt = end_time - else: - epoch_seconds = item["nanoseconds"] / 1_000_000_000 - dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) iso_year, iso_week, _ = dt.isocalendar() day_of_year = dt.timetuple().tm_yday @@ -267,6 +329,17 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) + if project is not None: + log_type_with_dates["groupby"]["project"] = project + if user is not None: + log_type_with_dates["groupby"]["user"] = user + # Select qty and price based on step index distribution + log_type_with_dates["qty"] = _get_value_for_step( + log_type_data["qty"], idx, num_steps + ) + log_type_with_dates["price"] = _get_value_for_step( + log_type_data["price"], idx, num_steps + ) log_types_with_dates[log_type_name] = log_type_with_dates log_types_list.append(log_types_with_dates) @@ -296,8 +369,19 @@ def tojson_preserve_order(obj): ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") - except Exception as e: - logger.error(f"An unexpected error occurred during file write: {e}") + raise + + # --- Step 5: Validate that the output is valid JSON --- + try: + with output_path.open('r') as f_in: + json.load(f_in) + logger.info("Output file validated as valid JSON.") + except json.JSONDecodeError as e: + logger.error( + f"Output file is not valid JSON: {e}. " + f"Delete '{output_path}' and fix the template or data." + ) + sys.exit(1) def main(): @@ -324,8 +408,30 @@ def main(): required=True, help="Path to the output file." ) + parser.add_argument( + "-p", "--project-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.project in " + "every log entry in the output (overrides value from test file)." + ) + parser.add_argument( + "-u", "--user-id", + type=str, + default=None, + metavar="ID", + help="Optional alphanumeric value to use as groupby.user in " + "every log entry in the output (overrides value from test file)." + ) # --- Optional Utility Arguments --- + parser.add_argument( + "-r", "--reverse", + action="store_true", + help="Reverse timestamp order in JSON output: newest first, " + "oldest last (default is oldest first, newest last)." + ) parser.add_argument( "--debug", action="store_true", @@ -362,7 +468,10 @@ def main(): start_time=start_time_utc, end_time=end_time_utc, time_step_seconds=step_seconds, - config=config + config=config, + project=args.project_id, + user=args.user_id, + reverse_timestamps=args.reverse, ) except FileNotFoundError: logger.error( diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml deleted file mode 100644 index 6ec05419d..000000000 --- a/roles/telemetry_chargeback/tasks/flush_loki_data.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# Flush Loki Ingester Memory to Storage - -- name: "Flush execution inside OpenStack CLI" - block: - # create dir - - name: "Create directory inside OpenStack CLI" - ansible.builtin.command: - cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" - changed_when: false - - # certs to Flush data to Loki - - name: "Create directory to extract certificates" - ansible.builtin.file: - path: "{{ local_cert_dir }}" - state: directory - mode: '0755' - - # copy all certs - - name: "Copy certificates to OpenStack CLI" - ansible.builtin.command: - cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" - changed_when: true - - # flush loki - - name: "Trigger Loki ingester flush" - ansible.builtin.command: - cmd: > - oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- - curl -v -X POST {{ ingester_flush_url }} - --cert {{ remote_cert_dir }}/tls.crt - --key {{ remote_cert_dir }}/tls.key - --cacert {{ remote_cert_dir }}/service-ca.crt - register: flush_response - changed_when: true - failed_when: flush_response.rc != 0 - - # Status - - name: "Verify flush status" - ansible.builtin.assert: - that: - - "'204' in flush_response.stderr or '200' in flush_response.stderr" - fail_msg: "Flush failed" - success_msg: "Ingester Memory Flushed successfully" - - rescue: - - name: "Debug failure output" - ansible.builtin.debug: - msg: - - "Failure" - - "Stdout: {{ flush_response.stdout | default('') }}" - - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index 0b8d5880d..ec80ca3cc 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,39 +1,40 @@ --- -- name: Check for preexisting output file +- name: "Set variables dynamically for {{ item }}" + ansible.builtin.set_fact: + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + +- name: "Check for preexisting output file" ansible.builtin.stat: - path: "{{ ck_output_file_local }}" + path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: TEST Generate Synthetic Data +- name: "Generate Synthetic Data for {{ item }}" ansible.builtin.command: cmd: > python3 "{{ cloudkitty_synth_script }}" + -r --tmpl "{{ cloudkitty_data_template }}" - -t "{{ ck_data_config }}" - -o "{{ ck_output_file_local }}" + -t "{{ cloudkitty_test_file }}" + -o "{{ cloudkitty_data_file }}" + {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output - when: not file_preexists.stat.exists | bool + when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: Read the content of the file - ansible.builtin.slurp: - src: "{{ ck_output_file_local }}" - register: slurped_file - -- name: TEST Validate JSON format of synthetic data file - ansible.builtin.assert: - that: - # This filter will trigger a task failure if the string isn't valid JSON - - slurped_file.content | b64decode | from_json is defined - fail_msg: "The file does not contain valid JSON format." - success_msg: "JSON format validated successfully." - -- name: Print output_file_remote path - ansible.builtin.debug: - msg: "Synthetic data file: {{ ck_output_file_remote }}" +- name: "Generate chargeback rating from synthetic data file {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ cloudkitty_data_file }}" + -o "{{ cloudkitty_synth_totals_file }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + when: not file_preexists.stat.exists | bool + changed_when: synth_rating_info.rc == 0 -- name: Copy output file to remote host - ansible.builtin.copy: - src: "{{ ck_output_file_local }}" - dest: "{{ ck_output_file_remote }}" - mode: '0644' +- name: "Load metrics from YAML file" + ansible.builtin.include_vars: + file: "{{ cloudkitty_synth_totals_file }}" + name: synth_data_rates diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml deleted file mode 100644 index a53751f3f..000000000 --- a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# Ingest data log to Loki that is generated from gen_synth_loki_data.yml - -- name: "Ingest data log to Loki via API" - block: - - - name: "Read log file content" - ansible.builtin.slurp: - src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" - register: log_file_content - - - name: "Push data to Loki" - ansible.builtin.uri: - url: "{{ loki_push_url }}" - method: POST - body: "{{ log_file_content['content'] | b64decode | from_json }}" - body_format: json - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - validate_certs: false - status_code: 204 - return_content: true - register: loki_response - ignore_errors: false - failed_when: loki_response.status != 204 - - # Success - - name: "Confirm ingestion success" - ansible.builtin.debug: - msg: "Ingestion Successful!" - - rescue: - # Rescue block - - name: "Debug failure" - ansible.builtin.debug: - msg: "{{ loki_response.status | default('N/A') }}" - - # Failure - - name: "Report ingestion failure" - ansible.builtin.fail: - msg: "Ingestion Failed" - ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml deleted file mode 100644 index a2a1e129f..000000000 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ /dev/null @@ -1,12 +0,0 @@ ---- -- name: "Ingest CloudKitty data log for {{ item }}" - ansible.builtin.include_tasks: - file: ingest_loki_data.yml - -- name: "Flush data to Loki storage for {{ item }}" - ansible.builtin.include_tasks: - file: flush_loki_data.yml - -- name: "Retrieve data log from Loki for {{ item }}" - ansible.builtin.include_tasks: - file: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml deleted file mode 100644 index 822585336..000000000 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- -- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" - register: cost_totals_by_type - changed_when: false - failed_when: cost_totals_by_type.rc != 0 - -- name: "**INFO** Print the rating by type {{ item }}" - ansible.builtin.debug: - var: cost_totals_by_type.stdout - -- name: "Output saved as yaml {{ item }}" - ansible.builtin.copy: - content: "{{ cost_totals_by_type.stdout }}" - dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" - mode: '0644' - -- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" - register: cost_totals_summary - changed_when: false - failed_when: cost_totals_summary.rc != 0 - -- name: "**INFO** Print the rating summary {{ item }}" - ansible.builtin.debug: - var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml deleted file mode 100644 index adaa2b34a..000000000 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ /dev/null @@ -1,71 +0,0 @@ ---- -- name: "Expected Count {{ item }}" - ansible.builtin.debug: - msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" - -# Query Loki -- name: "Retrieve Logs from Loki via API {{ item }}" - block: - - name: "Query Loki API" - ansible.builtin.uri: - url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" - method: GET - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - ca_path: "{{ cert_dir }}/ca.crt" - validate_certs: false - return_content: true - body_format: json - register: loki_response - # Wait condition - until: - - loki_response.status == 200 - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) - retries: 25 - delay: 60 - - - name: "Save Loki Data to JSON file" - ansible.builtin.copy: - content: "{{ loki_response.json | to_json }}" - dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" - mode: '0644' - - # Validate - - name: "Verify Data Integrity {{ item }}" - vars: - actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" - ansible.builtin.assert: - that: - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - actual_count | int == (synth_data_rates.data_log.log_count | int) - fail_msg: >- - Query did not return all data entries. Expected - {{ synth_data_rates.data_log.log_count }} log entries, but Loki - only returned {{ actual_count }} - success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" - - rescue: - - name: "Debug failure" - ansible.builtin.debug: - msg: - - "Status: {{ loki_response.status | default('Unknown') }}" - - "Body: {{ loki_response.content | default('No Content') }}" - - "Msg: {{ loki_response.msg | default('Request failed') }}" - - # Failure - - name: "Report Retrieval Failure" - ansible.builtin.fail: - msg: "Retrieval Failed" - -- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" - ansible.builtin.command: - cmd: > - python3 "{{ cloudkitty_summary_script }}" - -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" - -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" - --debug "{{ cloudkitty_debug_dir }}" - register: loki_retrieved_summary_info - changed_when: loki_retrieved_summary_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml index 5addb4a22..4d6039d29 100644 --- a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -1,53 +1,3 @@ --- - name: "Generate Synthetic Data for each file: {{ item }}" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" - -- name: "Load data to Loki: {{ item }}" - ansible.builtin.include_tasks: "load_loki_data.yml" - -- name: "Get total rate from Loki: {{ item }}" - ansible.builtin.include_tasks: "loki_rate.yml" - -#### diff uploaded data totals vs download data totals -- name: "Check synthetic totals file exists" - ansible.builtin.stat: - path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" - register: synth_totals_stat - -- name: "Check Loki totals file exists" - ansible.builtin.stat: - path: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" - register: loki_totals_stat - -- name: "TEST Totals files exist {{ item }}" - ansible.builtin.assert: - that: - - synth_totals_stat.stat.exists | default(false) - - loki_totals_stat.stat.exists | default(false) - fail_msg: | - FAILED! Required file(s) missing for scenario {{ item }}: - - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} - - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} - success_msg: | - PASSED! Required file(s) exist {{ item }}: - - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} - - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} - -- name: "Diff synthetic totals vs Loki totals {{ item }}" - ansible.builtin.command: - cmd: > - diff - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }} - {{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }} - register: yaml_diff - failed_when: false - changed_when: false - -- name: "TEST Compare synthetic data vs Loki data results {{ item }}" - ansible.builtin.assert: - that: - - yaml_diff.rc == 0 - fail_msg: | - FAILED! {{ item }} - Files differ: - {{ yaml_diff.stdout }} - success_msg: "PASSED! {{ item }} - Data totals are identical." From 2fb8d72c3242023949976bf40d1a1ded352a5ac3 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Wed, 22 Apr 2026 14:41:50 -0400 Subject: [PATCH 04/43] added --ascending/--descending flags for generating synthetic data. --- .../files/gen_synth_loki_data.py | 23 +++++++++++++------ .../tasks/gen_synth_loki_data.yml | 1 - 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index 263554dc6..4827cf519 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -111,7 +111,7 @@ def generate_loki_data( config: Dict[str, Any], project: Union[str, int, None] = None, user: Union[str, int, None] = None, - reverse_timestamps: bool = False, + reverse_timestamps: bool = True, ): """ Generate synthetic Loki log data by preparing a data list and rendering. @@ -127,8 +127,9 @@ def generate_loki_data( log entry in the output (overrides test_* file value when set). user: Optional value to inject as groupby.user in every log entry in the output (overrides test_* file value when set). - reverse_timestamps (bool): If True, reverse the order of timestamps - in the JSON output (newest first, oldest last). + reverse_timestamps (bool): If True, sort timestamps in descending order + (newest first, oldest last). If False, sort in ascending order + (oldest first, newest last). Default is True (descending). """ # Hardcoded constant for invalid timestamps invalid_timestamp = "INVALID_TIMESTAMP" @@ -290,7 +291,7 @@ def tojson_preserve_order(obj): if reverse_timestamps: log_data_list.reverse() logger.debug( - "Reversed timestamp order (newest first, oldest last)." + "Sorted timestamps in descending order (newest first, oldest last)." ) # Calculate total number of steps for value distribution @@ -427,10 +428,18 @@ def main(): # --- Optional Utility Arguments --- parser.add_argument( - "-r", "--reverse", + "--ascending", + action="store_false", + dest="reverse", + help="Sort timestamps in ascending order: oldest first, newest last." + ) + parser.add_argument( + "--descending", action="store_true", - help="Reverse timestamp order in JSON output: newest first, " - "oldest last (default is oldest first, newest last)." + dest="reverse", + default=True, + help="Sort timestamps in descending order: newest first, oldest last " + "(default behavior)." ) parser.add_argument( "--debug", diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index ec80ca3cc..e62d06b70 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -14,7 +14,6 @@ ansible.builtin.command: cmd: > python3 "{{ cloudkitty_synth_script }}" - -r --tmpl "{{ cloudkitty_data_template }}" -t "{{ cloudkitty_test_file }}" -o "{{ cloudkitty_data_file }}" From 2e84da465dacae6e6af24fa077c42ccd25ada791 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Wed, 22 Apr 2026 17:22:26 -0400 Subject: [PATCH 05/43] Updating README file. --- roles/telemetry_chargeback/README.md | 54 ++++++++++++++----- .../tasks/gen_synth_loki_data.yml | 12 ++--- roles/telemetry_chargeback/tasks/main.yml | 2 + .../tasks/run_test_scenarios.yml | 2 +- 4 files changed, 51 insertions(+), 19 deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index 352b58d2f..c5d582a4f 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -33,24 +33,39 @@ The role uses the following variables to control the testing environment and exe | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +| `cloudkitty_debug` | `false` | Enable debug mode for CloudKitty database dumps. | +| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | +| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts and test output. | +| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Directory for CloudKitty client certificates. | +| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for certificate extraction. | +| `cloudkitty_namespace` | `openstack` | Kubernetes namespace where CloudKitty is deployed. | + +How It Works +------------ -### Internal Variables (vars/main.yml) +The role executes the following workflow: -These variables are used internally by the role and typically do not need to be modified. +1. **CloudKitty Validation** - Enables the hashmap rating module and sets its priority to 100. +2. **Loki Environment Setup** - Extracts Loki route information and certificates from the OpenShift cluster. +3. **Admin Credentials** - Retrieves admin project ID and user ID for test data generation. +4. **Scenario Discovery** - Finds all `test_*.yml` scenario files in the scenario directory. +5. **Scenario Loop** - For each scenario file found (exposed as `{{ scenario_name }}`): + - Generates synthetic Loki log data based on the scenario configuration + - Calculates expected chargeback metrics from the generated data + - Loads the metrics for validation +6. **Cleanup** - Removes temporary certificate directories. -| Variable | Default Value | Description | -|----------|---------------|-------------| -| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. | -| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. | -| `cloudkitty_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | -| `cloudkitty_data_template` | `{{ role_path }}/templates/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | -| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | -| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | -| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | +The role uses `{{ scenario_name }}` as the loop variable when processing multiple test scenarios, making it easy to track which scenario is currently being executed. Scenario Configuration ---------------------- -The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: +The synthetic data generation is controlled by YAML configuration files in the `files/` directory. Any file matching the pattern `test_*.yml` will be automatically discovered and executed. + +**Available scenarios:** +- `test_static.yml` - Static test scenario with predefined values +- `test_dyn_basic.yml` - Dynamic test scenario with variable values over time + +Each scenario file defines: * **generation** - Time range configuration (days, step_seconds) * **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata @@ -58,6 +73,21 @@ The synthetic data generation is controlled by a YAML configuration file (`files * **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) * **loki_stream** - Loki stream configuration (service name) +### Data Generation Script Options + +The `gen_synth_loki_data.py` script supports the following options: + +* `--tmpl` - Path to the Jinja2 template file (required) +* `-t, --test` - Path to the scenario YAML file (required) +* `-o, --output` - Path for the output JSON file (required) +* `-p, --project-id` - Optional project ID to override the scenario file value +* `-u, --user-id` - Optional user ID to override the scenario file value +* `--ascending` - Sort timestamps in ascending order (oldest first, newest last) +* `--descending` - Sort timestamps in descending order (newest first, oldest last) - **default** +* `--debug` - Enable debug logging + +By default, the script generates data in descending order (newest timestamps first), which is the expected format for Loki ingestion. + Dependencies ------------ This role has no direct hard dependencies on other Ansible roles. diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index e62d06b70..972fb34ef 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -1,16 +1,16 @@ --- -- name: "Set variables dynamically for {{ item }}" +- name: "Set variables dynamically for {{ scenario_name }}" ansible.builtin.set_fact: - cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" - cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_totals_metrics_suffix }}" - cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ item }}.yml" + cloudkitty_data_file: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + cloudkitty_synth_totals_file: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_totals_metrics_suffix }}" + cloudkitty_test_file: "{{ cloudkitty_scenario_dir }}/{{ scenario_name }}.yml" - name: "Check for preexisting output file" ansible.builtin.stat: path: "{{ cloudkitty_data_file }}" register: file_preexists -- name: "Generate Synthetic Data for {{ item }}" +- name: "Generate Synthetic Data for {{ scenario_name }}" ansible.builtin.command: cmd: > python3 "{{ cloudkitty_synth_script }}" @@ -22,7 +22,7 @@ when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 -- name: "Generate chargeback rating from synthetic data file {{ item }}" +- name: "Generate chargeback rating from synthetic data file {{ scenario_name }}" ansible.builtin.command: cmd: > python3 "{{ cloudkitty_summary_script }}" diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index e2f264834..4e4198747 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -46,6 +46,8 @@ - name: "Process and Loop if files exist" ansible.builtin.include_tasks: run_test_scenarios.yml loop: "{{ found_files }}" + loop_control: + loop_var: scenario_name when: found_files | length > 0 - name: "Cleanup after job run" diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml index 4d6039d29..ee28375bc 100644 --- a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -1,3 +1,3 @@ --- -- name: "Generate Synthetic Data for each file: {{ item }}" +- name: "Generate Synthetic Data for each file: {{ scenario_name }}" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" From 1ac73cb9968289105e9735b18fe0bd6b17f5fa73 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 23 Apr 2026 10:34:28 -0400 Subject: [PATCH 06/43] Fixes based on review feedback from Jaromir --- .../files/gen_synth_loki_data.py | 31 ++----------------- .../tasks/gen_synth_loki_data.yml | 1 - roles/telemetry_chargeback/tasks/main.yml | 22 ------------- roles/telemetry_chargeback/vars/main.yml | 3 -- 4 files changed, 2 insertions(+), 55 deletions(-) diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index 4827cf519..bc5420905 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -109,8 +109,6 @@ def generate_loki_data( end_time: datetime, time_step_seconds: int, config: Dict[str, Any], - project: Union[str, int, None] = None, - user: Union[str, int, None] = None, reverse_timestamps: bool = True, ): """ @@ -123,10 +121,6 @@ def generate_loki_data( end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. config (Dict[str, Any]): Configuration dictionary loaded from file. - project: Optional value to inject as groupby.project in every - log entry in the output (overrides test_* file value when set). - user: Optional value to inject as groupby.user in every - log entry in the output (overrides test_* file value when set). reverse_timestamps (bool): If True, sort timestamps in descending order (newest first, oldest last). If False, sort in ascending order (oldest first, newest last). Default is True (descending). @@ -291,7 +285,8 @@ def tojson_preserve_order(obj): if reverse_timestamps: log_data_list.reverse() logger.debug( - "Sorted timestamps in descending order (newest first, oldest last)." + "Sorted timestamps in descending order " + "(newest first, oldest last)." ) # Calculate total number of steps for value distribution @@ -330,10 +325,6 @@ def tojson_preserve_order(obj): log_type_with_dates = log_type_data.copy() log_type_with_dates["groupby"] = log_type_data["groupby"].copy() log_type_with_dates["groupby"].update(date_fields) - if project is not None: - log_type_with_dates["groupby"]["project"] = project - if user is not None: - log_type_with_dates["groupby"]["user"] = user # Select qty and price based on step index distribution log_type_with_dates["qty"] = _get_value_for_step( log_type_data["qty"], idx, num_steps @@ -409,22 +400,6 @@ def main(): required=True, help="Path to the output file." ) - parser.add_argument( - "-p", "--project-id", - type=str, - default=None, - metavar="ID", - help="Optional alphanumeric value to use as groupby.project in " - "every log entry in the output (overrides value from test file)." - ) - parser.add_argument( - "-u", "--user-id", - type=str, - default=None, - metavar="ID", - help="Optional alphanumeric value to use as groupby.user in " - "every log entry in the output (overrides value from test file)." - ) # --- Optional Utility Arguments --- parser.add_argument( @@ -478,8 +453,6 @@ def main(): end_time=end_time_utc, time_step_seconds=step_seconds, config=config, - project=args.project_id, - user=args.user_id, reverse_timestamps=args.reverse, ) except FileNotFoundError: diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index 972fb34ef..493356654 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -17,7 +17,6 @@ --tmpl "{{ cloudkitty_data_template }}" -t "{{ cloudkitty_test_file }}" -o "{{ cloudkitty_data_file }}" - {% if cloudkitty_project_id is defined and cloudkitty_project_id %} -p "{{ cloudkitty_project_id }}"{% endif %} register: script_output when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 4e4198747..4cdda7000 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -9,28 +9,6 @@ ansible.builtin.set_fact: cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" -- name: "Get admin project ID for CI" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} project show admin -f value -c id" - register: get_admin_project_id - changed_when: false - failed_when: false - -- name: "Set admin project ID for CI" - ansible.builtin.set_fact: - cloudkitty_project_id: "{{ (get_admin_project_id.stdout | trim) | default('') }}" - -- name: "Get admin user ID for CI" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} user show admin -f value -c id" - register: get_admin_user_id - changed_when: false - failed_when: false - -- name: "Set admin user ID for CI" - ansible.builtin.set_fact: - cloudkitty_user_id: "{{ (get_admin_user_id.stdout | trim) | default('') }}" - - name: "Find test files" ansible.builtin.find: paths: "{{ cloudkitty_scenario_dir }}" diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml index 5d7a47804..2054c1b5b 100644 --- a/roles/telemetry_chargeback/vars/main.yml +++ b/roles/telemetry_chargeback/vars/main.yml @@ -1,7 +1,4 @@ --- -logs_dir_zuul: "/home/zuul/ci-framework-data/logs" -artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts" - cloudkitty_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" cloudkitty_data_template: "{{ role_path }}/templates/loki_data_templ.j2" ck_data_config: "{{ role_path }}/files/test_static.yml" From b2f5d7de0904596bbe203a4a2eec75ac048bd3e4 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 24 Apr 2026 13:51:23 -0400 Subject: [PATCH 07/43] Moved debug var from tasks/main.yml to defaults/main.yml --- roles/telemetry_chargeback/defaults/main.yml | 3 ++- roles/telemetry_chargeback/tasks/main.yml | 11 ++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 9cc04c8c7..73741502d 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -2,8 +2,9 @@ # OpenStack CLI command openstack_cmd: "openstack" -# Debug mode +# Debug mode set cloudkitty_debug: false +cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" # Directory paths logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 4cdda7000..cba889981 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -5,10 +5,6 @@ - name: "Setup Loki Environment" ansible.builtin.include_tasks: "setup_loki_env.yml" -- name: "CloudKitty debug ON/OFF" - ansible.builtin.set_fact: - cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" - - name: "Find test files" ansible.builtin.find: paths: "{{ cloudkitty_scenario_dir }}" @@ -28,10 +24,11 @@ loop_var: scenario_name when: found_files | length > 0 - - name: "Cleanup after job run" - ansible.builtin.include_tasks: cleanup_ck.yml - rescue: - name: "Log failure" ansible.builtin.debug: msg: "Running test scenarios loop failed." + + always: + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml From d5d9bb084cd975094a88348c3bd9955d11c95a91 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 08/43] From 466f5546e34b1c86d3e178445efcfee1e3567002 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 23 Apr 2026 14:20:43 -0400 Subject: [PATCH 09/43] [telemetry_chargeback] Add ability to ingest and retrieve loki data to role - Uploads data to loki --- .../tasks/flush_loki_data.yml | 52 ++++++++++++++ .../tasks/ingest_loki_data.yml | 42 +++++++++++ .../tasks/load_loki_data.yml | 9 +++ .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ .../tasks/run_test_scenarios.yml | 3 + 5 files changed, 177 insertions(+) create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..79e8896ee --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" + + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..9376dd787 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,9 @@ +--- +- name: "Ingest CloudKitty data log for {{ scenario_name }}" + ansible.builtin.include_tasks: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ scenario_name }}" + ansible.builtin.include_tasks: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ scenario_name }}" + ansible.builtin.include_tasks: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..e21115291 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ scenario_name }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ scenario_name }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ scenario_name }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml index ee28375bc..0ce65ee09 100644 --- a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -1,3 +1,6 @@ --- - name: "Generate Synthetic Data for each file: {{ scenario_name }}" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ scenario_name }}" + ansible.builtin.include_tasks: "load_loki_data.yml" From 58442340282a25d94d3bd25162b2196bde2e0524 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 1 May 2026 14:20:25 -0400 Subject: [PATCH 10/43] Add and ingest task file --- .../tasks/flush_loki_data.yml | 52 -------------- .../tasks/load_loki_data.yml | 6 -- .../tasks/retrieve_loki_data.yml | 71 ------------------- 3 files changed, 129 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml deleted file mode 100644 index 6ec05419d..000000000 --- a/roles/telemetry_chargeback/tasks/flush_loki_data.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# Flush Loki Ingester Memory to Storage - -- name: "Flush execution inside OpenStack CLI" - block: - # create dir - - name: "Create directory inside OpenStack CLI" - ansible.builtin.command: - cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" - changed_when: false - - # certs to Flush data to Loki - - name: "Create directory to extract certificates" - ansible.builtin.file: - path: "{{ local_cert_dir }}" - state: directory - mode: '0755' - - # copy all certs - - name: "Copy certificates to OpenStack CLI" - ansible.builtin.command: - cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" - changed_when: true - - # flush loki - - name: "Trigger Loki ingester flush" - ansible.builtin.command: - cmd: > - oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- - curl -v -X POST {{ ingester_flush_url }} - --cert {{ remote_cert_dir }}/tls.crt - --key {{ remote_cert_dir }}/tls.key - --cacert {{ remote_cert_dir }}/service-ca.crt - register: flush_response - changed_when: true - failed_when: flush_response.rc != 0 - - # Status - - name: "Verify flush status" - ansible.builtin.assert: - that: - - "'204' in flush_response.stderr or '200' in flush_response.stderr" - fail_msg: "Flush failed" - success_msg: "Ingester Memory Flushed successfully" - - rescue: - - name: "Debug failure output" - ansible.builtin.debug: - msg: - - "Failure" - - "Stdout: {{ flush_response.stdout | default('') }}" - - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml index 9376dd787..c58821f27 100644 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -1,9 +1,3 @@ --- - name: "Ingest CloudKitty data log for {{ scenario_name }}" ansible.builtin.include_tasks: ingest_loki_data.yml - -- name: "Flush data to Loki storage for {{ scenario_name }}" - ansible.builtin.include_tasks: flush_loki_data.yml - -- name: "Retrieve data log from Loki for {{ scenario_name }}" - ansible.builtin.include_tasks: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml deleted file mode 100644 index e21115291..000000000 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ /dev/null @@ -1,71 +0,0 @@ ---- -- name: "Expected Count {{ scenario_name }}" - ansible.builtin.debug: - msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" - -# Query Loki -- name: "Retrieve Logs from Loki via API {{ scenario_name }}" - block: - - name: "Query Loki API" - ansible.builtin.uri: - url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" - method: GET - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - ca_path: "{{ cert_dir }}/ca.crt" - validate_certs: false - return_content: true - body_format: json - register: loki_response - # Wait condition - until: - - loki_response.status == 200 - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) - retries: 25 - delay: 60 - - - name: "Save Loki Data to JSON file" - ansible.builtin.copy: - content: "{{ loki_response.json | to_json }}" - dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - mode: '0644' - - # Validate - - name: "Verify Data Integrity {{ scenario_name }}" - vars: - actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" - ansible.builtin.assert: - that: - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - actual_count | int == (synth_data_rates.data_log.log_count | int) - fail_msg: >- - Query did not return all data entries. Expected - {{ synth_data_rates.data_log.log_count }} log entries, but Loki - only returned {{ actual_count }} - success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" - - rescue: - - name: "Debug failure" - ansible.builtin.debug: - msg: - - "Status: {{ loki_response.status | default('Unknown') }}" - - "Body: {{ loki_response.content | default('No Content') }}" - - "Msg: {{ loki_response.msg | default('Request failed') }}" - - # Failure - - name: "Report Retrieval Failure" - ansible.builtin.fail: - msg: "Retrieval Failed" - -- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" - ansible.builtin.command: - cmd: > - python3 "{{ cloudkitty_summary_script }}" - -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" - --debug "{{ cloudkitty_debug_dir }}" - register: synth_rating_info - changed_when: synth_rating_info.rc == 0 From 051d106db69bbc24fbedb18aefef65b8a5394e7e Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 11/43] From e7c3b87585f846e815a509947e67cae167f4cb66 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 23 Apr 2026 10:34:28 -0400 Subject: [PATCH 12/43] Add flushing loki DB task file --- .../tasks/flush_loki_data.yml | 52 +++++++++++++++++++ .../tasks/load_loki_data.yml | 4 ++ 2 files changed, 56 insertions(+) create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml index c58821f27..38cab1204 100644 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -1,3 +1,7 @@ --- - name: "Ingest CloudKitty data log for {{ scenario_name }}" ansible.builtin.include_tasks: ingest_loki_data.yml + +- name: "Flush data to Loki storage for {{ scenario_name }}" + ansible.builtin.include_tasks: + file: flush_loki_data.yml From 05a4923b9dfde8bbd9f22d6e1f659a597afcf22e Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 23 Apr 2026 14:20:43 -0400 Subject: [PATCH 13/43] [telemetry_chargeback] Add tasks file to flush data to the loki DB --- .../tasks/load_loki_data.yml | 3 +- roles/telemetry_chargeback/tasks/main.yml | 2 +- .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ 3 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml index 38cab1204..37c2aedb9 100644 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -3,5 +3,4 @@ ansible.builtin.include_tasks: ingest_loki_data.yml - name: "Flush data to Loki storage for {{ scenario_name }}" - ansible.builtin.include_tasks: - file: flush_loki_data.yml + ansible.builtin.include_tasks: flush_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index cba889981..a26d0695f 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -31,4 +31,4 @@ always: - name: "Cleanup after job run" - ansible.builtin.include_tasks: cleanup_ck.yml + ansible.builtin.include_tasks: cleanup_ck.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..e21115291 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ scenario_name }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ scenario_name }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ scenario_name }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 From 1f15eed19987639422b907049df543cdda8e27d2 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 1 May 2026 14:24:21 -0400 Subject: [PATCH 14/43] Adding loki flush tasks file. --- .../tasks/retrieve_loki_data.yml | 71 ------------------- 1 file changed, 71 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml deleted file mode 100644 index e21115291..000000000 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ /dev/null @@ -1,71 +0,0 @@ ---- -- name: "Expected Count {{ scenario_name }}" - ansible.builtin.debug: - msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" - -# Query Loki -- name: "Retrieve Logs from Loki via API {{ scenario_name }}" - block: - - name: "Query Loki API" - ansible.builtin.uri: - url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" - method: GET - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - ca_path: "{{ cert_dir }}/ca.crt" - validate_certs: false - return_content: true - body_format: json - register: loki_response - # Wait condition - until: - - loki_response.status == 200 - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) - retries: 25 - delay: 60 - - - name: "Save Loki Data to JSON file" - ansible.builtin.copy: - content: "{{ loki_response.json | to_json }}" - dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - mode: '0644' - - # Validate - - name: "Verify Data Integrity {{ scenario_name }}" - vars: - actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" - ansible.builtin.assert: - that: - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - actual_count | int == (synth_data_rates.data_log.log_count | int) - fail_msg: >- - Query did not return all data entries. Expected - {{ synth_data_rates.data_log.log_count }} log entries, but Loki - only returned {{ actual_count }} - success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" - - rescue: - - name: "Debug failure" - ansible.builtin.debug: - msg: - - "Status: {{ loki_response.status | default('Unknown') }}" - - "Body: {{ loki_response.content | default('No Content') }}" - - "Msg: {{ loki_response.msg | default('Request failed') }}" - - # Failure - - name: "Report Retrieval Failure" - ansible.builtin.fail: - msg: "Retrieval Failed" - -- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" - ansible.builtin.command: - cmd: > - python3 "{{ cloudkitty_summary_script }}" - -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" - --debug "{{ cloudkitty_debug_dir }}" - register: synth_rating_info - changed_when: synth_rating_info.rc == 0 From 687d1edb942fb9fe5e9d8abe6cafab85efe759b7 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 10:52:37 -0400 Subject: [PATCH 15/43] Add ability to retrieve loki db with retrieve_loki_data.yml tasks file. --- .../tasks/ingest_loki_data.yml | 2 +- .../tasks/load_loki_data.yml | 3 + .../telemetry_chargeback/tasks/loki_rate.yml | 28 ++++++++ .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ 4 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml index 79e8896ee..a53751f3f 100644 --- a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -6,7 +6,7 @@ - name: "Read log file content" ansible.builtin.slurp: - src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" register: log_file_content - name: "Push data to Loki" diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml index 37c2aedb9..773d7322c 100644 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -4,3 +4,6 @@ - name: "Flush data to Loki storage for {{ scenario_name }}" ansible.builtin.include_tasks: flush_loki_data.yml + +- name: "Retrieve data log from Loki for {{ item }}" + ansible.builtin.include_tasks: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml new file mode 100644 index 000000000..822585336 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/loki_rate.yml @@ -0,0 +1,28 @@ +--- +- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" + register: cost_totals_by_type + changed_when: false + failed_when: cost_totals_by_type.rc != 0 + +- name: "**INFO** Print the rating by type {{ item }}" + ansible.builtin.debug: + var: cost_totals_by_type.stdout + +- name: "Output saved as yaml {{ item }}" + ansible.builtin.copy: + content: "{{ cost_totals_by_type.stdout }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" + mode: '0644' + +- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" + ansible.builtin.command: + cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" + register: cost_totals_summary + changed_when: false + failed_when: cost_totals_summary.rc != 0 + +- name: "**INFO** Print the rating summary {{ item }}" + ansible.builtin.debug: + var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..adaa2b34a --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ item }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ item }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ item }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: loki_retrieved_summary_info + changed_when: loki_retrieved_summary_info.rc == 0 From 5c1b404ebf1e347c4d085c71d2d5c6ebe5ee51d3 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 16/43] From bb98208897456b4aa1e331e842e1bbc17f1f30dd Mon Sep 17 00:00:00 2001 From: ayefimov Date: Wed, 22 Apr 2026 09:12:33 -0400 Subject: [PATCH 17/43] Add task file to retrieve loki data --- .../tasks/flush_loki_data.yml | 52 -------------- .../tasks/ingest_loki_data.yml | 42 ----------- .../telemetry_chargeback/tasks/loki_rate.yml | 28 -------- .../tasks/retrieve_loki_data.yml | 71 ------------------- 4 files changed, 193 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/loki_rate.yml delete mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml deleted file mode 100644 index 6ec05419d..000000000 --- a/roles/telemetry_chargeback/tasks/flush_loki_data.yml +++ /dev/null @@ -1,52 +0,0 @@ ---- -# Flush Loki Ingester Memory to Storage - -- name: "Flush execution inside OpenStack CLI" - block: - # create dir - - name: "Create directory inside OpenStack CLI" - ansible.builtin.command: - cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" - changed_when: false - - # certs to Flush data to Loki - - name: "Create directory to extract certificates" - ansible.builtin.file: - path: "{{ local_cert_dir }}" - state: directory - mode: '0755' - - # copy all certs - - name: "Copy certificates to OpenStack CLI" - ansible.builtin.command: - cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" - changed_when: true - - # flush loki - - name: "Trigger Loki ingester flush" - ansible.builtin.command: - cmd: > - oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- - curl -v -X POST {{ ingester_flush_url }} - --cert {{ remote_cert_dir }}/tls.crt - --key {{ remote_cert_dir }}/tls.key - --cacert {{ remote_cert_dir }}/service-ca.crt - register: flush_response - changed_when: true - failed_when: flush_response.rc != 0 - - # Status - - name: "Verify flush status" - ansible.builtin.assert: - that: - - "'204' in flush_response.stderr or '200' in flush_response.stderr" - fail_msg: "Flush failed" - success_msg: "Ingester Memory Flushed successfully" - - rescue: - - name: "Debug failure output" - ansible.builtin.debug: - msg: - - "Failure" - - "Stdout: {{ flush_response.stdout | default('') }}" - - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml deleted file mode 100644 index a53751f3f..000000000 --- a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# Ingest data log to Loki that is generated from gen_synth_loki_data.yml - -- name: "Ingest data log to Loki via API" - block: - - - name: "Read log file content" - ansible.builtin.slurp: - src: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_synth_data_suffix }}" - register: log_file_content - - - name: "Push data to Loki" - ansible.builtin.uri: - url: "{{ loki_push_url }}" - method: POST - body: "{{ log_file_content['content'] | b64decode | from_json }}" - body_format: json - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - validate_certs: false - status_code: 204 - return_content: true - register: loki_response - ignore_errors: false - failed_when: loki_response.status != 204 - - # Success - - name: "Confirm ingestion success" - ansible.builtin.debug: - msg: "Ingestion Successful!" - - rescue: - # Rescue block - - name: "Debug failure" - ansible.builtin.debug: - msg: "{{ loki_response.status | default('N/A') }}" - - # Failure - - name: "Report ingestion failure" - ansible.builtin.fail: - msg: "Ingestion Failed" - ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/loki_rate.yml b/roles/telemetry_chargeback/tasks/loki_rate.yml deleted file mode 100644 index 822585336..000000000 --- a/roles/telemetry_chargeback/tasks/loki_rate.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- -- name: "TEST Get Rate and Qty by type from CloudKitty {{ item }}" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml -g type" - register: cost_totals_by_type - changed_when: false - failed_when: cost_totals_by_type.rc != 0 - -- name: "**INFO** Print the rating by type {{ item }}" - ansible.builtin.debug: - var: cost_totals_by_type.stdout - -- name: "Output saved as yaml {{ item }}" - ansible.builtin.copy: - content: "{{ cost_totals_by_type.stdout }}" - dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_suffix }}" - mode: '0644' - -- name: "TEST Get Rate and Qty Summary from CloudKitty {{ item }}" - ansible.builtin.command: - cmd: "{{ openstack_cmd }} --rating-api-version 2 rating summary get -f yaml" - register: cost_totals_summary - changed_when: false - failed_when: cost_totals_summary.rc != 0 - -- name: "**INFO** Print the rating summary {{ item }}" - ansible.builtin.debug: - var: cost_totals_summary.stdout diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml deleted file mode 100644 index adaa2b34a..000000000 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ /dev/null @@ -1,71 +0,0 @@ ---- -- name: "Expected Count {{ item }}" - ansible.builtin.debug: - msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" - -# Query Loki -- name: "Retrieve Logs from Loki via API {{ item }}" - block: - - name: "Query Loki API" - ansible.builtin.uri: - url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" - method: GET - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - ca_path: "{{ cert_dir }}/ca.crt" - validate_certs: false - return_content: true - body_format: json - register: loki_response - # Wait condition - until: - - loki_response.status == 200 - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) - retries: 25 - delay: 60 - - - name: "Save Loki Data to JSON file" - ansible.builtin.copy: - content: "{{ loki_response.json | to_json }}" - dest: "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" - mode: '0644' - - # Validate - - name: "Verify Data Integrity {{ item }}" - vars: - actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" - ansible.builtin.assert: - that: - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - actual_count | int == (synth_data_rates.data_log.log_count | int) - fail_msg: >- - Query did not return all data entries. Expected - {{ synth_data_rates.data_log.log_count }} log entries, but Loki - only returned {{ actual_count }} - success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" - - rescue: - - name: "Debug failure" - ansible.builtin.debug: - msg: - - "Status: {{ loki_response.status | default('Unknown') }}" - - "Body: {{ loki_response.content | default('No Content') }}" - - "Msg: {{ loki_response.msg | default('Request failed') }}" - - # Failure - - name: "Report Retrieval Failure" - ansible.builtin.fail: - msg: "Retrieval Failed" - -- name: "Generate chargeback stats from Loki-retrieved data file: {{ item }}" - ansible.builtin.command: - cmd: > - python3 "{{ cloudkitty_summary_script }}" - -j "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_data_suffix }}" - -o "{{ artifacts_dir_zuul }}/{{ item }}{{ cloudkitty_loki_totals_metrics_suffix }}" - --debug "{{ cloudkitty_debug_dir }}" - register: loki_retrieved_summary_info - changed_when: loki_retrieved_summary_info.rc == 0 From 81dcb03d0289ad27e3e6dc13d34510aa6ec479ed Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 23 Apr 2026 14:20:43 -0400 Subject: [PATCH 18/43] [telemetry_chargeback] Add ability retrieve loki data --- .../tasks/flush_loki_data.yml | 52 ++++++++++++++ .../tasks/ingest_loki_data.yml | 42 +++++++++++ .../tasks/load_loki_data.yml | 2 +- roles/telemetry_chargeback/tasks/main.yml | 2 +- .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ 5 files changed, 167 insertions(+), 2 deletions(-) create mode 100644 roles/telemetry_chargeback/tasks/flush_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/flush_loki_data.yml b/roles/telemetry_chargeback/tasks/flush_loki_data.yml new file mode 100644 index 000000000..6ec05419d --- /dev/null +++ b/roles/telemetry_chargeback/tasks/flush_loki_data.yml @@ -0,0 +1,52 @@ +--- +# Flush Loki Ingester Memory to Storage + +- name: "Flush execution inside OpenStack CLI" + block: + # create dir + - name: "Create directory inside OpenStack CLI" + ansible.builtin.command: + cmd: "oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- mkdir -p {{ remote_cert_dir }}" + changed_when: false + + # certs to Flush data to Loki + - name: "Create directory to extract certificates" + ansible.builtin.file: + path: "{{ local_cert_dir }}" + state: directory + mode: '0755' + + # copy all certs + - name: "Copy certificates to OpenStack CLI" + ansible.builtin.command: + cmd: "oc cp {{ local_cert_dir }}/. {{ cloudkitty_namespace }}/{{ openstackpod }}:{{ remote_cert_dir }}/" + changed_when: true + + # flush loki + - name: "Trigger Loki ingester flush" + ansible.builtin.command: + cmd: > + oc exec -n {{ cloudkitty_namespace }} {{ openstackpod }} -- + curl -v -X POST {{ ingester_flush_url }} + --cert {{ remote_cert_dir }}/tls.crt + --key {{ remote_cert_dir }}/tls.key + --cacert {{ remote_cert_dir }}/service-ca.crt + register: flush_response + changed_when: true + failed_when: flush_response.rc != 0 + + # Status + - name: "Verify flush status" + ansible.builtin.assert: + that: + - "'204' in flush_response.stderr or '200' in flush_response.stderr" + fail_msg: "Flush failed" + success_msg: "Ingester Memory Flushed successfully" + + rescue: + - name: "Debug failure output" + ansible.builtin.debug: + msg: + - "Failure" + - "Stdout: {{ flush_response.stdout | default('') }}" + - "Stderr: {{ flush_response.stderr | default('') }}" diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..79e8896ee --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" + + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml index 773d7322c..9376dd787 100644 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -5,5 +5,5 @@ - name: "Flush data to Loki storage for {{ scenario_name }}" ansible.builtin.include_tasks: flush_loki_data.yml -- name: "Retrieve data log from Loki for {{ item }}" +- name: "Retrieve data log from Loki for {{ scenario_name }}" ansible.builtin.include_tasks: retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index a26d0695f..cba889981 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -31,4 +31,4 @@ always: - name: "Cleanup after job run" - ansible.builtin.include_tasks: cleanup_ck.yml + ansible.builtin.include_tasks: cleanup_ck.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..e21115291 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ scenario_name }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ scenario_name }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ scenario_name }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 From f954053f1f97da5ae33f6e091a7119c8073c24e4 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 1 May 2026 14:24:21 -0400 Subject: [PATCH 19/43] Adding loki flush tasks file. --- roles/telemetry_chargeback/tasks/main.yml | 2 +- .../tasks/retrieve_loki_data.yml | 71 ------------------- 2 files changed, 1 insertion(+), 72 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index a26d0695f..cba889981 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -31,4 +31,4 @@ always: - name: "Cleanup after job run" - ansible.builtin.include_tasks: cleanup_ck.yml + ansible.builtin.include_tasks: cleanup_ck.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml deleted file mode 100644 index e21115291..000000000 --- a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml +++ /dev/null @@ -1,71 +0,0 @@ ---- -- name: "Expected Count {{ scenario_name }}" - ansible.builtin.debug: - msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" - -# Query Loki -- name: "Retrieve Logs from Loki via API {{ scenario_name }}" - block: - - name: "Query Loki API" - ansible.builtin.uri: - url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" - method: GET - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - ca_path: "{{ cert_dir }}/ca.crt" - validate_certs: false - return_content: true - body_format: json - register: loki_response - # Wait condition - until: - - loki_response.status == 200 - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) - retries: 25 - delay: 60 - - - name: "Save Loki Data to JSON file" - ansible.builtin.copy: - content: "{{ loki_response.json | to_json }}" - dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - mode: '0644' - - # Validate - - name: "Verify Data Integrity {{ scenario_name }}" - vars: - actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" - ansible.builtin.assert: - that: - - loki_response.json.status == 'success' - - loki_response.json.data.result | length > 0 - - actual_count | int == (synth_data_rates.data_log.log_count | int) - fail_msg: >- - Query did not return all data entries. Expected - {{ synth_data_rates.data_log.log_count }} log entries, but Loki - only returned {{ actual_count }} - success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" - - rescue: - - name: "Debug failure" - ansible.builtin.debug: - msg: - - "Status: {{ loki_response.status | default('Unknown') }}" - - "Body: {{ loki_response.content | default('No Content') }}" - - "Msg: {{ loki_response.msg | default('Request failed') }}" - - # Failure - - name: "Report Retrieval Failure" - ansible.builtin.fail: - msg: "Retrieval Failed" - -- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" - ansible.builtin.command: - cmd: > - python3 "{{ cloudkitty_summary_script }}" - -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" - -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" - --debug "{{ cloudkitty_debug_dir }}" - register: synth_rating_info - changed_when: synth_rating_info.rc == 0 From a7ade53c1ccf9912fdb3c80917427dc4cbfe5665 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 24 Apr 2026 13:51:23 -0400 Subject: [PATCH 20/43] Moved debug var from tasks/main.yml to defaults/main.yml --- roles/telemetry_chargeback/defaults/main.yml | 9 +++++---- roles/telemetry_chargeback/tasks/cleanup_ck.yml | 1 + roles/telemetry_chargeback/tasks/main.yml | 11 ++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 9cc04c8c7..e2c4c2c80 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -2,14 +2,15 @@ # OpenStack CLI command openstack_cmd: "openstack" -# Debug mode +# Debug mode set cloudkitty_debug: false +cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" # Directory paths -logs_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/logs" -artifacts_dir_zuul: "{{ ansible_env.HOME }}/ci-framework-data/artifacts" +logs_dir_zuul: "{{ cifmw_basedir }}/logs" +artifacts_dir_zuul: "{{ cifmw_basedir }}/artifacts" cert_dir: "{{ ansible_user_dir }}/ck-certs" -local_cert_dir: "{{ ansible_env.HOME }}/ci-framework-data/flush_certs" +local_cert_dir: "{{ cifmw_basedir }}/flush_certs" remote_cert_dir: "osp-certs" # Cloudkitty certificates and secrets diff --git a/roles/telemetry_chargeback/tasks/cleanup_ck.yml b/roles/telemetry_chargeback/tasks/cleanup_ck.yml index 01407d155..9a2c68af3 100644 --- a/roles/telemetry_chargeback/tasks/cleanup_ck.yml +++ b/roles/telemetry_chargeback/tasks/cleanup_ck.yml @@ -1,4 +1,5 @@ --- +### cleans up after each test scenario - name: "Cleanup local certificates" ansible.builtin.file: path: "{{ local_cert_dir }}" diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 4cdda7000..cba889981 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -5,10 +5,6 @@ - name: "Setup Loki Environment" ansible.builtin.include_tasks: "setup_loki_env.yml" -- name: "CloudKitty debug ON/OFF" - ansible.builtin.set_fact: - cloudkitty_debug_dir: "{{ (cloudkitty_debug | bool) | ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}" - - name: "Find test files" ansible.builtin.find: paths: "{{ cloudkitty_scenario_dir }}" @@ -28,10 +24,11 @@ loop_var: scenario_name when: found_files | length > 0 - - name: "Cleanup after job run" - ansible.builtin.include_tasks: cleanup_ck.yml - rescue: - name: "Log failure" ansible.builtin.debug: msg: "Running test scenarios loop failed." + + always: + - name: "Cleanup after job run" + ansible.builtin.include_tasks: cleanup_ck.yml From 2f11ccf6882907528aae105d2cf0a29aa08d4445 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 21/43] From 88bda47b2893ed739dfbf1b7f5444b342d28e054 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 22/43] From 0780693dacb1c876b5df6570af5c26f9187ae1c2 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 23/43] From 3287f3240881714c53cd0ebcc8fd34774b46d293 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 21 Apr 2026 19:12:40 -0400 Subject: [PATCH 24/43] From 661c553797f42a258e83a62c7f11398b1f16e422 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Tue, 5 May 2026 14:13:01 -0400 Subject: [PATCH 25/43] Adding retrieve_loki_data.yml file so that job will download loki DB into a file --- .../tasks/retrieve_loki_data.yml | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 roles/telemetry_chargeback/tasks/retrieve_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml new file mode 100644 index 000000000..e21115291 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/retrieve_loki_data.yml @@ -0,0 +1,71 @@ +--- +- name: "Expected Count {{ scenario_name }}" + ansible.builtin.debug: + msg: "Input file has {{ synth_data_rates.data_log.log_count }} data entries that Loki has to return" + +# Query Loki +- name: "Retrieve Logs from Loki via API {{ scenario_name }}" + block: + - name: "Query Loki API" + ansible.builtin.uri: + url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ synth_data_rates.time.begin_step.nanosec }}&limit={{ limit }}" + method: GET + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + ca_path: "{{ cert_dir }}/ca.crt" + validate_certs: false + return_content: true + body_format: json + register: loki_response + # Wait condition + until: + - loki_response.status == 200 + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - (loki_response.json.data.result | map(attribute='values') | map('length') | sum) >= (synth_data_rates.data_log.log_count | int) + retries: 25 + delay: 60 + + - name: "Save Loki Data to JSON file" + ansible.builtin.copy: + content: "{{ loki_response.json | to_json }}" + dest: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + mode: '0644' + + # Validate + - name: "Verify Data Integrity {{ scenario_name }}" + vars: + actual_count: "{{ loki_response.json.data.result | map(attribute='values') | map('length') | sum }}" + ansible.builtin.assert: + that: + - loki_response.json.status == 'success' + - loki_response.json.data.result | length > 0 + - actual_count | int == (synth_data_rates.data_log.log_count | int) + fail_msg: >- + Query did not return all data entries. Expected + {{ synth_data_rates.data_log.log_count }} log entries, but Loki + only returned {{ actual_count }} + success_msg: "Query returned all data entries. Input file had {{ synth_data_rates.data_log.log_count }} entries and Loki returned {{ actual_count }}" + + rescue: + - name: "Debug failure" + ansible.builtin.debug: + msg: + - "Status: {{ loki_response.status | default('Unknown') }}" + - "Body: {{ loki_response.content | default('No Content') }}" + - "Msg: {{ loki_response.msg | default('Request failed') }}" + + # Failure + - name: "Report Retrieval Failure" + ansible.builtin.fail: + msg: "Retrieval Failed" + +- name: "Generate chargeback stats from Loki-retrieved data file: {{ scenario_name }}" + ansible.builtin.command: + cmd: > + python3 "{{ cloudkitty_summary_script }}" + -j "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_data_suffix }}" + -o "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_loki_totals_metrics_suffix }}" + --debug "{{ cloudkitty_debug_dir }}" + register: synth_rating_info + changed_when: synth_rating_info.rc == 0 From a4f6f5dc1af4e528e7c0967765b3ee755730161d Mon Sep 17 00:00:00 2001 From: Alex Yefimov Date: Tue, 5 May 2026 20:32:35 -0400 Subject: [PATCH 26/43] Adding database upload tasks file ingest_loki_data.yml (#372) * Moved debug var from tasks/main.yml to defaults/main.yml * [telemetry_chargeback] Add ability to ingest and retrieve loki data to role - Uploads data to loki * Add and ingest task file --- .../tasks/ingest_loki_data.yml | 42 +++++++++++++++++++ .../tasks/load_loki_data.yml | 3 ++ .../tasks/run_test_scenarios.yml | 3 ++ 3 files changed, 48 insertions(+) create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml create mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..79e8896ee --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,42 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml + +- name: "Ingest data log to Loki via API" + block: + + - name: "Read log file content" + ansible.builtin.slurp: + src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" + register: log_file_content + + - name: "Push data to Loki" + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ log_file_content['content'] | b64decode | from_json }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: true + register: loki_response + ignore_errors: false + failed_when: loki_response.status != 204 + + # Success + - name: "Confirm ingestion success" + ansible.builtin.debug: + msg: "Ingestion Successful!" + + rescue: + # Rescue block + - name: "Debug failure" + ansible.builtin.debug: + msg: "{{ loki_response.status | default('N/A') }}" + + # Failure + - name: "Report ingestion failure" + ansible.builtin.fail: + msg: "Ingestion Failed" + ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml new file mode 100644 index 000000000..c58821f27 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/load_loki_data.yml @@ -0,0 +1,3 @@ +--- +- name: "Ingest CloudKitty data log for {{ scenario_name }}" + ansible.builtin.include_tasks: ingest_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml index ee28375bc..0ce65ee09 100644 --- a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -1,3 +1,6 @@ --- - name: "Generate Synthetic Data for each file: {{ scenario_name }}" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Load data to Loki: {{ scenario_name }}" + ansible.builtin.include_tasks: "load_loki_data.yml" From a9c7fd2d3022adea42642f498e577ec4634a7f79 Mon Sep 17 00:00:00 2001 From: Alex Yefimov Date: Tue, 5 May 2026 20:33:29 -0400 Subject: [PATCH 27/43] Revert "Adding database upload tasks file ingest_loki_data.yml (#372)" This reverts commit a4f6f5dc1af4e528e7c0967765b3ee755730161d. --- .../tasks/ingest_loki_data.yml | 42 ------------------- .../tasks/load_loki_data.yml | 3 -- .../tasks/run_test_scenarios.yml | 3 -- 3 files changed, 48 deletions(-) delete mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml delete mode 100644 roles/telemetry_chargeback/tasks/load_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml deleted file mode 100644 index 79e8896ee..000000000 --- a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml +++ /dev/null @@ -1,42 +0,0 @@ ---- -# Ingest data log to Loki that is generated from gen_synth_loki_data.yml - -- name: "Ingest data log to Loki via API" - block: - - - name: "Read log file content" - ansible.builtin.slurp: - src: "{{ artifacts_dir_zuul }}/{{ scenario_name }}{{ cloudkitty_synth_data_suffix }}" - register: log_file_content - - - name: "Push data to Loki" - ansible.builtin.uri: - url: "{{ loki_push_url }}" - method: POST - body: "{{ log_file_content['content'] | b64decode | from_json }}" - body_format: json - client_cert: "{{ cert_dir }}/tls.crt" - client_key: "{{ cert_dir }}/tls.key" - validate_certs: false - status_code: 204 - return_content: true - register: loki_response - ignore_errors: false - failed_when: loki_response.status != 204 - - # Success - - name: "Confirm ingestion success" - ansible.builtin.debug: - msg: "Ingestion Successful!" - - rescue: - # Rescue block - - name: "Debug failure" - ansible.builtin.debug: - msg: "{{ loki_response.status | default('N/A') }}" - - # Failure - - name: "Report ingestion failure" - ansible.builtin.fail: - msg: "Ingestion Failed" - ignore_errors: false diff --git a/roles/telemetry_chargeback/tasks/load_loki_data.yml b/roles/telemetry_chargeback/tasks/load_loki_data.yml deleted file mode 100644 index c58821f27..000000000 --- a/roles/telemetry_chargeback/tasks/load_loki_data.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -- name: "Ingest CloudKitty data log for {{ scenario_name }}" - ansible.builtin.include_tasks: ingest_loki_data.yml diff --git a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml index 0ce65ee09..ee28375bc 100644 --- a/roles/telemetry_chargeback/tasks/run_test_scenarios.yml +++ b/roles/telemetry_chargeback/tasks/run_test_scenarios.yml @@ -1,6 +1,3 @@ --- - name: "Generate Synthetic Data for each file: {{ scenario_name }}" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" - -- name: "Load data to Loki: {{ scenario_name }}" - ansible.builtin.include_tasks: "load_loki_data.yml" From 6d482a96378fbff4bba61a7d07f1b9c660050d96 Mon Sep 17 00:00:00 2001 From: ayefimov Date: Fri, 24 Apr 2026 13:51:23 -0400 Subject: [PATCH 28/43] Moved debug var from tasks/main.yml to defaults/main.yml Made review changes to README and updated gen_synth_loki_data.py to improve code quality --- roles/telemetry_chargeback/README.md | 336 +++++++++++++++--- roles/telemetry_chargeback/defaults/main.yml | 9 +- .../files/gen_synth_loki_data.py | 7 +- .../telemetry_chargeback/tasks/cleanup_ck.yml | 1 + roles/telemetry_chargeback/tasks/main.yml | 11 +- 5 files changed, 291 insertions(+), 73 deletions(-) diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index c5d582a4f..9d254d2e7 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -1,111 +1,333 @@ telemetry_chargeback -========= -The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** feature. These tests are specific to the Cloudkitty feature. Tests that are not specific to this feature (e.g., standard OpenStack deployment validation, basic networking) should be added to a common role. +=================== -The role performs two main functions: +The **`telemetry_chargeback`** role validates and tests the **RHOSO CloudKitty** chargeback feature. It performs CloudKitty configuration validation and generates synthetic test data for chargeback scenario testing. -1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. -2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. +**Note:** This role contains tests specific to the CloudKitty feature. Generic OpenStack tests (deployment validation, basic networking) should be placed in a common role. Requirements ------------ -It relies on the following being available on the target or control host: -* This role requires **Ansible 2.9** or newer. -* The **OpenStack CLI client** must be installed and configured with administrative credentials. -* Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). -* Connectivity to the OpenStack API endpoint. -* **Python 3** with the following libraries for synthetic data generation: - * `PyYAML` - * `Jinja2` +### System Requirements -It is expected to be run **after** a successful deployment and configuration of the following components: +* **Ansible:** Version 2.9 or newer +* **Python 3** with the following libraries: + * `PyYAML` - YAML parsing and generation + * `Jinja2` - Template rendering +* **OpenStack CLI:** Installed and configured with administrative credentials + * Package: `python3-openstackclient` +* **Network:** Connectivity to OpenStack API endpoints -* **OpenStack:** A functional OpenStack cloud (RHOSO) environment. -* **Cloudkitty:** The Cloudkitty service must be installed, configured, and running. +### Infrastructure Requirements + +This role must be run **after** successful deployment of: + +* **OpenStack (RHOSO):** Functional cloud environment +* **CloudKitty:** Chargeback service installed, configured, and running +* **Loki/OpenShift** (optional): Required only for Loki integration features + * Control host needs `oc` CLI access + * CloudKitty Loki stack (route, certificates, ingester) deployed Role Variables -------------- -The role uses the following variables to control the testing environment and execution. ### User-Configurable Variables (defaults/main.yml) | Variable | Default Value | Description | |----------|---------------|-------------| -| `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | -| `cloudkitty_debug` | `false` | Enable debug mode for CloudKitty database dumps. | -| `logs_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/logs` | Directory for log files. | -| `artifacts_dir_zuul` | `{{ ansible_env.HOME }}/ci-framework-data/artifacts` | Directory for generated artifacts and test output. | -| `cert_dir` | `{{ ansible_user_dir }}/ck-certs` | Directory for CloudKitty client certificates. | -| `local_cert_dir` | `{{ ansible_env.HOME }}/ci-framework-data/flush_certs` | Local directory for certificate extraction. | -| `cloudkitty_namespace` | `openstack` | Kubernetes namespace where CloudKitty is deployed. | +| `openstack_cmd` | `"openstack"` | OpenStack CLI command (customize if not in PATH) | +| `cloudkitty_debug` | `false` | Enable debug mode for CloudKitty operations | +| `cloudkitty_debug_dir` | `"{{ (cloudkitty_debug \| bool) \| ternary(artifacts_dir_zuul + '/debug_ck_db', '') }}"` | Directory for debug output (auto-set based on debug flag) | +| `logs_dir_zuul` | `"{{ cifmw_basedir }}/logs"` | Directory for log files | +| `artifacts_dir_zuul` | `"{{ cifmw_basedir }}/artifacts"` | Directory for generated artifacts and test output | +| `cert_dir` | `"{{ ansible_user_dir }}/ck-certs"` | Directory for CloudKitty client certificates | +| `local_cert_dir` | `"{{ cifmw_basedir }}/flush_certs"` | Local directory for flush certificates (cleaned up after run) | +| `remote_cert_dir` | `"osp-certs"` | Remote directory inside OpenStack pod for certificates | +| `cert_secret_name` | `"cert-cloudkitty-client-internal"` | OpenShift secret name for client certificates | +| `client_secret` | `"secret/cloudkitty-lokistack-gateway-client-http"` | Secret for flush client certificates | +| `ca_configmap` | `"cm/cloudkitty-lokistack-ca-bundle"` | ConfigMap for CA bundle | +| `logql_query` | `"{service=\"cloudkitty\"}"` | LogQL query for Loki (overridable via `loki_query`) | +| `cloudkitty_namespace` | `"openstack"` | Kubernetes namespace where CloudKitty is deployed | +| `openstackpod` | `"openstackclient"` | OpenStack client pod name for exec/cp operations | +| `lookback` | `6` | Days to look back for Loki query time range | +| `limit` | `50` | Limit for Loki query results | +| `cloudkitty_test_scenarios` | `[]` | List of test scenario files to run (empty = auto-discover) | How It Works ------------ The role executes the following workflow: -1. **CloudKitty Validation** - Enables the hashmap rating module and sets its priority to 100. -2. **Loki Environment Setup** - Extracts Loki route information and certificates from the OpenShift cluster. -3. **Admin Credentials** - Retrieves admin project ID and user ID for test data generation. -4. **Scenario Discovery** - Finds all `test_*.yml` scenario files in the scenario directory. -5. **Scenario Loop** - For each scenario file found (exposed as `{{ scenario_name }}`): - - Generates synthetic Loki log data based on the scenario configuration - - Calculates expected chargeback metrics from the generated data - - Loads the metrics for validation -6. **Cleanup** - Removes temporary certificate directories. +1. **CloudKitty Validation** (`chargeback_tests.yml`) + - Enables the hashmap rating module + - Sets priority to 100 + - Validates module state + +2. **Loki Environment Setup** (`setup_loki_env.yml`) + - Extracts Loki route information from OpenShift + - Retrieves certificates from secrets/configmaps + - Configures Loki push/query URLs + +3. **Test Scenario Discovery** + - **Auto-discovery** (default): Finds all `test_*.yml` files in `files/` directory + - **User-provided**: Uses scenarios from `cloudkitty_test_scenarios` variable + +4. **Scenario Execution Loop** (for each discovered scenario) + - Generates synthetic Loki log data (`gen_synth_loki_data.py`) + - Calculates expected chargeback metrics (`gen_db_summary.py`) + - Loads metrics for validation + +5. **Cleanup** (`cleanup_ck.yml`) + - Removes temporary certificate directories + - Always runs (even on failure) via block/rescue/always structure + +### Loop Variable + +The role uses `{{ scenario_name }}` as the loop variable when processing multiple test scenarios, making it easy to track which scenario is currently executing. + +Python Scripts +-------------- + +The role includes two Python scripts for synthetic data generation and metrics calculation. + +### gen_synth_loki_data.py + +**Purpose:** Generates synthetic Loki-format JSON log data from scenario YAML files. + +**Description:** +This script reads a scenario configuration file (YAML), processes time-series data according to the specified parameters, and renders it through a Jinja2 template to produce Loki-compatible JSON output. It supports metric transformations, date field injection, and configurable timestamp ordering. + +**Usage:** +```bash +python3 gen_synth_loki_data.py --tmpl