diff --git a/.github/workflows/build-push-image.yaml b/.github/workflows/build-push-image.yaml new file mode 100644 index 0000000..ecce2b4 --- /dev/null +++ b/.github/workflows/build-push-image.yaml @@ -0,0 +1,41 @@ +name: Build and Push Container Image + +on: + push: + branches: [master, main] + workflow_dispatch: + +jobs: + build-push: + name: Build and Push Image + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Determine image tags + id: tags + env: + GIT_SHA: ${{ github.sha }} + run: | + echo "tags=latest $GIT_SHA" >> "$GITHUB_OUTPUT" + + - name: Build image + id: build + uses: redhat-actions/buildah-build@v2 + with: + image: quay-load + context: . + tags: ${{ steps.tags.outputs.tags }} + containerfiles: | + ./Dockerfile + platforms: linux/amd64 + + - name: Push image to Quay.io + uses: redhat-actions/push-to-registry@v2 + with: + image: ${{ steps.build.outputs.image }} + tags: ${{ steps.build.outputs.tags }} + registry: quay.io/${{ vars.QUAY_IMAGE_NAMESPACE || 'projectquay' }} + username: ${{ secrets.QUAY_USERNAME }} + password: ${{ secrets.QUAY_PASSWORD }} diff --git a/Dockerfile b/Dockerfile index 27b81fa..f9312f1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,15 @@ -FROM ubuntu +FROM registry.access.redhat.com/ubi9/python-311 LABEL maintainer="syahmed@redhat.com" WORKDIR /tmp -ARG DEBIAN_FRONTEND=noninteractive +USER root -# Install necessary libraries for subsequent commands -RUN apt-get update && \ - apt-get install -y software-properties-common python3.6 python3-venv python3-pip python3-apt wget git dumb-init podman skopeo redis-server - -# Create and activate virtual environment -RUN python3 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# Install necessary packages +RUN dnf install -y --nodocs \ + wget git podman skopeo fuse-overlayfs \ + && dnf clean all \ + && rm -rf /var/cache/dnf # Install vegeta for HTTP benchmarking RUN wget 
https://github.com/tsenart/vegeta/releases/download/v12.8.3/vegeta-12.8.3-linux-amd64.tar.gz \ @@ -23,18 +21,11 @@ RUN wget https://github.com/tsenart/vegeta/releases/download/v12.8.3/vegeta-12.8 RUN mkdir -p /opt/snafu/ \ && wget -O /tmp/benchmark-wrapper.tar.gz https://github.com/cloud-bulldozer/benchmark-wrapper/archive/refs/tags/v1.0.0.tar.gz \ && tar -xzf /tmp/benchmark-wrapper.tar.gz -C /opt/snafu/ --strip-components=1 \ - && pip install --upgrade pip \ - && pip install -e /opt/snafu/ \ + && pip install --upgrade pip "setuptools<71" \ + && pip install --no-build-isolation -e /opt/snafu/ \ && pip install "numpy<2" \ && rm -rf /tmp/benchmark-wrapper.tar.gz COPY . . -# Cleanup the installation remainings -RUN apt-get clean autoclean && \ - apt-get autoremove --yes && \ - rm -rf /var/lib/{apt,dpkg,cache,log}/ - -# Start the command -ENTRYPOINT ["/usr/bin/dumb-init", "--"] -CMD ["python3", "main.py"] +ENTRYPOINT ["python3", "main.py"] diff --git a/README.md b/README.md index ffd130a..c66c3de 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The test suite is designed to run on openshift platform using a simple configura permissions (checkboxes) granted. Hold on to this token as it will be used later. - Once after the quay application is deployed. Do a `pg_dump` in the quay postgres pod to capture the initial snapshot into a sql file and keep it copied at `assets/quaydb.sql`. -- An elasticsearch cluster is needed to store results. You can spin one using the [Operator](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-deploy-eck.html) +- (Optional) An elasticsearch cluster to store results. You can spin one using the [Operator](https://www.elastic.co/guide/en/cloud-on-k8s/current/k8s-deploy-eck.html). If ES is not available, results are written to local JSON files on a PVC. ## Running tests @@ -45,15 +45,16 @@ Now once we have the system ready, Deploy `deploy/test.job.yaml` on your openshi * `QUAY_HOST` - Sting. 
Indicating quay host url to perform testing. * `QUAY_OAUTH_TOKEN` - String. Application oauth token created in the prerequisites step. * `QUAY_ORG`- String. Specifies the test organization. -* `ES_HOST` - String. Elastic search host url. -* `ES_PORT` - String. Elastic search port number. -* `ES_INDEX` - String. Elastic search index to store the results. +* `ES_HOST` - String. (Optional) Elastic search host url. If not set, results are written to local JSON files only. +* `ES_PORT` - String. (Optional) Elastic search port number. +* `ES_INDEX` - String. (Optional) Elastic search index to store the results. +* `RESULTS_DIR` - String. (Optional) Directory to write local result JSON files. Defaults to `./results`. * `SKIP_PUSH` - String. Flag to skip pushes true/false. * `PULL_LAYERS` - String (Works with pull only). Images with n number of layers to be pulled. * `PULL_REPO_PREFIX` - String (Works with pull only). Prefix of the existing pull repo that has [image tags in this format](https://quay.io/repository/clair-load-test/clair-load-test?tab=tags). One can use this script [image_load.sh](https://github.com/vishnuchalla/quay-performance-scripts/blob/master/assets/image_load.sh) to build and push images accordingly. * `PUSH_PULL_IMAGE` - Image which contains source code and used in push/pull jobs for testing purposes. Same image that is used for load testing i.e `quay-load` in our case. * `CUSTOM_BUILD_IMAGE` - String. Custom base image to be used for push/pull activities. -* `PUSH_PUSH_ES_INDEX` - ES index to store quay push/pull results. It is separate as it follows different document structure. +* `PUSH_PULL_ES_INDEX` - (Optional) ES index to store quay push/pull results. It is separate as it follows different document structure. * `PUSH_PULL_NUMBERS` - The amount of images to do push/pull operations on. * `TARGET_HIT_SIZE` - String. Indicates the total amount of requests to hit the system with. * `CONCURRENCY` - String. 
Indicates the rate(concurrency) at which the requests hits must happen in parallel. @@ -62,6 +63,27 @@ Now once we have the system ready, Deploy `deploy/test.job.yaml` on your openshi This should spin up a redis pod and a test orchestrator pod in your desired namespace and start running the tests. Tail the pod logs for more info. +### Running without Elasticsearch + +Elasticsearch is optional. When `ES_HOST` is not set, nothing is sent to Elasticsearch (so `ES_PORT`, `ES_INDEX` and `PUSH_PULL_ES_INDEX` can be left unset too) and results are available only as local JSON files. The default `deploy/test.job.yaml` includes a PVC (`quay-perf-results`, 1Gi) mounted at `/results` for persistent storage. + +```bash +oc apply -f deploy/test.job.yaml -n <namespace> +``` + +After the test completes, retrieve results from the PVC: +```bash +oc run pvc-reader --image=busybox --restart=Never -n <namespace> \ + --overrides='{"spec":{"containers":[{"name":"pvc-reader","image":"busybox","command":["sleep","3600"],"volumeMounts":[{"name":"results","mountPath":"/results"}]}],"volumes":[{"name":"results","persistentVolumeClaim":{"claimName":"quay-perf-results"}}]}}' +oc cp <namespace>/pvc-reader:/results ./results +oc delete pod pvc-reader -n <namespace> +``` + +Result files: +- `<TEST_UUID>_push_results.json` — per-image push timings with summary +- `<TEST_UUID>_pull_results.json` — per-image pull timings with summary +- `./logs/__result.json` — Vegeta per-second timeseries (always written) + ### More about tests The tests use [Vegeta](https://github.com/tsenart/vegeta) to trigger the load and index the results to specified elastic search instance. 
The list of apis involved in each phase as as below: diff --git a/config.py b/config.py index 6a1a368..bfe3143 100644 --- a/config.py +++ b/config.py @@ -23,14 +23,15 @@ def get_config(self): 'quay_org': os.environ.get("QUAY_ORG"), 'test_uuid': os.environ.get('TEST_UUID'), 'auth_token': os.environ.get("QUAY_OAUTH_TOKEN"), - 'es_host': os.environ.get('ES_HOST'), - 'es_port': os.environ.get('ES_PORT'), - 'es_index': os.environ.get('ES_INDEX'), + 'es_host': os.environ.get('ES_HOST', ''), + 'es_port': os.environ.get('ES_PORT', ''), + 'es_index': os.environ.get('ES_INDEX', ''), 'push_pull_image': os.environ.get('PUSH_PULL_IMAGE'), 'custom_build_image': os.environ.get('CUSTOM_BUILD_IMAGE', ''), 'pull_layers': int(os.environ.get('PULL_LAYERS', 0)), 'pull_repo_prefix': os.environ.get('PULL_REPO_PREFIX', ''), - 'push_pull_es_index': os.environ.get('PUSH_PULL_ES_INDEX'), + 'push_pull_es_index': os.environ.get('PUSH_PULL_ES_INDEX', ''), + 'results_directory': os.environ.get('RESULTS_DIR', './results'), 'push_pull_numbers': int(os.environ.get("PUSH_PULL_NUMBERS", 50)), 'concurrency': int(os.environ.get("CONCURRENCY", 50)), 'target_hit_size': int(os.environ.get('TARGET_HIT_SIZE')), @@ -52,11 +53,7 @@ def validate_config(self): assert self.config["quay_org"], "QUAY_ORG is not set" assert self.config["test_uuid"], "TEST_UUID is not set" assert self.config["auth_token"], "AUTH_TOKEN is not set" - assert self.config["es_host"], "ES_HOST is not set" - assert self.config["es_port"], "ES_PORT is not set" - assert self.config["es_index"], "ES_INDEX is not set" assert self.config["push_pull_image"], "PUSH_PULL_IMAGE is not set" - assert self.config["push_pull_es_index"], "PUSH_PULL_INDEX is not set" assert self.config["push_pull_numbers"], "PUSH_PULL_NUMBERS is not set" assert isinstance(self.config["concurrency"], int), "CONCURRENCY is not an integer" assert self.config["target_hit_size"], "TARGET_HIT_SIZE is not set" diff --git a/deploy/test.job.yaml b/deploy/test.job.yaml index 
befb720..360fb3a 100644 --- a/deploy/test.job.yaml +++ b/deploy/test.job.yaml @@ -27,7 +27,7 @@ metadata: labels: quay-perf-test-component: redis spec: - replicas: 1 + replicas: 1 selector: matchLabels: quay-perf-test-component: redis @@ -80,14 +80,16 @@ spec: value: - name: ES_PORT value: - - name: PYTHONUNBUFFERED - value: "0" - name: ES_INDEX value: - - name: PUSH_PULL_IMAGE - value: - name: PUSH_PULL_ES_INDEX value: + - name: PYTHONUNBUFFERED + value: "0" + - name: RESULTS_DIR + value: "/results" + - name: PUSH_PULL_IMAGE + value: - name: PUSH_PULL_NUMBERS value: - name: TARGET_HIT_SIZE @@ -102,10 +104,28 @@ spec: value: - name: TEST_PHASES value: "LOAD,RUN,DELETE" + volumeMounts: + - name: test-results + mountPath: /results resources: requests: cpu: "1" memory: "512Mi" imagePullPolicy: Always + volumes: + - name: test-results + persistentVolumeClaim: + claimName: quay-perf-results restartPolicy: Never backoffLimit: 0 +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: quay-perf-results +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi diff --git a/main.py b/main.py index 03e537a..20679f4 100644 --- a/main.py +++ b/main.py @@ -24,7 +24,11 @@ import requests import warnings -from elasticsearch import Elasticsearch, helpers +try: + from elasticsearch import Elasticsearch, helpers +except ImportError: + Elasticsearch = None + helpers = None from kubernetes import client, config from concurrent.futures import ThreadPoolExecutor, as_completed @@ -35,6 +39,30 @@ redis_client = redis.Redis(host='redis') +def write_results_to_file(results, results_dir, filename): + if not os.path.isdir(results_dir): + os.makedirs(results_dir) + filepath = os.path.join(results_dir, filename) + with open(filepath, 'w') as f: + json.dump(results, f, indent=2, + default=lambda o: o.isoformat() if isinstance(o, datetime.datetime) else str(o)) + logging.info("Results written to local file: %s", filepath) + + +def write_results_to_es(env_config, 
results): + if not (env_config["es_host"] and Elasticsearch): + logging.info("ES not configured — results saved to local file only") + return + logging.info("Writing results to Elasticsearch: %s", env_config["es_host"]) + es = Elasticsearch([env_config["es_host"]], port=env_config["es_port"]) + docs = [{ + '_index': env_config["push_pull_es_index"], + 'type': '_doc', + '_source': r + } for r in results] + helpers.bulk(es, docs) + + # Configure Logging logging.basicConfig( stream=sys.stdout, @@ -185,28 +213,33 @@ def podman_create(tags, custom_build_image="", concurrency=4): if n % 10 == 0: logging.info(f"{n}/{len(tags)} images completed pushing") - # Write results to Elasticsearch - logging.info("Writing 'registry push' results to Elasticsearch") - es = Elasticsearch([env_config["es_host"]], port=env_config["es_port"]) - docs = [{ - '_index': env_config["push_pull_es_index"], - 'type': '_doc', - '_source': r - } for r in push_results] - helpers.bulk(es, docs) - - # Print summary - elapsed_times = [r['elapsed_time'] for r in push_results] - summary = { - 'durations': { - 'mean': mean(elapsed_times), - 'max': max(elapsed_times), - 'min': min(elapsed_times), - }, - 'pushes': { - 'total': len(push_results) + # Compute summary + if push_results: + elapsed_times = [r['elapsed_time'] for r in push_results] + summary = { + 'durations': { + 'mean': mean(elapsed_times), + 'max': max(elapsed_times), + 'min': min(elapsed_times), + }, + 'total': len(push_results), + 'successful': sum(1 for r in push_results if r['successful']), + 'failed': sum(1 for r in push_results if not r['successful']), } - } + else: + summary = {'durations': {}, 'total': 0, 'successful': 0, 'failed': 0} + + # Write results to local filesystem + write_results_to_file({'summary': summary, 'results': push_results}, + env_config["results_directory"], + '%s_push_results.json' % env_config["test_uuid"]) + + write_results_to_es(env_config, push_results) + + for r in push_results: + 
redis_client.rpush('push_results:' + env_config["test_uuid"], + json.dumps(r, default=lambda o: o.isoformat() if isinstance(o, datetime.datetime) else str(o))) + logging.info('Podman-Push Summary') logging.info(json.dumps(summary, sort_keys=True, indent=2)) @@ -367,7 +400,7 @@ def pull_single_image_http(tag, username=None, password=None, max_failures=3): 'end_time': end_time, 'success_count': success_count, 'failure_count': failure_count, - 'successful': (success_count == len(digests)), + 'successful': (failure_count == 0), } @@ -400,17 +433,7 @@ def podman_pull(tags, concurrency, username=None, password=None): if n % 10 == 0: logging.info(f"Pulling {n}/{len(tags)} images completed.") - # Write results to Elasticsearch - logging.info("Writing 'registry pull' results to Elasticsearch") - es = Elasticsearch([env_config["es_host"]], port=env_config["es_port"]) - docs = [{ - '_index': env_config["push_pull_es_index"], - 'type': '_doc', - '_source': r - } for r in results] - helpers.bulk(es, docs) - - # Summary logging (same fields as earlier) + # Compute summary if results: elapsed_times = [r['elapsed_time'] for r in results] summary = { @@ -419,12 +442,23 @@ def podman_pull(tags, concurrency, username=None, password=None): 'max': max(elapsed_times), 'min': min(elapsed_times), }, - 'pulls': { - 'total': len(results) - } + 'total': len(results), + 'successful': sum(1 for r in results if r['successful']), + 'failed': sum(1 for r in results if not r['successful']), } else: - summary = {'durations': {}, 'pulls': {'total': 0}} + summary = {'durations': {}, 'total': 0, 'successful': 0, 'failed': 0} + + # Write results to local filesystem + write_results_to_file({'summary': summary, 'results': results}, + env_config["results_directory"], + '%s_pull_results.json' % env_config["test_uuid"]) + + write_results_to_es(env_config, results) + + for r in results: + redis_client.rpush('pull_results:' + env_config["test_uuid"], + json.dumps(r, default=lambda o: o.isoformat() if 
isinstance(o, datetime.datetime) else str(o))) logging.info('HTTP-Pull Summary') logging.info(json.dumps(summary, sort_keys=True, indent=2)) @@ -512,6 +546,7 @@ def create_test_push_job(namespace, quay_host, username, password, concurrency, client.V1EnvVar(name='ES_PORT', value=str(env_config["es_port"])), client.V1EnvVar(name='ES_INDEX', value=env_config["es_index"]), client.V1EnvVar(name='TEST_PHASES', value=env_config["test_phases"]), + client.V1EnvVar(name='RESULTS_DIR', value=env_config["results_directory"]), ] resource_requirements = client.V1ResourceRequirements( @@ -528,7 +563,7 @@ def create_test_push_job(namespace, quay_host, username, password, concurrency, env=env_vars, resources=resource_requirements, ) - + template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(labels={'quay-perf-test-component-push': 'executor-'+"-".join(username.split("_"))}), spec=client.V1PodSpec(restart_policy='Never', containers=[container]) @@ -586,6 +621,7 @@ def create_test_pull_job(namespace, quay_host, username, password, concurrency, client.V1EnvVar(name='ES_PORT', value=str(env_config["es_port"])), client.V1EnvVar(name='ES_INDEX', value=env_config["es_index"]), client.V1EnvVar(name='TEST_PHASES', value=env_config["test_phases"]), + client.V1EnvVar(name='RESULTS_DIR', value=env_config["results_directory"]), ] resource_requirements = client.V1ResourceRequirements( @@ -602,7 +638,7 @@ def create_test_pull_job(namespace, quay_host, username, password, concurrency, env=env_vars, resources=resource_requirements, ) - + template = client.V1PodTemplateSpec( metadata=client.V1ObjectMeta(labels={'quay-perf-test-component-pull': 'executor-'+"-".join(username.split("_"))}), spec=client.V1PodSpec(restart_policy='Never', containers=[container]) @@ -643,6 +679,7 @@ def parallel_process(user, **kwargs): :return: None """ common_args = kwargs + env_config = Config().get_config() # Container Operations redis_client.delete('tags_to_push'+"-".join(user.split("_"))) # avoid stale 
data redis_client.rpush('tags_to_push'+"-".join(user.split("_")), *common_args['tags']) @@ -650,6 +687,9 @@ def parallel_process(user, **kwargs): redis_client.delete('tags_to_pull'+"-".join(user.split("_"))) # avoid stale data redis_client.rpush('tags_to_pull'+"-".join(user.split("_")), *common_args['tags']) + + redis_client.delete('push_results:' + common_args['uuid']) # avoid stale data + redis_client.delete('pull_results:' + common_args['uuid']) # avoid stale data logging.info('Queued %s tags to be pulled' % len(common_args['tags'])) # Start the Registry Push Test job @@ -674,6 +714,26 @@ def parallel_process(user, **kwargs): logging.info('Waiting for %s to finish. Queue: %s/%s' % (job_name, remaining, len(common_args['tags']))) time.sleep(60 * 1) # 1 minute + # Collect push results from all worker pods via Redis + push_results = [] + while True: + data = redis_client.lpop('push_results:' + common_args['uuid']) + if data is None: + break + push_results.append(json.loads(data)) + if push_results: + elapsed_times = [r['elapsed_time'] for r in push_results] + summary = { + 'durations': {'mean': mean(elapsed_times), 'max': max(elapsed_times), 'min': min(elapsed_times)}, + 'total': len(push_results), + 'successful': sum(1 for r in push_results if r.get('successful')), + 'failed': sum(1 for r in push_results if not r.get('successful')), + } + write_results_to_file({'summary': summary, 'results': push_results}, + env_config["results_directory"], + '%s_push_results.json' % common_args['uuid']) + logging.info("Collected %d push results from worker pods", len(push_results)) + # Start the Registry Pull Test job create_test_pull_job(common_args['namespace'], common_args['quay_host'], user, common_args['password'], common_args['concurrency'], common_args['uuid'], common_args['auth_token'], @@ -696,6 +756,26 @@ def parallel_process(user, **kwargs): logging.info('Waiting for %s to finish. 
Queue: %s/%s' % (job_name, remaining, len(common_args['tags']))) time.sleep(60 * 1) # 1 minute + # Collect pull results from all worker pods via Redis + pull_results = [] + while True: + data = redis_client.lpop('pull_results:' + common_args['uuid']) + if data is None: + break + pull_results.append(json.loads(data)) + if pull_results: + elapsed_times = [r['elapsed_time'] for r in pull_results] + summary = { + 'durations': {'mean': mean(elapsed_times), 'max': max(elapsed_times), 'min': min(elapsed_times)}, + 'total': len(pull_results), + 'successful': sum(1 for r in pull_results if r.get('successful')), + 'failed': sum(1 for r in pull_results if not r.get('successful')), + } + write_results_to_file({'summary': summary, 'results': pull_results}, + env_config["results_directory"], + '%s_pull_results.json' % common_args['uuid']) + logging.info("Collected %d pull results from worker pods", len(pull_results)) + def batch_process(users_chunk, batch_args): jobs = [] diff --git a/utils/attacker.py b/utils/attacker.py index 0b3ffbb..18bbc39 100644 --- a/utils/attacker.py +++ b/utils/attacker.py @@ -98,23 +98,26 @@ def run_vegeta(self, test_name, request_dicts, target_name): assert p.returncode == 0 logging.info('Results for test %s written to file: %s' % (test_name, result_filename)) - # Use Snafu to push results to Elasticsearch - logging.info("Recording test results in ElasticSearch: %s", env_config["es_host"]) - cmd = [ - 'run_snafu', - '-t', 'vegeta', - '-u', env_config["test_uuid"], - '-w', str(env_config["concurrency"]), - '-r', result_filename, - # '--target_name', target_name, - '--target_name', test_name, - ] - snafu_env = os.environ.copy() - snafu_env['es'] = env_config["es_host"] - snafu_env['es_port'] = env_config["es_port"] - snafu_env['es_index'] = env_config["es_index"] - snafu_env['clustername'] = env_config["quay_host"] - p = Popen(cmd, stdout=PIPE, stderr=STDOUT, env=snafu_env) - output, _ = p.communicate() - logging.info(output) - assert p.returncode == 0 
+ # Use Snafu to push results to Elasticsearch (if configured) + if env_config["es_host"]: + logging.info("Recording test results in ElasticSearch: %s", env_config["es_host"]) + cmd = [ + 'run_snafu', + '-t', 'vegeta', + '-u', env_config["test_uuid"], + '-w', str(env_config["concurrency"]), + '-r', result_filename, + # '--target_name', target_name, + '--target_name', test_name, + ] + snafu_env = os.environ.copy() + snafu_env['es'] = env_config["es_host"] + snafu_env['es_port'] = env_config["es_port"] + snafu_env['es_index'] = env_config["es_index"] + snafu_env['clustername'] = env_config["quay_host"] + p = Popen(cmd, stdout=PIPE, stderr=STDOUT, env=snafu_env) + output, _ = p.communicate() + logging.info(output) + assert p.returncode == 0 + else: + logging.info("ES not configured — vegeta results saved to local file only: %s", result_filename)