Address review: route artifact uploads through the Sample Platform, require login on download routes, select binary by test platform, extract ai.json output-entry helper

pulk17 · pulk17 · commit 040603c41c45 · 2026-06-02T00:03:24.000+05:30
diff --git a/install/ci-vm/ci-linux/ci/runCI b/install/ci-vm/ci-linux/ci/runCI
@@ -139,10 +139,11 @@ if [ -e "${dstDir}/ccextractor" ]; then
 #!/bin/bash
 COMBINED_LOG="/tmp/combined_stdout.log"
 REAL_BINARY="PLACEHOLDER_BINARY"
-EXIT_CODE_FILE="/tmp/.wrapper_exit_code"
+EXIT_CODE_FILE=$(mktemp)
 echo "=== TEST INVOCATION: $@ ===" >> "$COMBINED_LOG"
 { "$REAL_BINARY" "$@" 2>&1; echo $? > "$EXIT_CODE_FILE"; } | tee -a "$COMBINED_LOG"
 exit_code=$(cat "$EXIT_CODE_FILE")
+rm -f "$EXIT_CODE_FILE"
 echo "=== EXIT CODE: ${exit_code} ===" >> "$COMBINED_LOG"
 echo "" >> "$COMBINED_LOG"
 exit $exit_code
@@ -153,37 +154,33 @@ WRAPPER_EOF
         executeCommand cd ${suiteDstDir}
         executeCommand ${tester} --debug --entries "${testFile}" --executable "${wrapper_path}" --tempfolder "${tempFolder}" --timeout 600 --reportfolder "${reportFolder}" --resultfolder "${resultFolder}" --samplefolder "${sampleFolder}" --method Server --url "${reportURL}"
 
-        # Upload AI artifacts to GCS
-        gcs_bucket=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/bucket" -H "Metadata-Flavor: Google")
-        test_id=$(curl -s "http://metadata/computeMetadata/v1/instance/attributes/testID" -H "Metadata-Flavor: Google")
-        token=$(curl -s "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" -H "Metadata-Flavor: Google" | python3 -c "import sys,json; print(json.load(sys.stdin)['access_token'])")
-
+        # Upload artifacts through the Sample Platform server
         upload_artifact() {
             local file_path="$1"
-            local dest_path="$2"
+            local artifact_name="$2"
             if [ -f "$file_path" ]; then
                 local http_code
-                http_code=$(curl -s -X POST --data-binary @"$file_path" \
-                    -H "Authorization: Bearer $token" \
-                    -H "Content-Type: application/octet-stream" \
-                    -w "%{http_code}" \
-                    -o /dev/null \
-                    "https://storage.googleapis.com/upload/storage/v1/b/${gcs_bucket}/o?uploadType=media&name=${dest_path}")
-                if [ -z "$http_code" ] || [ "$http_code" -ne 200 ]; then
-                    echo "GCS upload failed for ${dest_path}: HTTP ${http_code:-no_response}" >> "${logFile}"
+                http_code=$(curl --silent --user-agent "${userAgent}" \
+                    --form "type=artifact" \
+                    --form "name=${artifact_name}" \
+                    --form "file=@${file_path}" \
+                    --write-out "%{http_code}" --output /dev/null \
+                    "${reportURL}" 2>/dev/null)
+                if [ -z "$http_code" ] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
+                    echo "Artifact upload failed for ${artifact_name}: HTTP ${http_code:-no_response}" >> "${logFile}"
                 fi
             fi
         }
 
-        upload_artifact "$ccextractor_path" "test_artifacts/${test_id}/ccextractor"
+        upload_artifact "$ccextractor_path" "ccextractor"
 
         # Upload combined stdout log
-        upload_artifact "${combined_stdout}" "test_artifacts/${test_id}/combined_stdout.log"
+        upload_artifact "${combined_stdout}" "combined_stdout.log"
 
         # Upload coredumps if any
         for core_file in /tmp/coredumps/core.*; do
             if [ -f "$core_file" ]; then
-                upload_artifact "$core_file" "test_artifacts/${test_id}/coredump"
+                upload_artifact "$core_file" "coredump"
                 break
             fi
         done
diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py
@@ -1194,9 +1194,7 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict:
             startup_script = f.read()
         metadata_items = [
             {'key': 'startup-script', 'value': startup_script},
-            {'key': 'reportURL', 'value': reportURL},
-            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')},
-            {'key': 'testID', 'value': str(test.id)}
+            {'key': 'reportURL', 'value': reportURL}
         ]
     elif test.platform == TestPlatform.windows:
         image_response = compute.images().getFromFamily(project=config.get('WINDOWS_INSTANCE_PROJECT_NAME', ''),
@@ -1217,9 +1215,7 @@ def create_instance(compute, project, zone, test, reportURL) -> Dict:
             {'key': 'windows-startup-script-ps1', 'value': startup_script},
             {'key': 'service_account', 'value': service_account},
             {'key': 'rclone_conf', 'value': rclone_conf},
-            {'key': 'reportURL', 'value': reportURL},
-            {'key': 'bucket', 'value': config.get('GCS_BUCKET_NAME', '')},
-            {'key': 'testID', 'value': str(test.id)}
+            {'key': 'reportURL', 'value': reportURL}
         ]
     source_disk_image = image_response['selfLink']
 
@@ -2346,6 +2342,11 @@ def progress_reporter(test_id, token):
                 if not upload_type_request(log, test_id, repo_folder, test, request):
                     return "EMPTY"
 
+            elif request.form['type'] == 'artifact':
+                log.info(f'[PROGRESS_REPORTER][Test: {test_id}] Artifact upload')
+                if not artifact_upload_request(log, test_id, request):
+                    return "EMPTY"
+
             elif request.form['type'] == 'finish':
                 log.info(f'[PROGRESS_REPORTER][Test: {test_id}] Test finished')
                 finish_type_request(log, test_id, test, request)
@@ -2695,6 +2696,45 @@ def upload_type_request(log, test_id, repo_folder, test, request) -> bool:
     return False
 
 
+# Closed allow-list of artifact names a CI VM may upload (frozen: never mutated at runtime)
+ALLOWED_ARTIFACT_NAMES = frozenset({'ccextractor', 'ccextractor.exe', 'combined_stdout.log', 'coredump'})
+
+
+def artifact_upload_request(log, test_id, request) -> bool:
+    """
+    Handle artifact upload from the CI VM.
+
+    Validates the artifact name against an allow-list and requires a named
+    file part, then uploads it to GCS under test_artifacts/{test_id}/{name}.
+
+    :param log: logger
+    :type log: Logger
+    :param test_id: The id of the test to update.
+    :type test_id: int
+    :param request: Request parameters
+    :type request: Request
+    :return: True if the file was uploaded, False if the request was rejected.
+    :rtype: bool
+    """
+    from run import storage_client_bucket
+
+    artifact_name = request.form.get('name', '')
+    if artifact_name not in ALLOWED_ARTIFACT_NAMES:
+        log.warning(f"[Test: {test_id}] Rejected artifact upload with disallowed name: {artifact_name}")
+        return False
+
+    # Treat a file part with an empty filename the same as a missing one.
+    uploaded_file = request.files.get('file')
+    if uploaded_file is None or not uploaded_file.filename:
+        log.warning(f"[Test: {test_id}] Artifact upload missing file")
+        return False
+
+    blob_path = f'test_artifacts/{test_id}/{artifact_name}'
+    storage_client_bucket.blob(blob_path).upload_from_file(uploaded_file.stream)
+    log.info(f"[Test: {test_id}] Artifact '{artifact_name}' uploaded to {blob_path}")
+    return True
+
+
 def finish_type_request(log, test_id, test, request):
     """
     Handle finish request type for progress reporter.
diff --git a/mod_test/controllers.py b/mod_test/controllers.py
@@ -21,6 +21,9 @@
 
 mod_test = Blueprint('test', __name__)
 
+CCEXTRACTOR_WIN_BINARY = 'ccextractor.exe'
+CCEXTRACTOR_LINUX_BINARY = 'ccextractor'
+
 
 @mod_test.before_app_request
 def before_app_request() -> None:
@@ -367,6 +370,7 @@ def generate_diff(test_id: int, regression_test_id: int, output_id: int, to_view
 
 
 @mod_test.route('/log-files/<test_id>')
+@login_required
 def download_build_log_file(test_id):
     """
     Serve download of build log.
@@ -464,18 +468,18 @@ def _artifact_redirect(blob_path, filename='artifact'):
 
 
 @mod_test.route('/<int:test_id>/binary', methods=['GET'])
+@login_required
 def download_binary(test_id):
-    """Download the ccextractor binary used in a test (linux or windows)."""
-    from run import storage_client_bucket
-    # Try linux name first, then windows
-    for name in ['ccextractor', 'ccextractor.exe']:
-        blob_path = f'test_artifacts/{test_id}/{name}'
-        if storage_client_bucket.blob(blob_path).exists():
-            return _artifact_redirect(blob_path, filename=name)
-    abort(404)
+    """Download a test's ccextractor binary (404 for an unknown test); the file name follows test.platform."""
+    test = Test.query.filter(Test.id == test_id).first()
+    if test is None:
+        abort(404)
+    name = CCEXTRACTOR_LINUX_BINARY if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY
+    return _artifact_redirect(f'test_artifacts/{test_id}/{name}', filename=name)
 
 
 @mod_test.route('/<int:test_id>/coredump', methods=['GET'])
+@login_required
 def download_coredump(test_id):
     """Download the coredump from a test, if one was produced."""
     return _artifact_redirect(
@@ -485,6 +489,7 @@ def download_coredump(test_id):
 
 
 @mod_test.route('/<int:test_id>/combined-stdout', methods=['GET'])
+@login_required
 def download_combined_stdout(test_id):
     """Download the combined stdout/stderr log from all test invocations."""
     return _artifact_redirect(
@@ -494,6 +499,7 @@ def download_combined_stdout(test_id):
 
 
 @mod_test.route('/<int:test_id>/regression/<int:regression_test_id>/<int:output_id>/output-got', methods=['GET'])
+@login_required
 def download_output_got(test_id, regression_test_id, output_id):
     """Download the actual output file from TestResults using DB hash."""
     rf = TestResultFile.query.filter(and_(
@@ -511,6 +517,7 @@ def download_output_got(test_id, regression_test_id, output_id):
 
 
 @mod_test.route('/<int:test_id>/regression/<int:regression_test_id>/<int:output_id>/output-expected', methods=['GET'])
+@login_required
 def download_output_expected(test_id, regression_test_id, output_id):
     """Download the expected output file from TestResults using DB hash."""
     rf = TestResultFile.query.filter(and_(
@@ -526,8 +533,9 @@ def download_output_expected(test_id, regression_test_id, output_id):
         filename=f'output_expected_{regression_test_id}_{output_id}{ext}'
     )
 @mod_test.route('/<int:test_id>/sample/<int:sample_id>', methods=['GET'])
+@login_required
 def download_sample_ai(test_id, sample_id):
-    """Download the sample file for a regression test (no auth required for AI workflow)."""
+    """Download the sample file for a regression test."""
     from mod_sample.models import Sample
     sample = Sample.query.filter(Sample.id == sample_id).first()
     if sample is None:
@@ -538,75 +546,69 @@ def download_sample_ai(test_id, sample_id):
     )
 
 
-def _process_test_case(test_id, category_name, t_data):
-    """Helper function to process a single test case."""
+def _build_output_entry(test_id, rt, expected_output, result_files):
+    """
+    Build a single output entry dict for the ai.json response.
+
+    ``got_url`` and ``diff_url`` stay None unless the first matching result
+    file (test_id != -1, same output id) has a non-None ``got``.
+
+    :param test_id: id of the test the generated URLs refer to
+    :param rt: regression test owning the output; only ``rt.id`` is read
+    :param expected_output: output record (``id`` and ``correct_extension`` are read)
+    :param result_files: iterable of result file rows searched for a match
+    :return: dict with output_id, correct_extension, expected_url, got_url, diff_url
+    :rtype: dict
+    """
+    # Identifiers shared by every URL generated for this output.
+    ids = {
+        'test_id': test_id,
+        'regression_test_id': rt.id,
+        'output_id': expected_output.id,
+    }
+
+    entry = {
+        'output_id': expected_output.id,
+        'correct_extension': expected_output.correct_extension,
+        'expected_url': url_for('.download_output_expected', **ids, _external=True),
+        'got_url': None,
+        'diff_url': None,
+    }
+
+    for candidate in result_files:
+        if candidate.test_id == -1:
+            continue
+        if candidate.regression_test_output_id != expected_output.id:
+            continue
+        # Only the first matching row is considered.
+        if candidate.got is not None:
+            entry['got_url'] = url_for('.download_output_got', **ids, _external=True)
+            entry['diff_url'] = url_for('.generate_diff', **ids, to_view=0, _external=True)
+        break
+
+    return entry
+
+
+def _process_test_case(test, category_name, t_data):
+    """Build a structured dict for a single test case in the ai.json response."""
     rt = t_data['test']
     result = t_data['result']
     is_error = t_data.get('error', False)
     result_files = t_data['files']
 
-    outputs = []
-    for expected_output in rt.output_files:
-        if expected_output.ignore:
-            continue
-        
-        matched_rf = None
-        for rf in result_files:
-            if rf.test_id != -1 and rf.regression_test_output_id == expected_output.id:
-                matched_rf = rf
-                break
-        
-        got_url = None
-        diff_url = None
-        
-        if matched_rf and matched_rf.got is not None:
-            got_url = url_for(
-                '.download_output_got',
-                test_id=test_id,
-                regression_test_id=rt.id,
-                output_id=expected_output.id,
-                _external=True
-            )
-            diff_url = url_for(
-                '.generate_diff',
-                test_id=test_id,
-                regression_test_id=rt.id,
-                output_id=expected_output.id,
-                to_view=0,
-                _external=True
-            )
-        else:
-            # If test passed, got and expected match exactly.
-            got_url = url_for(
-                '.download_output_expected',
-                test_id=test_id,
-                regression_test_id=rt.id,
-                output_id=expected_output.id,
-                _external=True
-            )
-        
-        output_entry = {
-            'output_id': expected_output.id,
-            'correct_extension': expected_output.correct_extension,
-            'expected_url': url_for(
-                '.download_output_expected',
-                test_id=test_id,
-                regression_test_id=rt.id,
-                output_id=expected_output.id,
-                _external=True
-            ),
-            'got_url': got_url,
-            'diff_url': diff_url,
-        }
-        outputs.append(output_entry)
+    outputs = [
+        _build_output_entry(test.id, rt, expected_output, result_files)
+        for expected_output in rt.output_files
+        if not expected_output.ignore
+    ]
 
-    return {
+    response_dict = {
         'regression_test_id': rt.id,
         'category': category_name,
         'sample_filename': rt.sample.original_name,
         'sample_url': url_for(
             '.download_sample_ai',
-            test_id=test_id,
+            test_id=test.id,
             sample_id=rt.sample.id,
             _external=True
         ),
@@ -616,11 +618,17 @@ def _process_test_case(test_id, category_name, t_data):
         'expected_exit_code': result.expected_rc if result else None,
         'runtime_ms': result.runtime if result else None,
         'outputs': outputs,
-        'how_to_reproduce': f'./ccextractor {rt.command} {rt.sample.original_name}',
     }
 
+    # Format the reproduction command based on platform
+    binary_name = f'./{CCEXTRACTOR_LINUX_BINARY}' if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY
+    response_dict['how_to_reproduce'] = f'{binary_name} {rt.command} {rt.sample.original_name}'
+
+    return response_dict
+
 
 @mod_test.route('/<int:test_id>/ai.json', methods=['GET'])
+@login_required
 def ai_json_endpoint(test_id):
     """Structured JSON with download URLs for all artifacts — for AI agents."""
     from run import storage_client_bucket
@@ -632,10 +640,8 @@ def ai_json_endpoint(test_id):
     def blob_exists(path):
         return storage_client_bucket.blob(path).exists()
 
-    has_binary = (
-        blob_exists(f'test_artifacts/{test_id}/ccextractor') or
-        blob_exists(f'test_artifacts/{test_id}/ccextractor.exe')
-    )
+    binary_name = CCEXTRACTOR_LINUX_BINARY if test.platform == TestPlatform.linux else CCEXTRACTOR_WIN_BINARY
+    has_binary = blob_exists(f'test_artifacts/{test_id}/{binary_name}')
     has_coredump = blob_exists(f'test_artifacts/{test_id}/coredump')
     has_combined_stdout = blob_exists(f'test_artifacts/{test_id}/combined_stdout.log')
 
@@ -653,7 +659,7 @@ def blob_exists(path):
             else:
                 passed += 1
 
-            test_cases.append(_process_test_case(test_id, category['category'].name, t_data))
+            test_cases.append(_process_test_case(test, category['category'].name, t_data))
 
     report = {
         'test_id': test.id,