Skip to content

Commit c730807

Browse files
Swarming: Calculates CF zip url in preprocess to be used in main (#5235)
The setup scripts rely on a set of values that form the `GS storage url` used to download the latest CF .zip; these values come either from the env vars or from the metadata server. Since in swarming we don't have a metadata server, we calculate these values beforehand and send them as env vars in the swarming request. ## Test Proofs These changes have been in `dev` [since last week](a609252). We have successfully used them to fix this issue: ``` Downloading ClusterFuzz source code. ++ '[' '' = 1 ']' ++ gsutil cp gs:///linux-3.zip clusterfuzz_package.zip ``` Allowing the code to target the correct URL: ``` Downloading ClusterFuzz source code. ++ '[' '' = 1 ']' ++ gsutil cp gs://deployment.clusterfuzz-development.appspot.com/linux-3.zip clusterfuzz_package.zip ``` Note: The failing e2e check is known to be failing right now, so we can ignore that. --------- Co-authored-by: Diego Jardon <37823380+jardondiego@users.noreply.github.com>
1 parent f4e663c commit c730807

4 files changed

Lines changed: 109 additions & 21 deletions

File tree

src/clusterfuzz/_internal/swarming/__init__.py

Lines changed: 64 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from clusterfuzz._internal.base.feature_flags import FeatureFlags
2626
from clusterfuzz._internal.config import local_config
2727
from clusterfuzz._internal.datastore import data_types
28+
from clusterfuzz._internal.google_cloud_utils import compute_metadata
2829
from clusterfuzz._internal.google_cloud_utils import credentials
2930
from clusterfuzz._internal.metrics import logs
3031
from clusterfuzz._internal.protos import swarming_pb2
@@ -112,6 +113,68 @@ def _get_task_dimensions(job: data_types.Job, platform_specific_dimensions: list
112113
return task_dimensions
113114

114115

116+
def _append_metadata_env_var(
    env_vars: list[swarming_pb2.StringPair],  # pylint: disable=no-member
    env_var_name: str,
    metadata_path: str) -> None:
  """Resolves a value from the environment or compute metadata and appends it.

  The process environment is consulted first. Only when it yields a falsy
  value is the compute metadata queried. The metadata lookup is best-effort:
  a failure is logged and treated the same as a missing value, so this
  function never raises because of it.

  Args:
    env_vars: The list of string pairs to append the retrieved value to. It is
      mutated in place, and only when a truthy value was found.
    env_var_name: The name of the environment variable to check and set.
    metadata_path: The path in the compute metadata to check if not set in env.
  """
  value = environment.get_value(env_var_name)
  if not value:
    try:
      value = compute_metadata.get(metadata_path)
    except Exception as e:  # pylint: disable=broad-except
      # Deliberately broad: the lookup must not abort building the task
      # request, but the cause is recorded instead of being silently dropped.
      logs.warning(
          f'Failed to fetch {metadata_path} from compute metadata: {e}')

  if not value:
    logs.warning(f'{env_var_name} is not set or cannot be fetched.')
    return

  env_vars.append(
      swarming_pb2.StringPair(  # pylint: disable=no-member
          key=env_var_name, value=str(value)))
140+
141+
142+
def _get_env_vars(logs_project_id: str,
                  instance_spec: dict) -> list[swarming_pb2.StringPair]:  # pylint: disable=no-member
  """Builds the environment variable pairs for a swarming task.

  The returned list is ordered as: the DOCKER_IMAGE pair, the pairs declared
  under the `env` key of `instance_spec`, the pair produced by
  `_env_vars_to_json` from the default task environment, and finally the
  default task environment pairs themselves.

  Args:
    logs_project_id: Value for LOGGING_CLOUD_PROJECT_ID; a falsy value is sent
      as an empty string.
    instance_spec: Instance configuration; the `docker_image` and `env` keys
      are read, both optional.

  Returns:
    The list of string pairs described above.
  """
  make_pair = swarming_pb2.StringPair  # pylint: disable=no-member

  task_defaults = [
      make_pair(key='UWORKER', value='True'),
      make_pair(key='SWARMING_BOT', value='True'),
      make_pair(key='LOG_TO_GCP', value='True'),
      make_pair(key='IS_K8S_ENV', value='True'),
      make_pair(key='LOGGING_CLOUD_PROJECT_ID', value=logs_project_id or ''),
  ]

  # Values resolved from the environment or, failing that, compute metadata.
  # The order here fixes the order of the appended pairs.
  metadata_backed = (
      ('DEPLOYMENT_BUCKET', 'project/attributes/deployment-bucket'),
      ('HOST_JOB_SELECTION', 'instance/attributes/host-job-selection'),
      ('DEPLOYMENT_ZIP', 'project/attributes/deployment-zip'),
  )
  for name, path in metadata_backed:
    _append_metadata_env_var(task_defaults, name, path)

  result = [
      make_pair(
          key='DOCKER_IMAGE', value=instance_spec.get('docker_image', '')),
  ]
  result.extend(
      make_pair(key=entry['key'], value=entry['value'])
      for entry in instance_spec.get('env', []))
  result.append(_env_vars_to_json(task_defaults))
  result.extend(task_defaults)
  return result
176+
177+
115178
def _env_vars_to_json(
116179
env_vars: list[swarming_pb2.StringPair]) -> swarming_pb2.StringPair: # pylint: disable=no-member
117180
"""
@@ -166,27 +229,7 @@ def create_new_task_request(command: str, job_name: str, download_url: str
166229
# env_prefixes allows the modification of existing environment variables by
167230
# adding the values as prefixes to the env variable.
168231
env_prefixes = instance_spec.get('env_prefixes', {})
169-
default_task_environment = [
170-
swarming_pb2.StringPair(key='UWORKER', value='True'), # pylint: disable=no-member
171-
swarming_pb2.StringPair(key='SWARMING_BOT', value='True'), # pylint: disable=no-member
172-
swarming_pb2.StringPair(key='LOG_TO_GCP', value='True'), # pylint: disable=no-member
173-
swarming_pb2.StringPair(key='IS_K8S_ENV', value='True'), # pylint: disable=no-member
174-
swarming_pb2.StringPair( # pylint: disable=no-member
175-
key='LOGGING_CLOUD_PROJECT_ID',
176-
value=logs_project_id),
177-
]
178-
179-
platform_specific_env = instance_spec.get('env', [])
180-
swarming_bot_environment = []
181-
swarming_bot_environment.append(
182-
swarming_pb2.StringPair( # pylint: disable=no-member
183-
key='DOCKER_IMAGE',
184-
value=instance_spec.get('docker_image', '')))
185-
for var in platform_specific_env:
186-
swarming_bot_environment.append(
187-
swarming_pb2.StringPair(key=var['key'], value=var['value'])) # pylint: disable=no-member
188-
swarming_bot_environment.append(_env_vars_to_json(default_task_environment))
189-
swarming_bot_environment.extend(default_task_environment)
232+
swarming_bot_environment = _get_env_vars(logs_project_id, instance_spec)
190233
dimensions = instance_spec.get('dimensions', [])
191234
cas_input_root = instance_spec.get('cas_input_root', {})
192235

src/clusterfuzz/_internal/tests/core/swarming/service_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ def setUp(self):
3131
'clusterfuzz._internal.swarming.create_new_task_request',
3232
'clusterfuzz._internal.base.tasks.task_utils.get_command_from_module',
3333
'clusterfuzz._internal.metrics.logs.error',
34+
'clusterfuzz._internal.google_cloud_utils.compute_metadata.get',
3435
])
3536
self.service = service.SwarmingService()
3637
self.mock.create_new_task_request.return_value = 'fake_request'
38+
self.mock.get.return_value = None
3739

3840
def test_create_utask_main_job_success(self):
3941
"""Test creating a single task successfully."""

src/clusterfuzz/_internal/tests/core/swarming/swarming_config_error_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ class SwarmingConfigErrorTest(unittest.TestCase):
2929
def setUp(self):
3030
helpers.patch(self, [
3131
'clusterfuzz._internal.swarming.FeatureFlags',
32+
'clusterfuzz._internal.google_cloud_utils.compute_metadata.get',
3233
])
3334
helpers.patch_environ(self)
3435
self.mock.FeatureFlags.SWARMING_REMOTE_EXECUTION.enabled = True
36+
self.mock.get.return_value = None
3537

3638
def test_is_swarming_task_bad_config(self):
3739
"""Tests that is_swarming_task returns False when there's a BadConfigError."""

src/clusterfuzz/_internal/tests/core/swarming/swarming_test.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
"""Swarming tests."""
1515
import base64
16+
import os
1617
import unittest
1718
from unittest import mock
1819

@@ -38,11 +39,17 @@ def setUp(self):
3839
'clusterfuzz._internal.google_cloud_utils.credentials.get_scoped_service_account_credentials',
3940
'google.auth.transport.requests.Request',
4041
'clusterfuzz._internal.swarming.FeatureFlags',
42+
'clusterfuzz._internal.google_cloud_utils.compute_metadata.get',
4143
])
4244
helpers.patch_environ(self)
4345
self.mock._get_task_name.return_value = 'task_name' # pylint: disable=protected-access
4446
self.mock.FeatureFlags.SWARMING_REMOTE_EXECUTION.enabled = True
47+
self.mock.get.return_value = None
4548
self.maxDiff = None
49+
os.environ.pop('DEPLOYMENT_ZIP', None)
50+
os.environ.pop('DEPLOYMENT_BUCKET', None)
51+
os.environ.pop('PROJECT_NAME', None)
52+
os.environ.pop('HOST_JOB_SELECTION', None)
4653

4754
def test_get_spec_from_config_with_docker_image(self):
4855
"""Tests that create_new_task_request works as expected."""
@@ -430,3 +437,37 @@ def test_get_task_dimensions_job_precedence(self):
430437
swarming_pb2.StringPair(key='key2', value='value2'),
431438
]
432439
self.assertCountEqual(dimensions, expected_dimensions)
440+
441+
def test_get_env_vars_with_metadata_server(self):
  """Checks that _get_env_vars picks up a value served by compute metadata."""
  docker_image = 'gcr.io/clusterfuzz-images/base:a2f4dd6-202202070654'
  bucket = 'test-bucket-from-metadata'

  # Only the deployment bucket path resolves; any other path yields None.
  metadata_values = {'project/attributes/deployment-bucket': bucket}
  self.mock.get.side_effect = lambda path: metadata_values.get(path)

  actual = swarming._get_env_vars(  # pylint: disable=protected-access
      'project_id', {"docker_image": docker_image})

  default_pairs = [
      swarming_pb2.StringPair(key='UWORKER', value='True'),
      swarming_pb2.StringPair(key='SWARMING_BOT', value='True'),
      swarming_pb2.StringPair(key='LOG_TO_GCP', value='True'),
      swarming_pb2.StringPair(key='IS_K8S_ENV', value='True'),
      swarming_pb2.StringPair(
          key='LOGGING_CLOUD_PROJECT_ID', value='project_id'),
      swarming_pb2.StringPair(key='DEPLOYMENT_BUCKET', value=bucket),
  ]
  expected = [
      swarming_pb2.StringPair(key='DOCKER_IMAGE', value=docker_image),
      swarming_pb2.StringPair(
          key='DOCKER_ENV_VARS',
          value=
          '{"UWORKER": "True", "SWARMING_BOT": "True", "LOG_TO_GCP": "True", "IS_K8S_ENV": "True", "LOGGING_CLOUD_PROJECT_ID": "project_id", "DEPLOYMENT_BUCKET": "test-bucket-from-metadata"}'
      ),
      *default_pairs,
  ]
  self.assertEqual(actual, expected)

0 commit comments

Comments
 (0)