diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1b8b1d951..a1baca654 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,69 +5,6 @@ on: pull_request: jobs: - format: - name: Format - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 - with: - node-version: 14.x - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - run: npm ci - working-directory: ./frontend - - run: npm run check-ci - working-directory: ./frontend - env: - CI: true - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.dev.txt') }} - restore-keys: | - pip- - - run: ./pre-commit.sh && git diff --exit-code - - install: - name: Install - runs-on: ${{ matrix.os }} - strategy: - matrix: - python-version: [3.7, 3.8, 3.9] - os: [ubuntu-20.04, macos-latest] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }}-${{ matrix.python-version }} - restore-keys: | - pip- - - run: pip install -e . 
- - run: cl - - test_frontend: - name: Test Frontend - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 - with: - node-version: 14.x - - run: npm ci - working-directory: ./frontend - - run: npm test - working-directory: ./frontend - env: - CI: true - build: name: Build runs-on: ubuntu-latest @@ -94,7 +31,7 @@ jobs: # otherwise, "master" if on a push-triggered build VERSION: ${{ github.head_ref || 'master' }} SERVICE: ${{ matrix.service }} - + test_backend: name: Test backend runs-on: ubuntu-latest @@ -102,24 +39,20 @@ jobs: strategy: matrix: test: - - unittest gen-rest-docs gen-cli-docs gen-readthedocs basic auth status batch anonymous competition unicode rest1 upload1 upload2 upload3 upload4 download disk - - refs binary rm make worksheet_search worksheet_tags bundle_freeze_unfreeze worksheet_freeze_unfreeze detach perm search_time groups - - worker_manager service - - run time - - run2 - - search link read kill write mimic workers edit_user sharing_workers - - resources - - memoize - - copy - - netcat netcurl - - edit - - open wopen - - store_add parallel - runtime: [docker, kubernetes] - exclude: - # netcat / netcurl not supported for kubernetes. 
- - test: netcat netcurl - runtime: kubernetes + - {name: search read kill, index: 26} + - {name: search read kill, index: 27} + - {name: search read kill, index: 28} + - {name: search read kill, index: 29} + - {name: search read kill, index: 30} + - {name: search read kill, index: 31} + - {name: search read kill, index: 32} + - {name: search read kill, index: 33} + - {name: search read kill, index: 34} + - {name: search read kill, index: 35} + - {name: search read kill, index: 36} + - {name: search read kill, index: 37} + - {name: search read kill, index: 38} + runtime: [docker] steps: - name: Clear free space run: | @@ -149,350 +82,21 @@ jobs: python3 codalab_service.py start --services default --version ${VERSION} python3 test_runner.py --version ${VERSION} ${TEST} env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - CODALAB_LINK_MOUNTS: /tmp - - uses: actions/setup-go@v3 - if: matrix.runtime == 'kubernetes' - with: - go-version: '1.18.1' - - name: Run tests using Kubernetes runtime - if: matrix.runtime == 'kubernetes' - run: | - sh ./tests/test-setup.sh - sh ./scripts/local-k8s/setup-ci.sh - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} + TEST: ${{ matrix.test.name }} VERSION: ${{ github.head_ref || 'master' }} CODALAB_LINK_MOUNTS: /tmp - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Save kubernetes logs - if: (failure() || cancelled()) && matrix.runtime == 'kubernetes' - run: | - kubectl config use-context kind-codalab - kubectl cluster-info dump --output-directory /tmp/logs - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-${{ matrix.runtime }}-${{ matrix.test }} - path: /tmp/logs - - test_backend_on_worker_restart: - name: Test backend - on worker restart - runs-on: 
ubuntu-latest - needs: [build] - strategy: - matrix: - test: [run] - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - # Make sure restarting worker doesn't cause any issues (ie in serialization/deserialization) - run: | - python3 codalab_service.py start --services default --version ${VERSION} - docker restart codalab_worker_1 - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - - name: Save logs - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-${{ matrix.test }} - path: /tmp/logs - - test_backend_sharedfs: - name: Test backend - shared FS - runs-on: ubuntu-latest - needs: [build] - strategy: - matrix: - test: [run,run2,link read write kill resources] - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run shared 
filesystem tests - run: | - sh ./tests/test-setup.sh - python3 codalab_service.py start --services default --version ${VERSION} --shared-file-system - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - CODALAB_LINK_MOUNTS: /tmp - - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-sharedfs-${{ matrix.test }} - path: /tmp/logs - - test_backend_protected_mode: - name: Test backend - protected mode - runs-on: ubuntu-latest - needs: [build] - strategy: - matrix: - test: - - basic status batch anonymous unicode rest1 upload1 download disk - - refs binary rm make worksheet_search worksheet_tags bundle_freeze_unfreeze worksheet_freeze_unfreeze detach perm search_time groups - - run - - search read kill write mimic workers - - copy netcat - - protected_mode - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - run: | - python3 codalab_service.py start --services default --version ${VERSION} --protected-mode - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a 
--format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-protectedmode-${{ matrix.test }} - path: /tmp/logs - - test_backend_default_bundle_store: - name: Test backend - default bundle store - runs-on: ubuntu-latest - needs: [build] - strategy: - matrix: - test: - - default_bundle_store - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - run: | - CODALAB_DEFAULT_BUNDLE_STORE_NAME=store$(date +%s) python3 codalab_service.py start --services default --version ${VERSION} --protected-mode - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - - name: Save logs - if: (failure() || cancelled()) + if: always() run: | mkdir /tmp/logs for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - name: Upload logs - if: (failure() || cancelled()) + if: always() uses: actions/upload-artifact@v1 with: - name: logs-test-${{ matrix.test }} + name: logs-test-docker-${{ matrix.test.name }}-${{ matrix.test.index }} path: /tmp/logs - test_backend_default_bundle_store_azure: - name: Test backend - use azure as default bundle store - runs-on: ubuntu-20.04 - needs: [build] - strategy: - matrix: - test: - - upload1 upload2 upload3 upload4 download - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - 
- uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - run: pip install -e . - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - run: | - python3 codalab_service.py start --services default azurite --version ${VERSION} - sh ./tests/test-setup-default-store.sh - CODALAB_DEFAULT_BUNDLE_STORE_NAME=azure-store-default python3 codalab_service.py start --services default azurite --version ${VERSION} - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - CODALAB_USERNAME: codalab - CODALAB_PASSWORD: codalab - - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-${{ matrix.test }} - path: /tmp/logs - - test_backend_preemptible_worker: - name: Test backend - preemptible workers - runs-on: ubuntu-latest - needs: [build] - strategy: - matrix: - test: - - preemptible - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - run: pip install -e . 
- - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - run: | - python3 codalab_service.py start --services default no-worker worker-preemptible --version ${VERSION} - sleep 20 - python3 codalab_service.py start --services worker-preemptible2 --version ${VERSION} - ./tests/test-setup-preemptible.sh - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - CODALAB_USERNAME: codalab - CODALAB_PASSWORD: codalab - - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-${{ matrix.test }} - path: /tmp/logs test_backend_azure_blob: name: Test backend with Azure Blob Storage @@ -501,18 +105,19 @@ jobs: strategy: matrix: test: - - unittest gen-rest-docs gen-cli-docs gen-readthedocs basic auth status batch anonymous competition unicode rest1 upload1 upload2 upload3 upload4 download disk - - refs binary rm make worksheet_search worksheet_tags bundle_freeze_unfreeze worksheet_freeze_unfreeze detach perm search_time groups - - worker_manager service - - run time - - run2 - - search read kill write mimic workers edit_user sharing_workers - # - search link read kill write mimic workers edit_user sharing_workers - - resources - - memoize - - copy netcat netcurl - - edit blob - - open wopen + - {name: search read kill, index: 26} + - {name: search read kill, index: 27} + - {name: search read kill, index: 28} + - {name: search read kill, index: 29} + - {name: search read kill, index: 30} + - {name: search read kill, index: 31} + - {name: search read kill, index: 32} + - {name: search read kill, index: 33} + - 
{name: search read kill, index: 34} + - {name: search read kill, index: 35} + - {name: search read kill, index: 36} + - {name: search read kill, index: 37} + - {name: search read kill, index: 38} steps: - name: Clear free space run: | @@ -540,81 +145,18 @@ jobs: python3 codalab_service.py start --services default azurite --version ${VERSION} python3 test_runner.py --version ${VERSION} ${TEST} env: - TEST: ${{ matrix.test }} + TEST: ${{ matrix.test.name }} VERSION: ${{ github.head_ref || 'master' }} CODALAB_LINK_MOUNTS: /tmp CODALAB_ALWAYS_USE_AZURE_BLOB_BETA: 1 - name: Save logs - if: (failure() || cancelled()) - run: | - mkdir /tmp/logs - for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - - name: Upload logs - if: (failure() || cancelled()) - uses: actions/upload-artifact@v1 - with: - name: logs-test-azblob-${{ matrix.test }} - path: /tmp/logs - - test_ui: - name: End-to-end UI Tests - runs-on: ubuntu-latest - needs: [build] - strategy: - matrix: - test: [frontend] - steps: - - name: Clear free space - run: | - sudo rm -rf /opt/ghc - df -h - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - pip- - - run: pip install -r requirements.txt - - name: Setup tests - run: | - sudo service mysql stop - python3 codalab_service.py build services --version ${VERSION} --pull - env: - VERSION: ${{ github.head_ref || 'master' }} - - name: Run tests - run: | - python3 codalab_service.py start --services default --version ${VERSION} - docker exec codalab_rest-server_1 /bin/bash -c "python3 scripts/create_sample_worksheet.py --test-print" - python3 test_runner.py --version ${VERSION} ${TEST} - env: - TEST: ${{ matrix.test }} - VERSION: ${{ github.head_ref || 'master' }} - - name: Upload screenshots on failure - uses: actions/upload-artifact@v1 - if: 
failure() - with: - name: screenshots-test-${{ matrix.test }} - path: tests/ui - - name: Save logs - if: (failure() || cancelled()) + if: always() run: | mkdir /tmp/logs for c in $(docker ps -a --format="{{.Names}}"); do docker logs $c > /tmp/logs/$c.log 2> /tmp/logs/$c.err.log; done - name: Upload logs - if: (failure() || cancelled()) + if: always() uses: actions/upload-artifact@v1 with: - name: logs-test-${{ matrix.test }} + name: logs-test-azblob-docker-${{ matrix.test.name }}-${{ matrix.test.index }} path: /tmp/logs - - ci: - name: All CI tasks complete - runs-on: ubuntu-latest - needs: [format, install, test_frontend, build, test_backend, test_backend_on_worker_restart, test_backend_sharedfs, test_backend_protected_mode, test_ui] - steps: - - uses: actions/checkout@v3 - - run: echo Done diff --git a/codalab/model/bundle_model.py b/codalab/model/bundle_model.py index 4224ac1d0..6fa138dd9 100644 --- a/codalab/model/bundle_model.py +++ b/codalab/model/bundle_model.py @@ -917,6 +917,9 @@ def transition_bundle_staged(self, bundle): Returns False if the bundle was not in STARTING state. Clears the job_handle metadata and removes the worker_run row. """ + logger.error("IN TRANSTIOIN BUNDLE STAGED FOR SOME REASON ... THIS MIGHT BE WHY METADATA GETTING WIPED") + logger.error(f"{bundle.metadata.to_dict()}") + logger.error(f"state: {bundle.to_dict()['state']}") with self.engine.begin() as connection: # Make sure it's still starting. row = connection.execute( 
@@ -1050,9 +1053,13 @@ def transition_bundle_running(self, bundle, worker_run, row, user_id, worker_id, RunStage.UPLOADING_RESULTS ]['elapsed'] + logger.error("-"*80) + logger.error("in transition_bundle_running") + logger.error(f"metadata_update:{metadata_update}") self.update_bundle( bundle, {'state': worker_run.state, 'metadata': metadata_update}, connection ) + logger.error(f"bundle metadata: {bundle.metadata.to_dict()}") return True @@ -1108,15 +1115,23 @@ def transition_bundle_finalizing(self, bundle, worker_run, user_id, connection): failure_message = 'Exit code %d' % exitcode # Build metadata + logger.error("-"*80) + logger.error("In transition_bundle_finalizing...") + logger.error(worker_run.as_dict) metadata = {} if failure_message is not None: metadata['failure_message'] = failure_message if exitcode is not None: metadata['exitcode'] = exitcode + + logger.error(f"failure_message: {failure_message}") + logger.error(f"metadata: {metadata}") bundle_update = {'state': State.FINALIZING, 'metadata': metadata} self.update_bundle(bundle, bundle_update, connection) + logger.error(f"bundle: {bundle.to_dict()}") + logger.error(f"bundle metadata: {bundle.metadata.to_dict()}") return True def transition_bundle_finished(self, bundle, bundle_location): @@ -1125,10 +1140,15 @@ def transition_bundle_finished(self, bundle, bundle_location): The final state is determined by whether a failure message or exitcode was recorded during finalization of the 
bundle. """ + logger.error("-"*80) + logger.error("In transition_bundle_finished") metadata = bundle.metadata.to_dict() + logger.error(f"metadata for bundle {bundle.uuid}: {metadata}") failure_message = metadata.get('failure_message', None) exitcode = metadata.get('exitcode', 0) + actions = metadata.get('actions', None) state = State.FAILED if failure_message or exitcode else State.READY + #if (failure_message and 'Kill requested' in failure_message) or (actions and 'kill' in actions): if failure_message and 'Kill requested' in failure_message: state = State.KILLED @@ -1245,6 +1265,7 @@ def update_bundle(self, bundle, update, connection=None, delete=False): This method validates all updates to the bundle, so it is appropriate to use this method to update bundles based on user input (eg: cl edit). """ + logger.error(f"in update bundle for bundle {bundle.uuid}") message = 'Illegal update: %s' % (update,) precondition('id' not in update and 'uuid' not in update, message) # Apply the column and metadata updates in memory and validate the result. @@ -1290,6 +1311,8 @@ def update_bundle(self, bundle, update, connection=None, delete=False): # Perform the actual updates and deletes. def do_update(connection): try: + logger.error(f"BUNDLE UPDATE: {update}") + logger.error(f"BUNDLE METADATA UPDATE: {metadata_update}") if update: connection.execute(cl_bundle.update().where(clause).values(update)) if metadata_update: diff --git a/codalab/model/worker_model.py b/codalab/model/worker_model.py index 746e66928..d312338b6 100644 --- a/codalab/model/worker_model.py +++ b/codalab/model/worker_model.py @@ -406,7 +406,7 @@ def send_json_message_with_unix_socket( if not success: # Shouldn't be too expensive just to keep retrying. 
# TODO: maybe exponential backoff - logging.error("Sleeping for 0.1 seconds.") + logging.error("Sleeping for 0.3 seconds.") time.sleep(0.3) continue diff --git a/codalab/rest/workers.py b/codalab/rest/workers.py index 9dcd5cf9f..dfafaac5d 100644 --- a/codalab/rest/workers.py +++ b/codalab/rest/workers.py @@ -46,6 +46,7 @@ def checkin(worker_id): for run in request.json["runs"]: try: worker_run = BundleCheckinState.from_dict(run) + logger.error(f"worker run: {worker_run}") bundle = local.model.get_bundle(worker_run.uuid) local.model.bundle_checkin( bundle, worker_run, request.user.user_id, worker_id, diff --git a/codalab/server/bundle_manager.py b/codalab/server/bundle_manager.py index e341762e2..723e7f5e4 100644 --- a/codalab/server/bundle_manager.py +++ b/codalab/server/bundle_manager.py @@ -371,6 +371,7 @@ def _restage_stuck_starting_bundles(self, workers): ): # Run message went missing. logger.info('Re-staging run bundle %s', bundle.uuid) if self._model.transition_bundle_staged(bundle): + logger.error("transition staged from restage stuck starting") workers.restage(bundle.uuid) def _acknowledge_recently_finished_bundles(self, workers): @@ -394,9 +395,11 @@ def _acknowledge_recently_finished_bundles(self, workers): ) bundle_location = self._bundle_store.get_bundle_location(bundle.uuid) # TODO(Ashwin): fix this -- bundle location could be linked. 
+ logger.error("-"*80) + logger.error(bundle.to_dict()) self._model.transition_bundle_finished(bundle, bundle_location) else: logger.info(f"Bundle {bundle.uuid} could not be finalized.") def _bring_offline_stuck_running_bundles(self, workers): """ diff --git a/requirements.txt b/requirements.txt index 735a14a72..17b6b5754 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,6 +32,7 @@ selenium==3.141.0 requests==2.25.0 azure-storage-blob==12.4.0 azure-core==1.8.0 +msrest==0.6.21 sentry-sdk==1.14.0 requests-oauthlib==1.1.0 oauthlib==2.1.0 diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index ba381995c..b4c397997 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -195,7 +195,7 @@ def wait_until_state(uuid, expected_state, timeout_seconds=1000, exclude_final_s uuid, expected_state, current_state ) ) - time.sleep(0.5) + time.sleep(1) def wait_for_contents(uuid, substring, timeout_seconds=1000): @@ -2005,9 +2005,9 @@ def test_run(ctx): # Test that bundle fails when run without sufficient time quota time_used = int(_run_command([cl, 'uinfo', 'codalab', '-f', 'time_used'])) _run_command([cl, 'uedit', 'codalab', '--time-quota', str(time_used + 2)]) - uuid = _run_command([cl, 'run', 'sleep 100000']) + uuid = _run_command([cl, 'run', 'sleep 100000;']) wait_until_state(uuid, State.RUNNING) - wait_until_state(uuid, State.KILLED, timeout_seconds=120) + wait_until_state(uuid, State.KILLED, timeout_seconds=500) check_equals( 'Kill requested: User time quota exceeded. 
To apply for more quota,' ' please visit the following link: ' @@ -2401,7 +2401,7 @@ def test_read(ctx): @TestModule.register('kill') def test_kill(ctx): - uuid = _run_command([cl, 'run', 'while true; do sleep 100; done']) + uuid = _run_command([cl, 'run', 'sleep 100000;']) wait_until_state(uuid, State.RUNNING) check_equals(uuid, _run_command([cl, 'kill', uuid])) wait_until_state(uuid, State.KILLED)