Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
636 changes: 533 additions & 103 deletions codalab/migration.py

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions codalab/run-migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Runs migration.py once per line of the bundle-id CSV, giving each run a
# fixed time budget and reporting the runs that exceeded it.

# Per-bundle time limit in seconds (5min timeout).
timeout_duration=300
# Seconds to wait after the initial TERM signal before sending KILL.
kill_after=20
# Default location the migration script writes to.
filename='/home/azureuser/codalab-worksheets/var/codalab/home/bundle_ids_0.csv'

# The list is read on file descriptor 3 so that the migration command keeps
# the script's own stdin and cannot swallow the remaining lines.
# `IFS= read -r` keeps each line verbatim; the `|| [ -n "$line" ]` clause
# still processes a final line that lacks a trailing newline.
while IFS= read -r line <&3 || [ -n "$line" ]
do
    # Nothing to migrate for a blank line.
    if [ -z "$line" ]; then
        continue
    fi

    # Command to run, kept as an array so every argument stays one word.
    command_to_run=(python migration.py -t blob-prod -u "$line" -p 1)

    timeout -k "$kill_after" "$timeout_duration" "${command_to_run[@]}"
    exit_status=$?
    # timeout exits with 124 when the limit was hit and TERM was enough, and
    # with 137 when the follow-up KILL had to be sent.
    # NOTE(review): 137 is also reported if something else KILLs the command.
    if [ "$exit_status" -eq 124 ] || [ "$exit_status" -eq 137 ]; then
        echo "Process took too long. Killing the process for bundle $line..."
    fi
# `tail -n +2` skips the header row.
done 3< <(tail -n +2 "$filename")
17 changes: 17 additions & 0 deletions codalab/worker/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
import bz2
import hashlib
import tarfile
import stat

from codalab.common import BINARY_PLACEHOLDER, UsageError
Expand Down Expand Up @@ -466,6 +467,22 @@ def read_file_section(file_path, offset, length):
return fileobj.read(length)


def read_file_section_gzip(bundle_path, file_name, offset, length):
    """
    TODO: UNSAFE

    Given a tar.gz file, reads length bytes of given file_name from the
    given offset.

    :param bundle_path: path of the tar.gz bundle, opened through OpenFile.
    :param file_name: name of the archive member to read from.
    :param offset: position within the member at which reading starts.
    :param length: maximum number of bytes to read.
    :raises KeyError: if the archive has no member called file_name
        (raised by TarFile.getmember).
    Return bytes.
    """
    # NOTE(review): the stream from OpenFile(..., gzipped=True) is passed to
    # tarfile with mode 'r:gz'; confirm OpenFile does not already decompress.
    with OpenFile(bundle_path, 'rb', gzipped=True) as bundle:
        # Using the TarFile as a context manager releases it even when the
        # member lookup or the read raises.
        with tarfile.open(fileobj=bundle, mode='r:gz') as tf:
            member = tf.getmember(file_name)
            fileobj = tf.extractfile(member)
            fileobj.seek(offset, os.SEEK_SET)
            return fileobj.read(length)


def summarize_file(file_path, num_head_lines, num_tail_lines, max_line_length, truncation_text):
"""
Summarizes the file at the given path, returning a string containing the
Expand Down
25 changes: 25 additions & 0 deletions log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Logging demo: one logger that writes everything to a file and only
# errors (and above) to the console.
import logging

logger = logging.getLogger('simple_example')
logger.setLevel(logging.DEBUG)
# create file handler that logs debug and higher level messages
fh = logging.FileHandler('spam.log')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.ERROR)
# create formatter and add it to the handlers
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)

# 'application' code
logger.debug('debug message')
logger.info('info message')
# Logger.warn is a deprecated alias; Logger.warning is the supported name.
logger.warning('warn message')
logger.error('error message')
logger.critical('critical message')
17 changes: 17 additions & 0 deletions log2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Logging demo: a module-level logger that writes INFO and above to a file.
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('example.log')
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# 'application' code
# The debug message is below the logger's INFO level, so it is not emitted.
logger.debug('debug message')
logger.info('info message')
logger.warning('warn message')
logger.error('error message')
logger.critical('critical message')

# The trailing pdb.set_trace() breakpoint was removed: it halted every
# non-interactive run of this script.
7 changes: 7 additions & 0 deletions my-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from scripts.test_util import Timer
import time

def _print_forever(message, interval_seconds):
    """Print message repeatedly, sleeping interval_seconds between prints.

    Never returns on its own; it only stops when something interrupts it.
    """
    while True:
        print(message)
        time.sleep(interval_seconds)


# Manual check of Timer: run an endless loop inside a 3-second Timer.
# NOTE(review): presumably Timer interrupts the loop once the timeout
# elapses; confirm against scripts/test_util.py.
with Timer(timeout_seconds=3):
    _print_forever("hello", 0.5)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ websockets==9.1
kubernetes==12.0.1
google-cloud-storage==2.0.0
httpio==0.3.0
pandas==1.1.5
6 changes: 2 additions & 4 deletions scripts/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def cleanup(cl, tag, should_wait=True):
print('Removed {} bundles and {} worksheets.'.format(bundles_removed, worksheets_removed))



class Timer:
"""
Class that uses signal to interrupt functions while they're running
Expand Down Expand Up @@ -219,10 +220,7 @@ def __enter__(self):
self.start_time = time.time()
if self.handle_timeouts:
signal.signal(signal.SIGALRM, self.handle_timeout)
signal.setitimer(signal.ITIMER_REAL, self.timeout_seconds, self.timeout_seconds)

# now, reset itimer.
signal.setitimer(signal.ITIMER_REAL, 0, 0)
signal.alarm(self.timeout_seconds)

def __exit__(self, type, value, traceback):
self.time_elapsed = time.time() - self.start_time
Expand Down
Loading