Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New migration #4576

Open
wants to merge 28 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
e45bd44
Update file
epicfaace Nov 15, 2023
0112d99
Rm excessive sanity check
epicfaace Nov 15, 2023
fa7b43e
Updates
epicfaace Nov 15, 2023
2cacce9
max result
epicfaace Nov 15, 2023
569cbdd
update
epicfaace Nov 15, 2023
3eff718
Updates
epicfaace Nov 15, 2023
287292b
added to script
AndrewJGaut Nov 20, 2023
e2e7315
Changing logging
AndrewJGaut Nov 21, 2023
bfbe026
tried adding better logging
AndrewJGaut Nov 21, 2023
129602b
Data tracking is working pretty well now; just need to add last condi…
AndrewJGaut Nov 22, 2023
0ff92dc
Appears to be working
AndrewJGaut Nov 22, 2023
d22519c
some changes
AndrewJGaut Nov 22, 2023
1546886
Add in new timer that actually works
AndrewJGaut Nov 29, 2023
8cf7ac9
minor changes
AndrewJGaut Dec 3, 2023
0801e98
add pandas
AndrewJGaut Dec 3, 2023
8aeea17
merge conflict
HidyHan Dec 3, 2023
a63e383
migration script fixes
HidyHan Dec 4, 2023
2005fde
merge conflict
HidyHan Dec 5, 2023
36509ff
initial changes
dma1dma1 Dec 6, 2023
6c64e56
more changes
dma1dma1 Dec 6, 2023
8751ae4
Add in tar gzip directory
AndrewJGaut Dec 8, 2023
0ac581d
Adds shell script to kill timeout process. Uploads one bundle at a ti…
HidyHan Dec 11, 2023
f213477
fixes a weird typing typo
HidyHan Dec 15, 2023
eff98bf
More changes
dma1dma1 Jan 16, 2024
ace4110
Merge branch 'new-migration' into metadata_only_migration
dma1dma1 Jan 16, 2024
c3be5d1
All migration changes
dma1dma1 Jan 28, 2024
c7bc2ab
Sanity Check for Directories Added
dma1dma1 Jan 28, 2024
51bbd25
Merge pull request #4589 from codalab/sanity_check
dma1dma1 Jan 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
636 changes: 533 additions & 103 deletions codalab/migration.py

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions codalab/run-migration.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Runs migration.py once per bundle id listed in the CSV below, one bundle at
# a time, and gives up on any bundle whose migration exceeds the timeout.

# 5min timeout
timeout_duration=300
# default location migration script writes to
filename='/home/azureuser/codalab-worksheets/var/codalab/home/bundle_ids_0.csv'

# IFS= and -r keep each CSV line intact (no whitespace trimming, no backslash
# interpretation).
while IFS= read -r line
do
    # A blank line would otherwise produce a malformed "-u -p 1" invocation.
    [ -z "$line" ] && continue

    # Command to run (an array, so the bundle id is passed as a single argument)
    command_to_run=(python migration.py -t blob-prod -u "$line" -p 1)

    # stdin is redirected so the child process cannot swallow the remaining
    # bundle ids that the loop is reading.
    timeout -k 20 "$timeout_duration" "${command_to_run[@]}" < /dev/null
    exit_status=$?
    # 124: the command timed out and exited after TERM.
    # 137: the command ignored TERM and was sent KILL after the -k grace period.
    if [ "$exit_status" -eq 124 ] || [ "$exit_status" -eq 137 ]; then
        echo "Process took too long. Killing the process for bundle $line..."
    fi
# tail -n +2 skips the header row
done < <(tail -n +2 "$filename")
17 changes: 17 additions & 0 deletions codalab/worker/file_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import subprocess
import bz2
import hashlib
import tarfile
import stat

from codalab.common import BINARY_PLACEHOLDER, UsageError
Expand Down Expand Up @@ -466,6 +467,22 @@ def read_file_section(file_path, offset, length):
return fileobj.read(length)


def read_file_section_gzip(bundle_path, file_name, offset, length):
    """
    TODO: UNSAFE

    Given a tar.gz file, reads length bytes of given file_name from the
    given offset.
    Return bytes.

    bundle_path is the archive handed to OpenFile; file_name is the name of
    the archive member to read; offset is the byte position inside that member
    to start from; length is the maximum number of bytes to return.

    Raises KeyError if file_name is not a member of the archive, and
    ValueError if the member exists but is not a readable file (for example
    a directory entry).
    """
    with OpenFile(bundle_path, 'rb', gzipped=True) as bundle:
        # NOTE(review): the stream is opened with gzipped=True and then read
        # with mode 'r:gz' -- confirm OpenFile does not already decompress.
        with tarfile.open(fileobj=bundle, mode='r:gz') as tf:
            member = tf.getmember(file_name)
            fileobj = tf.extractfile(member)
            # extractfile returns None for members that are neither regular
            # files nor links; fail with a clear message instead of an
            # AttributeError on .seek().
            if fileobj is None:
                raise ValueError(
                    "Archive member %r is not a regular file" % (file_name,)
                )
            with fileobj:
                fileobj.seek(offset, os.SEEK_SET)
                return fileobj.read(length)


def summarize_file(file_path, num_head_lines, num_tail_lines, max_line_length, truncation_text):
"""
Summarizes the file at the given path, returning a string containing the
Expand Down
25 changes: 25 additions & 0 deletions log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import logging

# Example logging setup: one logger that writes everything (DEBUG and up) to
# a file and only errors (ERROR and up) to the console.
logger = logging.getLogger('simple_example')
logger.setLevel(logging.DEBUG)
# create file handler that logs debug and higher level messages
fh = logging.FileHandler('spam.log')
fh.setLevel(logging.DEBUG)
# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.ERROR)
# create formatter and add it to the handlers
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
fh.setFormatter(formatter)
# add the handlers to logger
logger.addHandler(ch)
logger.addHandler(fh)

# 'application' code
logger.debug('debug message')
logger.info('info message')
# Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
logger.warning('warn message')
logger.error('error message')
logger.critical('critical message')
17 changes: 17 additions & 0 deletions log2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging
# Example logging setup: a module-level logger that writes INFO and above to
# 'example.log'.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.FileHandler('example.log')
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)

# 'application' code
# The debug message is below the INFO threshold set above, so it is not
# written to the log file.
logger.debug('debug message')
logger.info('info message')
logger.warning('warn message')
logger.error('error message')
logger.critical('critical message')
7 changes: 7 additions & 0 deletions my-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from scripts.test_util import Timer
import time

# Manual check of Timer: print in an endless loop and rely on the Timer
# context manager to break out of it.
# NOTE(review): presumably Timer interrupts the body after about 3 seconds --
# confirm against scripts/test_util.py.
pause_seconds = 0.5
with Timer(timeout_seconds=3):
    while True:
        print("hello")
        time.sleep(pause_seconds)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ websockets==9.1
kubernetes==12.0.1
google-cloud-storage==2.0.0
httpio==0.3.0
pandas==1.1.5
6 changes: 2 additions & 4 deletions scripts/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def cleanup(cl, tag, should_wait=True):
print('Removed {} bundles and {} worksheets.'.format(bundles_removed, worksheets_removed))



class Timer:
"""
Class that uses signal to interrupt functions while they're running
Expand Down Expand Up @@ -219,10 +220,7 @@ def __enter__(self):
self.start_time = time.time()
if self.handle_timeouts:
signal.signal(signal.SIGALRM, self.handle_timeout)
signal.setitimer(signal.ITIMER_REAL, self.timeout_seconds, self.timeout_seconds)

# now, reset itimer.
signal.setitimer(signal.ITIMER_REAL, 0, 0)
signal.alarm(self.timeout_seconds)

def __exit__(self, type, value, traceback):
self.time_elapsed = time.time() - self.start_time
Expand Down
Loading
Loading