Skip to content

Commit

Permalink
Add type hints to the transcribe_file script
Browse files Browse the repository at this point in the history
  • Loading branch information
replaceafill committed Aug 21, 2024
1 parent 8742013 commit 7a5e938
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 64 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ module = [
"src.MCPClient.lib.clientScripts.identify_file_format",
"src.MCPClient.lib.clientScripts.normalize",
"src.MCPClient.lib.clientScripts.policy_check",
"src.MCPClient.lib.clientScripts.transcribe_file",
"src.MCPClient.lib.clientScripts.validate_file",
"tests.MCPClient.conftest",
"tests.MCPClient.test_characterize_file",
"tests.MCPClient.test_identify_file_format",
"tests.MCPClient.test_normalize",
"tests.MCPClient.test_policy_check",
"tests.MCPClient.test_transcribe_file",
"tests.MCPClient.test_validate_file",
]
check_untyped_defs = true
Expand Down
2 changes: 1 addition & 1 deletion src/MCPClient/lib/clientScripts/characterize_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def main(job: Job, file_uuid: uuid.UUID, sip_uuid: uuid.UUID) -> int:


def get_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Identify file formats.")
parser = argparse.ArgumentParser(description="Characterize file.")
parser.add_argument("file_uuid", type=uuid.UUID)
parser.add_argument("sip_uuid", type=uuid.UUID)

Expand Down
2 changes: 1 addition & 1 deletion src/MCPClient/lib/clientScripts/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def main(job: Job, opts: NormalizeArgs) -> int:


def get_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description="Identify file formats.")
parser = argparse.ArgumentParser(description="Normalize.")
parser.add_argument(
"purpose", type=str, help='"preservation", "access", "thumbnail"'
)
Expand Down
75 changes: 54 additions & 21 deletions src/MCPClient/lib/clientScripts/transcribe_file.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
#!/usr/bin/env python
import argparse
import dataclasses
import multiprocessing
import os
from uuid import uuid4
import uuid
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple

import django
from django.db import transaction

django.setup()
# dashboard

import databaseFunctions
import fileOperations

# archivematicaCommon
from client.job import Job
from dicts import ReplacementDict
from django.conf import settings as mcpclient_settings
from django.core.exceptions import ValidationError
from django.db import transaction
from django.utils import timezone
from executeOrRunSubProcess import executeOrRun
from fpr.models import FPRule
Expand All @@ -24,19 +29,27 @@
from main.models import FileFormatVersion


def concurrent_instances():
@dataclasses.dataclass
class TranscribeFileArgs:
    """Typed container for the positional command-line arguments of this script.

    The field names match the argument names declared by ``get_parser`` so a
    parsed ``argparse.Namespace`` can be expanded directly into this class.
    """

    # First positional argument, parsed as a UUID.
    task_uuid: uuid.UUID
    # Second positional argument, parsed as a UUID.
    file_uuid: uuid.UUID

def concurrent_instances() -> int:
    """Return the CPU count reported by ``multiprocessing.cpu_count()``.

    NOTE(review): the name suggests the caller uses this as the number of
    instances of this script to run at once; the consumer is not visible here.
    """
    return multiprocessing.cpu_count()


def insert_transcription_event(status, file_uuid, rule, relative_location):
def insert_transcription_event(
status: int, file_uuid: uuid.UUID, rule: FPRule, relative_location: str
) -> str:
outcome = "transcribed" if status == 0 else "not transcribed"

tool = rule.command.tool
event_detail = 'program={}; version={}; command="{}"'.format(
tool.description, tool.version, rule.command.command.replace('"', r"\"")
)

event_uuid = str(uuid4())
event_uuid = str(uuid.uuid4())

databaseFunctions.insertIntoEvents(
fileUUID=file_uuid,
Expand All @@ -51,9 +64,14 @@ def insert_transcription_event(status, file_uuid, rule, relative_location):


def insert_file_into_database(
task_uuid, file_uuid, sip_uuid, event_uuid, rule, output_path, relative_path
):
transcription_uuid = str(uuid4())
task_uuid: uuid.UUID,
file_uuid: uuid.UUID,
sip_uuid: str,
event_uuid: str,
output_path: str,
relative_path: str,
) -> None:
transcription_uuid = str(uuid.uuid4())
today = timezone.now()
fileOperations.addFileToSIP(
relative_path,
Expand All @@ -66,7 +84,7 @@ def insert_file_into_database(
)

fileOperations.updateSizeAndChecksum(
transcription_uuid, output_path, today, str(uuid4())
transcription_uuid, output_path, today, str(uuid.uuid4())
)

databaseFunctions.insertIntoDerivations(
Expand All @@ -76,17 +94,18 @@ def insert_file_into_database(
)


def fetch_rules_for(file_: File) -> Sequence[FPRule]:
    """Return the active transcription rules matching the format of ``file_``.

    The file's ``FileFormatVersion`` row is looked up first; its
    ``format_version`` is then used to filter ``FPRule.active`` for rules
    whose purpose is ``"transcription"``.

    Returns an empty list when no ``FileFormatVersion`` exists for the file
    or when the lookup raises ``ValidationError``.
    """
    try:
        # Named ``file_format`` rather than ``format`` to avoid shadowing the builtin.
        file_format = FileFormatVersion.objects.get(file_uuid=file_)
        # The explicit annotation pins the return type for the type checker.
        result: Sequence[FPRule] = FPRule.active.filter(
            format=file_format.format_version, purpose="transcription"
        )
        return result
    except (FileFormatVersion.DoesNotExist, ValidationError):
        return []


def fetch_rules_for_derivatives(file_):
def fetch_rules_for_derivatives(file_: File) -> Tuple[Optional[File], Sequence[FPRule]]:
derivs = Derivation.objects.filter(source_file=file_)
for deriv in derivs:
derived_file = deriv.derived_file
Expand All @@ -101,7 +120,7 @@ def fetch_rules_for_derivatives(file_):
return None, []


def main(job, task_uuid, file_uuid):
def main(job: Job, task_uuid: uuid.UUID, file_uuid: uuid.UUID) -> int:
setup_dicts(mcpclient_settings)

succeeded = True
Expand Down Expand Up @@ -163,19 +182,33 @@ def main(job, task_uuid, file_uuid):
file_uuid,
rd["%SIPUUID%"],
event,
rule,
output_path,
relative_path,
)

return 0 if succeeded else 1


def call(jobs):
def get_parser() -> argparse.ArgumentParser:
    """Build the argument parser for this script.

    The parser accepts two positional arguments, ``task_uuid`` and
    ``file_uuid``, each converted with ``uuid.UUID`` so malformed values are
    rejected at parse time.
    """
    parser = argparse.ArgumentParser(description="Transcribe file.")
    parser.add_argument("task_uuid", type=uuid.UUID)
    parser.add_argument("file_uuid", type=uuid.UUID)

    return parser


def parse_args(parser: argparse.ArgumentParser, job: Job) -> TranscribeFileArgs:
    """Parse the arguments of ``job`` into a ``TranscribeFileArgs`` instance.

    ``job.args[0]`` is skipped (presumably the script name; confirm against
    the ``Job`` class) and the remaining items are handed to ``parser``.
    """
    namespace = parser.parse_args(job.args[1:])

    # The namespace attribute names match the dataclass field names.
    return TranscribeFileArgs(**vars(namespace))


def call(jobs: List[Job]) -> None:
    """Run ``main`` for each job and record its return value as the job status.

    All jobs are processed inside a single ``transaction.atomic()`` block,
    and each job runs inside its own ``job.JobContext()``. Arguments are
    parsed with the shared parser so ``main`` receives ``uuid.UUID`` values
    rather than raw strings.
    """
    # Build the parser once; it is reused for every job in the batch.
    parser = get_parser()

    with transaction.atomic():
        for job in jobs:
            with job.JobContext():
                args = parse_args(parser, job)

                job.set_status(main(job, args.task_uuid, args.file_uuid))
Loading

0 comments on commit 7a5e938

Please sign in to comment.