Merge branch 'main' into jgadling/more-tests
jgadling authored Feb 8, 2024
2 parents c17c011 + 34d0246 commit e32518c
Showing 37 changed files with 335 additions and 354 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/staging-deploy.yaml
@@ -0,0 +1,48 @@
name: Deploy to staging

on:
push:
branches:
- main
paths:
- ".happy/**"
- "api_server/**"

# https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services
permissions:
id-token: write
contents: read

jobs:
deploy-to-staging:
concurrency:
group: staging-cryoet-api-deploy-${{ github.ref }}
cancel-in-progress: true

name: deploy staging branch
runs-on: [ARM64, self-hosted, Linux]
environment: staging
if: github.repository == 'chanzuckerberg/cryoet-data-portal-backend'
steps:
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
mask-aws-account-id: true
aws-region: ${{ secrets.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 1200
- name: Create or update stack
uses: chanzuckerberg/github-actions/.github/actions/deploy-happy-stack@v1.24.0
env:
# Force using BuildKit instead of normal Docker, required so that metadata
# is written/read to allow us to use layers of previous builds as cache.
DOCKER_BUILDKIT: 1
COMPOSE_DOCKER_CLI_BUILD: 1
DOCKER_REPO: ${{ secrets.ECR_REPO }}/
ENV: staging
with:
stack-name: "graphql"
create-tag: true
tfe-token: ${{ secrets.TFE_TOKEN }}
env: staging
operation: create-or-update
2 changes: 1 addition & 1 deletion .gitignore
@@ -157,4 +157,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
1 change: 1 addition & 0 deletions .happy/terraform/envs/dev/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "dev"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
1 change: 1 addition & 0 deletions .happy/terraform/envs/prod/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "prod"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
1 change: 1 addition & 0 deletions .happy/terraform/envs/staging/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "staging"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
18 changes: 16 additions & 2 deletions .pre-commit-config.yaml
@@ -3,14 +3,28 @@ repos:
rev: 23.9.1
hooks:
- id: black
files: backend|client/python
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.292
hooks:
- id: ruff
files: backend|client/python
args:
- --fix
#- repo: https://github.com/pre-commit/mirrors-mypy
# rev: "v1.4.1"
# hooks:
# - id: mypy
# exclude: "packages/frontend"
# # entry: mypy --config-file ingestion_tools/pyproject.toml
# args: ["--config-file", "ingestion_tools/pyproject.toml"]
# # args: [--config-file=./packages/backend/pyproject.toml]
# # entry: mypy --config-file packages/backend/pyproject.toml
# # entry: bash -c 'cd packages/backend && mypy --config-file pyproject.toml .'
# additional_dependencies:
# [
# pydantic,
# pydantic-settings,
# click,
# ]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
7 changes: 7 additions & 0 deletions ingestion_tools/pyproject.toml
@@ -68,10 +68,17 @@ select = [
ignore = [
"E501", # line too long
"C408", # rewrite empty built-ins as literals
"T201", # print statements.
"DTZ007", # Datetime objects without timezones.
"DTZ005", # More datetimes without timezones.
]
line-length = 120
target-version = "py39"

[tool.ruff.lint.per-file-ignores]
# Ignore `SIM115` (not using open() in a context manager) since all calls to this method *do* use a context manager.
"scripts/common/fs.py" = ["SIM115"]

[tool.ruff.isort]
known-first-party =["common"]

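As context for the SIM115 suppression above: ruff's SIM115 flags open() calls that are not used in a with statement, which is a false positive when a helper intentionally returns the open handle and every caller closes it through a context manager. A minimal sketch of that pattern in Python, with hypothetical names rather than the actual scripts/common/fs.py code:

from typing import IO


def open_readable(path: str) -> IO[str]:
    # SIM115 would flag this open() because it is not wrapped in `with`,
    # even though ownership of the handle is deliberately handed to the caller.
    return open(path, "r")


def read_text(path: str) -> str:
    # The caller supplies the context manager, so the file is still closed.
    with open_readable(path) as handle:
        return handle.read()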
23 changes: 9 additions & 14 deletions ingestion_tools/scripts/common/config.py
@@ -1,3 +1,4 @@
import contextlib
import csv
import os
import os.path
@@ -112,10 +113,8 @@ def load_map_files(self) -> None:
def load_run_metadata_file(self, file_attr: str) -> dict[str, Any]:
mapdata = {}
map_filename = None
try:
with contextlib.suppress(AttributeError):
map_filename = getattr(self, file_attr)
except AttributeError:
pass
if not map_filename:
return mapdata
with self.fs.open(f"{self.input_path}/{map_filename}", "r") as tsvfile:
@@ -130,10 +129,8 @@ def load_run_metadata_file(self, file_attr: str) -> dict[str, Any]:
def load_run_csv_file(self, file_attr: str) -> dict[str, Any]:
mapdata = {}
map_filename = None
try:
with contextlib.suppress(AttributeError):
map_filename = getattr(self, file_attr)
except AttributeError:
pass
if not map_filename:
return mapdata
with self.fs.open(f"{self.input_path}/{map_filename}", "r") as csvfile:
@@ -181,7 +178,7 @@ def get_run_data_map(self, run_name: str) -> dict[str, Any]:
return {}

def expand_string(self, run_name: str, string_template: Any) -> int | float | str:
if type(string_template) != str:
if not isinstance(string_template, str):
return string_template
if run_data := self.get_run_data_map(run_name):
string_template = string_template.format(**run_data)
@@ -195,16 +192,16 @@ def expand_string(self, run_name: str, string_template: Any) -> int | float | str:

def expand_metadata(self, run_name: str, metadata_dict: dict[str, Any]) -> dict[str, Any]:
for k, v in metadata_dict.items():
if type(v) == str:
if isinstance(v, str):
metadata_dict[k] = self.expand_string(run_name, v)
elif (type(v)) == dict:
elif isinstance(v, dict):
metadata_dict[k] = self.expand_metadata(run_name, v)
elif (type(v)) == list:
elif isinstance(v, list):
for idx in range(len(v)):
# Note - we're not supporting deeply nested lists,
# but we don't need to with our current data model.
item = v[idx]
if type(item) == str:
if isinstance(item, str):
v[idx] = self.expand_string(run_name, item)
return metadata_dict

@@ -267,10 +264,8 @@ def glob_files(self, obj: BaseImporter, globstring: str) -> list[str]:
if not globstring:
return []
globvars = run.get_glob_vars()
try:
with contextlib.suppress(ValueError):
globvars["int_run_name"] = int(run.run_name)
except ValueError:
pass
expanded_glob = os.path.join(self.dataset_root_dir, globstring.format(**globvars))
results = self.fs.glob(expanded_glob)
if not results:
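The config.py hunks above swap two older patterns for their idiomatic equivalents: try/except blocks that only pass become contextlib.suppress, and type(x) == str checks become isinstance. A small self-contained sketch of both idioms (illustrative only, not the portal's code):

import contextlib
from typing import Optional


class RunConfig:
    run_data_map_file = "run_data.tsv"  # other configs may not define this attribute


def get_map_filename(config: object, file_attr: str) -> Optional[str]:
    filename = None
    # Equivalent to try/getattr/except AttributeError: pass, but shorter and clearer.
    with contextlib.suppress(AttributeError):
        filename = getattr(config, file_attr)
    return filename


def expand(value: object) -> object:
    # isinstance() is preferred over type(x) == str: it reads naturally and also
    # accepts str subclasses, which the equality comparison silently rejects.
    if isinstance(value, str):
        return value.upper()
    return value


print(get_map_filename(RunConfig(), "run_data_map_file"))  # run_data.tsv
print(get_map_filename(RunConfig(), "missing_attr"))       # None
print(expand("fiducial"), expand(42))                      # FIDUCIAL 42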
5 changes: 2 additions & 3 deletions ingestion_tools/scripts/common/fs.py
@@ -1,3 +1,4 @@
import contextlib
import glob
import os
import os.path
@@ -99,10 +100,8 @@ def push(self, path: str) -> None:
remote_file = os.path.relpath(path, self.tmpdir)
src_size = os.path.getsize(path)
dest_size = 0
try:
with contextlib.suppress(FileNotFoundError):
dest_size = self.s3fs.size(remote_file)
except FileNotFoundError:
pass
if src_size == dest_size:
if self.force_overwrite:
print(f"Forcing re-upload of {path}")
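The fs.py hunk applies the same contextlib.suppress cleanup to the size check that decides whether a local file should be pushed to remote storage. A hedged sketch of that decision logic (hypothetical names; the remote client only needs a size() method that raises FileNotFoundError for missing keys, as s3fs does):

import contextlib
import os
import tempfile


class FakeRemoteFS:
    # Stand-in for an object-store client (e.g. s3fs), for demonstration only.
    def __init__(self, sizes: dict):
        self.sizes = sizes

    def size(self, path: str) -> int:
        if path not in self.sizes:
            raise FileNotFoundError(path)
        return self.sizes[path]


def needs_upload(local_path: str, remote_path: str, remote_fs, force_overwrite: bool = False) -> bool:
    src_size = os.path.getsize(local_path)
    dest_size = 0
    # A missing remote object simply keeps dest_size at 0, so the file gets uploaded.
    with contextlib.suppress(FileNotFoundError):
        dest_size = remote_fs.size(remote_path)
    return force_overwrite or src_size != dest_size


with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"12345")
print(needs_upload(tmp.name, "bucket/file", FakeRemoteFS({"bucket/file": 5})))  # False
print(needs_upload(tmp.name, "bucket/missing", FakeRemoteFS({})))               # True
os.unlink(tmp.name)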
42 changes: 17 additions & 25 deletions ingestion_tools/scripts/common/image.py
@@ -2,7 +2,7 @@
import os
import os.path
from datetime import datetime
from typing import Any, List, Callable
from typing import Any, Callable, List

import mrcfile
import numpy as np
@@ -34,9 +34,7 @@ def __init__(self, fs: FileSystemApi, mrc_filename: str, header_only: bool = False):
self.mrc_filename = fs.read_block(mrc_filename)
else:
self.mrc_filename = fs.localreadable(mrc_filename)
with mrcfile.open(
self.mrc_filename, permissive=True, header_only=header_only
) as mrc:
with mrcfile.open(self.mrc_filename, permissive=True, header_only=header_only) as mrc:
if mrc.data is None and not header_only:
raise Exception("missing mrc data")
self.header = mrc.header
@@ -83,7 +81,6 @@ def pyramid_to_mrc(
print(f"skipping remote push for {filename}")
return mrcfiles


def pyramid_to_omezarr(
self,
fs: FileSystemApi,
@@ -120,7 +117,7 @@ def update_headers(self, mrcfile: MrcFile, header_mapper, voxel_spacing):
header.cella.y = isotropic_voxel_size * data.shape[1]
header.cella.z = isotropic_voxel_size * data.shape[0]
header.label[0] = "{0:40s}{1:>39s}".format("Validated by cryoET data portal.", time)
header.rms = np.sqrt(np.mean((data - np.mean(data))**2))
header.rms = np.sqrt(np.mean((data - np.mean(data)) ** 2))
header.extra1 = self.header.extra1
header.extra2 = self.header.extra2

@@ -130,7 +127,7 @@ def update_headers(self, mrcfile: MrcFile, header_mapper, voxel_spacing):
header.exttyp = self.header.exttyp
else:
header.nsymbt = np.array(0, dtype="i4")
header.exttyp = np.array(b'MRCO', dtype="S4")
header.exttyp = np.array(b"MRCO", dtype="S4")

if header_mapper:
header_mapper(header)
@@ -168,10 +165,12 @@ def get_tomo_metadata(
scales = []
size: dict[str, float] = {}
omezarr_dir = fs.destformat(f"{output_prefix}.zarr")
zarrinfo = json.loads(open(fs.localreadable(os.path.join(omezarr_dir, ".zattrs")), "r").read())
with open(fs.localreadable(os.path.join(omezarr_dir, ".zattrs")), "r") as fh:
zarrinfo = json.loads(fh.read())
multiscales = zarrinfo["multiscales"][0]["datasets"]
for scale in multiscales:
scaleinfo = json.loads(open(fs.localreadable(os.path.join(omezarr_dir, scale["path"], ".zarray")), "r").read())
with open(fs.localreadable(os.path.join(omezarr_dir, scale["path"], ".zarray")), "r") as fh:
scaleinfo = json.loads(fh.read())
shape = scaleinfo["shape"]
dims = {"z": shape[0], "y": shape[1], "x": shape[2]}
if not size:
@@ -196,26 +195,19 @@ def get_header(fs: FileSystemApi, tomo_filename: str) -> MrcObject:


def scale_mrcfile(
fs: FileSystemApi,
output_prefix: str,
tomo_filename: str,
scale_z_axis: bool = True,
write_mrc: bool = True,
write_zarr: bool = True,
header_mapper: Callable[[np.array], None] = None,
voxel_spacing=None,
fs: FileSystemApi,
output_prefix: str,
tomo_filename: str,
scale_z_axis: bool = True,
write_mrc: bool = True,
write_zarr: bool = True,
header_mapper: Callable[[np.array], None] = None,
voxel_spacing=None,
):
tc = TomoConverter(fs, tomo_filename)
pyramid = tc.make_pyramid(scale_z_axis=scale_z_axis)
_ = tc.pyramid_to_omezarr(fs, pyramid, f"{output_prefix}.zarr", write_zarr)
_ = tc.pyramid_to_mrc(
fs,
pyramid,
f"{output_prefix}.mrc",
write_mrc,
header_mapper,
voxel_spacing
)
_ = tc.pyramid_to_mrc(fs, pyramid, f"{output_prefix}.mrc", write_mrc, header_mapper, voxel_spacing)


def scale_maskfile(
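The get_tomo_metadata change above replaces bare json.loads(open(...).read()) calls with with blocks so the .zattrs and .zarray handles are closed deterministically. A short sketch of the same read pattern against an OME-Zarr layout (hypothetical helper names; the multiscales structure mirrors what the diff reads):

import json
import os


def read_json(path: str) -> dict:
    # Opening inside a `with` block guarantees the handle is closed even if
    # JSON parsing raises, unlike json.loads(open(path).read()).
    with open(path, "r") as fh:
        return json.load(fh)


def read_multiscale_shapes(omezarr_dir: str) -> list:
    # Collect z/y/x sizes for every scale level listed in .zattrs.
    zattrs = read_json(os.path.join(omezarr_dir, ".zattrs"))
    shapes = []
    for dataset in zattrs["multiscales"][0]["datasets"]:
        zarray = read_json(os.path.join(omezarr_dir, dataset["path"], ".zarray"))
        z, y, x = zarray["shape"]
        shapes.append({"z": z, "y": y, "x": x})
    return shapes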
14 changes: 7 additions & 7 deletions ingestion_tools/scripts/common/make_key_image.py
@@ -75,13 +75,13 @@ def generate_preview(

# Don't explode if we can't find an annotations file.
def wrapiterator(iterator):
while True:
try:
yield next(iterator)
except StopIteration:
break
except Exception as e:
print(f"Ignoring missing annotation for keyframe generation: {e}")
while True:
try:
yield next(iterator)
except StopIteration:
break
except Exception as e:
print(f"Ignoring missing annotation for keyframe generation: {e}")

for i, annotation in wrapiterator(enumerate(annotations)):
color = cmap(i)
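The make_key_image.py change only fixes the indentation of the nested wrapiterator helper, but the pattern is worth spelling out: it drains an iterator and skips any element whose retrieval raises, so one missing annotation does not abort keyframe generation. A generic sketch of the same idea (hypothetical names):

def skip_failures(iterator):
    # Yield items from `iterator`, logging and skipping any element whose
    # retrieval raises, instead of letting the surrounding loop blow up.
    while True:
        try:
            yield next(iterator)
        except StopIteration:
            break
        except Exception as exc:  # deliberately broad, mirroring the diff
            print(f"Ignoring failed item: {exc}")


def parse(value: str) -> int:
    return int(value)


for parsed in skip_failures(map(parse, ["1", "oops", "3"])):
    print(parsed)  # prints 1 and 3; the "oops" element is logged and skipped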
8 changes: 3 additions & 5 deletions ingestion_tools/scripts/common/metadata.py
@@ -1,6 +1,6 @@
import contextlib
import os
import re
from typing import Any, Dict

from common.formats import tojson
from common.fs import FileSystemApi
@@ -49,10 +49,8 @@ class AnnotationMetadata(MergedMetadata):
def get_filename_prefix(self, output_dir: str, identifier: int) -> str:
version = self.metadata["version"]
obj = None
try:
with contextlib.suppress(KeyError):
obj = self.metadata["annotation_object"]["description"]
except KeyError:
pass
if not obj:
obj = self.metadata["annotation_object"]["name"]
dest_filename = os.path.join(
@@ -62,7 +60,7 @@ def get_filename_prefix(self, output_dir: str, identifier: int) -> str:
str(identifier),
re.sub("[^0-9a-z]", "_", obj.lower()),
re.sub("[^0-9a-z.]", "_", f"{version.lower()}"),
]
],
),
)
return dest_filename
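Besides the contextlib.suppress(KeyError) fallback from the annotation description to its name, this hunk shows how the annotation filename prefix is sanitized with re.sub. A hedged sketch of that naming scheme (the "-" separator and directory layout are illustrative assumptions, not taken from the diff):

import os
import re


def filename_prefix(output_dir: str, identifier: int, name: str, version: str) -> str:
    # Lowercase and replace anything outside [0-9a-z] (or [0-9a-z.] for the
    # version) with underscores so the prefix stays filesystem- and URL-safe.
    parts = [
        str(identifier),
        re.sub("[^0-9a-z]", "_", name.lower()),
        re.sub("[^0-9a-z.]", "_", version.lower()),
    ]
    return os.path.join(output_dir, "-".join(parts))


print(filename_prefix("annotations", 101, "Ribosome (80S)", "1.0"))
# annotations/101-ribosome__80s_-1.0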
4 changes: 2 additions & 2 deletions ingestion_tools/scripts/common/normalize_fields.py
@@ -1,8 +1,8 @@
def normalize_fiducial_alignment(status):
# Grant jensen configs use true/false
if status == True:
if status is True:
return "FIDUCIAL"
if status == False:
if status is False:
return "NON_FIDUCIAL"
# Everybody else uses proper values
if status.upper() in ["FIDUCIAL", "NON_FIDUCIAL"]:
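The normalize_fields.py change swaps == True / == False for identity checks, which match only the real bool singletons and let every other value fall through to the string handling below them. A tiny sketch of why that matters (the trailing error case is an assumption, since the diff does not show the end of the function):

def normalize_flag(status):
    # `1 == True` evaluates to True in Python, so the old equality check would
    # have classified the integer 1 as FIDUCIAL; identity matches only True/False.
    if status is True:
        return "FIDUCIAL"
    if status is False:
        return "NON_FIDUCIAL"
    if isinstance(status, str) and status.upper() in ("FIDUCIAL", "NON_FIDUCIAL"):
        return status.upper()
    raise ValueError(f"Unexpected fiducial alignment value: {status}")


print(normalize_flag(True))        # FIDUCIAL
print(normalize_flag("fiducial"))  # FIDUCIAL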