Merge branch 'main' into jgadling/more-tests
jgadling authored Feb 8, 2024
2 parents c17c011 + 34d0246 commit e32518c
Showing 37 changed files with 335 additions and 354 deletions.
48 changes: 48 additions & 0 deletions .github/workflows/staging-deploy.yaml
@@ -0,0 +1,48 @@
name: Deploy to staging

on:
push:
branches:
- main
paths:
- ".happy/**"
- "api_server/**"

# https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-amazon-web-services
permissions:
id-token: write
contents: read

jobs:
deploy-to-staging:
concurrency:
group: staging-cryoet-api-deploy-${{ github.ref }}
cancel-in-progress: true

name: deploy staging branch
runs-on: [ARM64, self-hosted, Linux]
environment: staging
if: github.repository == 'chanzuckerberg/cryoet-data-portal-backend'
steps:
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
mask-aws-account-id: true
aws-region: ${{ secrets.AWS_REGION }}
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 1200
- name: Create or update stack
uses: chanzuckerberg/github-actions/.github/actions/deploy-happy-stack@v1.24.0
env:
# Force using BuildKit instead of normal Docker, required so that metadata
# is written/read to allow us to use layers of previous builds as cache.
DOCKER_BUILDKIT: 1
COMPOSE_DOCKER_CLI_BUILD: 1
DOCKER_REPO: ${{ secrets.ECR_REPO }}/
ENV: staging
with:
stack-name: "graphql"
create-tag: true
tfe-token: ${{ secrets.TFE_TOKEN }}
env: staging
operation: create-or-update
2 changes: 1 addition & 1 deletion .gitignore
@@ -157,4 +157,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
1 change: 1 addition & 0 deletions .happy/terraform/envs/dev/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "dev"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
1 change: 1 addition & 0 deletions .happy/terraform/envs/prod/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "prod"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
1 change: 1 addition & 0 deletions .happy/terraform/envs/staging/main.tf
@@ -14,6 +14,7 @@ module "stack" {
deployment_stage = "staging"
stack_prefix = "/${var.stack_name}"
k8s_namespace = var.k8s_namespace
platform_architecture = "arm64"
additional_env_vars = {
HASURA_GRAPHQL_ENABLE_CONSOLE = "false"
HASURA_GRAPHQL_DATABASE_URL = data.aws_ssm_parameter.db_url.value
18 changes: 16 additions & 2 deletions .pre-commit-config.yaml
@@ -3,14 +3,28 @@ repos:
rev: 23.9.1
hooks:
- id: black
files: backend|client/python
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.292
hooks:
- id: ruff
files: backend|client/python
args:
- --fix
#- repo: https://github.com/pre-commit/mirrors-mypy
# rev: "v1.4.1"
# hooks:
# - id: mypy
# exclude: "packages/frontend"
# # entry: mypy --config-file ingestion_tools/pyproject.toml
# args: ["--config-file", "ingestion_tools/pyproject.toml"]
# # args: [--config-file=./packages/backend/pyproject.toml]
# # entry: mypy --config-file packages/backend/pyproject.toml
# # entry: bash -c 'cd packages/backend && mypy --config-file pyproject.toml .'
# additional_dependencies:
# [
# pydantic,
# pydantic-settings,
# click,
# ]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
7 changes: 7 additions & 0 deletions ingestion_tools/pyproject.toml
@@ -68,10 +68,17 @@ select = [
ignore = [
"E501", # line too long
"C408", # rewrite empty built-ins as literals
"T201", # print statements.
"DTZ007", # Datetime objects without timezones.
"DTZ005", # More datetimes without timezones.
]
line-length = 120
target-version = "py39"

[tool.ruff.lint.per-file-ignores]
# Ignore `SIM115` (not using open() in a context manager) since all calls to this method *do* use a context manager.
"scripts/common/fs.py" = ["SIM115"]

[tool.ruff.isort]
known-first-party =["common"]

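As context for the SIM115 suppression above: ruff's SIM115 flags open() calls that are not used in a with statement, which is a false positive when a helper intentionally returns the open handle and every caller closes it through a context manager. A minimal sketch of that pattern in Python, with hypothetical names rather than the actual scripts/common/fs.py code:

from typing import IO


def open_readable(path: str) -> IO[str]:
    # SIM115 would flag this open() because it is not wrapped in `with`,
    # even though ownership of the handle is deliberately handed to the caller.
    return open(path, "r")


def read_text(path: str) -> str:
    # The caller supplies the context manager, so the file is still closed.
    with open_readable(path) as handle:
        return handle.read()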
23 changes: 9 additions & 14 deletions ingestion_tools/scripts/common/config.py
@@ -1,3 +1,4 @@
import contextlib
import csv
import os
import os.path
@@ -112,10 +113,8 @@ def load_map_files(self) -> None:
def load_run_metadata_file(self, file_attr: str) -> dict[str, Any]:
mapdata = {}
map_filename = None
try:
with contextlib.suppress(AttributeError):
map_filename = getattr(self, file_attr)
except AttributeError:
pass
if not map_filename:
return mapdata
with self.fs.open(f"{self.input_path}/{map_filename}", "r") as tsvfile:
@@ -130,10 +129,8 @@ def load_run_metadata_file(self, file_attr: str) -> dict[str, Any]:
def load_run_csv_file(self, file_attr: str) -> dict[str, Any]:
mapdata = {}
map_filename = None
try:
with contextlib.suppress(AttributeError):
map_filename = getattr(self, file_attr)
except AttributeError:
pass
if not map_filename:
return mapdata
with self.fs.open(f"{self.input_path}/{map_filename}", "r") as csvfile:
@@ -181,7 +178,7 @@ def get_run_data_map(self, run_name: str) -> dict[str, Any]:
return {}

def expand_string(self, run_name: str, string_template: Any) -> int | float | str:
if type(string_template) != str:
if not isinstance(string_template, str):
return string_template
if run_data := self.get_run_data_map(run_name):
string_template = string_template.format(**run_data)
@@ -195,16 +192,16 @@ def expand_string(self, run_name: str, string_template: Any) -> int | float | str:

def expand_metadata(self, run_name: str, metadata_dict: dict[str, Any]) -> dict[str, Any]:
for k, v in metadata_dict.items():
if type(v) == str:
if isinstance(v, str):
metadata_dict[k] = self.expand_string(run_name, v)
elif (type(v)) == dict:
elif isinstance(v, dict):
metadata_dict[k] = self.expand_metadata(run_name, v)
elif (type(v)) == list:
elif isinstance(v, list):
for idx in range(len(v)):
# Note - we're not supporting deeply nested lists,
# but we don't need to with our current data model.
item = v[idx]
if type(item) == str:
if isinstance(item, str):
v[idx] = self.expand_string(run_name, item)
return metadata_dict

@@ -267,10 +264,8 @@ def glob_files(self, obj: BaseImporter, globstring: str) -> list[str]:
if not globstring:
return []
globvars = run.get_glob_vars()
try:
with contextlib.suppress(ValueError):
globvars["int_run_name"] = int(run.run_name)
except ValueError:
pass
expanded_glob = os.path.join(self.dataset_root_dir, globstring.format(**globvars))
results = self.fs.glob(expanded_glob)
if not results:
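The config.py hunks above swap two older patterns for their idiomatic equivalents: try/except blocks that only pass become contextlib.suppress, and type(x) == str checks become isinstance. A small self-contained sketch of both idioms (illustrative only, not the portal's code):

import contextlib
from typing import Optional


class RunConfig:
    run_data_map_file = "run_data.tsv"  # other configs may not define this attribute


def get_map_filename(config: object, file_attr: str) -> Optional[str]:
    filename = None
    # Equivalent to try/getattr/except AttributeError: pass, but shorter and clearer.
    with contextlib.suppress(AttributeError):
        filename = getattr(config, file_attr)
    return filename


def expand(value: object) -> object:
    # isinstance() is preferred over type(x) == str: it reads naturally and also
    # accepts str subclasses, which the equality comparison silently rejects.
    if isinstance(value, str):
        return value.upper()
    return value


print(get_map_filename(RunConfig(), "run_data_map_file"))  # run_data.tsv
print(get_map_filename(RunConfig(), "missing_attr"))       # None
print(expand("fiducial"), expand(42))                      # FIDUCIAL 42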
5 changes: 2 additions & 3 deletions ingestion_tools/scripts/common/fs.py
@@ -1,3 +1,4 @@
import contextlib
import glob
import os
import os.path
@@ -99,10 +100,8 @@ def push(self, path: str) -> None:
remote_file = os.path.relpath(path, self.tmpdir)
src_size = os.path.getsize(path)
dest_size = 0
try:
with contextlib.suppress(FileNotFoundError):
dest_size = self.s3fs.size(remote_file)
except FileNotFoundError:
pass
if src_size == dest_size:
if self.force_overwrite:
print(f"Forcing re-upload of {path}")
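The fs.py hunk applies the same contextlib.suppress cleanup to the size check that decides whether a local file should be pushed to remote storage. A hedged sketch of that decision logic (hypothetical names; the remote client only needs a size() method that raises FileNotFoundError for missing keys, as s3fs does):

import contextlib
import os
import tempfile


class FakeRemoteFS:
    # Stand-in for an object-store client (e.g. s3fs), for demonstration only.
    def __init__(self, sizes: dict):
        self.sizes = sizes

    def size(self, path: str) -> int:
        if path not in self.sizes:
            raise FileNotFoundError(path)
        return self.sizes[path]


def needs_upload(local_path: str, remote_path: str, remote_fs, force_overwrite: bool = False) -> bool:
    src_size = os.path.getsize(local_path)
    dest_size = 0
    # A missing remote object simply keeps dest_size at 0, so the file gets uploaded.
    with contextlib.suppress(FileNotFoundError):
        dest_size = remote_fs.size(remote_path)
    return force_overwrite or src_size != dest_size


with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"12345")
print(needs_upload(tmp.name, "bucket/file", FakeRemoteFS({"bucket/file": 5})))  # False
print(needs_upload(tmp.name, "bucket/missing", FakeRemoteFS({})))               # True
os.unlink(tmp.name)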
42 changes: 17 additions & 25 deletions ingestion_tools/scripts/common/image.py
@@ -2,7 +2,7 @@
import os
import os.path
from datetime import datetime
from typing import Any, List, Callable
from typing import Any, Callable, List

import mrcfile
import numpy as np
@@ -34,9 +34,7 @@ def __init__(self, fs: FileSystemApi, mrc_filename: str, header_only: bool = False):
self.mrc_filename = fs.read_block(mrc_filename)
else:
self.mrc_filename = fs.localreadable(mrc_filename)
with mrcfile.open(
self.mrc_filename, permissive=True, header_only=header_only
) as mrc:
with mrcfile.open(self.mrc_filename, permissive=True, header_only=header_only) as mrc:
if mrc.data is None and not header_only:
raise Exception("missing mrc data")
self.header = mrc.header
@@ -83,7 +81,6 @@ def pyramid_to_mrc(
print(f"skipping remote push for {filename}")
return mrcfiles


def pyramid_to_omezarr(
self,
fs: FileSystemApi,
@@ -120,7 +117,7 @@ def update_headers(self, mrcfile: MrcFile, header_mapper, voxel_spacing):
header.cella.y = isotropic_voxel_size * data.shape[1]
header.cella.z = isotropic_voxel_size * data.shape[0]
header.label[0] = "{0:40s}{1:>39s}".format("Validated by cryoET data portal.", time)
header.rms = np.sqrt(np.mean((data - np.mean(data))**2))
header.rms = np.sqrt(np.mean((data - np.mean(data)) ** 2))
header.extra1 = self.header.extra1
header.extra2 = self.header.extra2

@@ -130,7 +127,7 @@ def update_headers(self, mrcfile: MrcFile, header_mapper, voxel_spacing):
header.exttyp = self.header.exttyp
else:
header.nsymbt = np.array(0, dtype="i4")
header.exttyp = np.array(b'MRCO', dtype="S4")
header.exttyp = np.array(b"MRCO", dtype="S4")

if header_mapper:
header_mapper(header)
@@ -168,10 +165,12 @@ def get_tomo_metadata(
scales = []
size: dict[str, float] = {}
omezarr_dir = fs.destformat(f"{output_prefix}.zarr")
zarrinfo = json.loads(open(fs.localreadable(os.path.join(omezarr_dir, ".zattrs")), "r").read())
with open(fs.localreadable(os.path.join(omezarr_dir, ".zattrs")), "r") as fh:
zarrinfo = json.loads(fh.read())
multiscales = zarrinfo["multiscales"][0]["datasets"]
for scale in multiscales:
scaleinfo = json.loads(open(fs.localreadable(os.path.join(omezarr_dir, scale["path"], ".zarray")), "r").read())
with open(fs.localreadable(os.path.join(omezarr_dir, scale["path"], ".zarray")), "r") as fh:
scaleinfo = json.loads(fh.read())
shape = scaleinfo["shape"]
dims = {"z": shape[0], "y": shape[1], "x": shape[2]}
if not size:
@@ -196,26 +195,19 @@ def get_header(fs: FileSystemApi, tomo_filename: str) -> MrcObject:


def scale_mrcfile(
fs: FileSystemApi,
output_prefix: str,
tomo_filename: str,
scale_z_axis: bool = True,
write_mrc: bool = True,
write_zarr: bool = True,
header_mapper: Callable[[np.array], None] = None,
voxel_spacing=None,
fs: FileSystemApi,
output_prefix: str,
tomo_filename: str,
scale_z_axis: bool = True,
write_mrc: bool = True,
write_zarr: bool = True,
header_mapper: Callable[[np.array], None] = None,
voxel_spacing=None,
):
tc = TomoConverter(fs, tomo_filename)
pyramid = tc.make_pyramid(scale_z_axis=scale_z_axis)
_ = tc.pyramid_to_omezarr(fs, pyramid, f"{output_prefix}.zarr", write_zarr)
_ = tc.pyramid_to_mrc(
fs,
pyramid,
f"{output_prefix}.mrc",
write_mrc,
header_mapper,
voxel_spacing
)
_ = tc.pyramid_to_mrc(fs, pyramid, f"{output_prefix}.mrc", write_mrc, header_mapper, voxel_spacing)


def scale_maskfile(
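The get_tomo_metadata change above replaces bare json.loads(open(...).read()) calls with with blocks so the .zattrs and .zarray handles are closed deterministically. A short sketch of the same read pattern against an OME-Zarr layout (hypothetical helper names; the multiscales structure mirrors what the diff reads):

import json
import os


def read_json(path: str) -> dict:
    # Opening inside a `with` block guarantees the handle is closed even if
    # JSON parsing raises, unlike json.loads(open(path).read()).
    with open(path, "r") as fh:
        return json.load(fh)


def read_multiscale_shapes(omezarr_dir: str) -> list:
    # Collect z/y/x sizes for every scale level listed in .zattrs.
    zattrs = read_json(os.path.join(omezarr_dir, ".zattrs"))
    shapes = []
    for dataset in zattrs["multiscales"][0]["datasets"]:
        zarray = read_json(os.path.join(omezarr_dir, dataset["path"], ".zarray"))
        z, y, x = zarray["shape"]
        shapes.append({"z": z, "y": y, "x": x})
    return shapes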
14 changes: 7 additions & 7 deletions ingestion_tools/scripts/common/make_key_image.py
@@ -75,13 +75,13 @@ def generate_preview(

# Don't explode if we can't find an annotations file.
def wrapiterator(iterator):
while True:
try:
yield next(iterator)
except StopIteration:
break
except Exception as e:
print(f"Ignoring missing annotation for keyframe generation: {e}")
while True:
try:
yield next(iterator)
except StopIteration:
break
except Exception as e:
print(f"Ignoring missing annotation for keyframe generation: {e}")

for i, annotation in wrapiterator(enumerate(annotations)):
color = cmap(i)
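The make_key_image.py change only fixes the indentation of the nested wrapiterator helper, but the pattern is worth spelling out: it drains an iterator and skips any element whose retrieval raises, so one missing annotation does not abort keyframe generation. A generic sketch of the same idea (hypothetical names):

def skip_failures(iterator):
    # Yield items from `iterator`, logging and skipping any element whose
    # retrieval raises, instead of letting the surrounding loop blow up.
    while True:
        try:
            yield next(iterator)
        except StopIteration:
            break
        except Exception as exc:  # deliberately broad, mirroring the diff
            print(f"Ignoring failed item: {exc}")


def parse(value: str) -> int:
    return int(value)


for parsed in skip_failures(map(parse, ["1", "oops", "3"])):
    print(parsed)  # prints 1 and 3; the "oops" element is logged and skipped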
8 changes: 3 additions & 5 deletions ingestion_tools/scripts/common/metadata.py
@@ -1,6 +1,6 @@
import contextlib
import os
import re
from typing import Any, Dict

from common.formats import tojson
from common.fs import FileSystemApi
@@ -49,10 +49,8 @@ class AnnotationMetadata(MergedMetadata):
def get_filename_prefix(self, output_dir: str, identifier: int) -> str:
version = self.metadata["version"]
obj = None
try:
with contextlib.suppress(KeyError):
obj = self.metadata["annotation_object"]["description"]
except KeyError:
pass
if not obj:
obj = self.metadata["annotation_object"]["name"]
dest_filename = os.path.join(
@@ -62,7 +60,7 @@ def get_filename_prefix(self, output_dir: str, identifier: int) -> str:
str(identifier),
re.sub("[^0-9a-z]", "_", obj.lower()),
re.sub("[^0-9a-z.]", "_", f"{version.lower()}"),
]
],
),
)
return dest_filename
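Besides the contextlib.suppress(KeyError) fallback from the annotation description to its name, this hunk shows how the annotation filename prefix is sanitized with re.sub. A hedged sketch of that naming scheme (the "-" separator and directory layout are illustrative assumptions, not taken from the diff):

import os
import re


def filename_prefix(output_dir: str, identifier: int, name: str, version: str) -> str:
    # Lowercase and replace anything outside [0-9a-z] (or [0-9a-z.] for the
    # version) with underscores so the prefix stays filesystem- and URL-safe.
    parts = [
        str(identifier),
        re.sub("[^0-9a-z]", "_", name.lower()),
        re.sub("[^0-9a-z.]", "_", version.lower()),
    ]
    return os.path.join(output_dir, "-".join(parts))


print(filename_prefix("annotations", 101, "Ribosome (80S)", "1.0"))
# annotations/101-ribosome__80s_-1.0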
4 changes: 2 additions & 2 deletions ingestion_tools/scripts/common/normalize_fields.py
@@ -1,8 +1,8 @@
def normalize_fiducial_alignment(status):
# Grant jensen configs use true/false
if status == True:
if status is True:
return "FIDUCIAL"
if status == False:
if status is False:
return "NON_FIDUCIAL"
# Everybody else uses proper values
if status.upper() in ["FIDUCIAL", "NON_FIDUCIAL"]:
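The normalize_fields.py change swaps == True / == False for identity checks, which match only the real bool singletons and let every other value fall through to the string handling below them. A tiny sketch of why that matters (the trailing error case is an assumption, since the diff does not show the end of the function):

def normalize_flag(status):
    # `1 == True` evaluates to True in Python, so the old equality check would
    # have classified the integer 1 as FIDUCIAL; identity matches only True/False.
    if status is True:
        return "FIDUCIAL"
    if status is False:
        return "NON_FIDUCIAL"
    if isinstance(status, str) and status.upper() in ("FIDUCIAL", "NON_FIDUCIAL"):
        return status.upper()
    raise ValueError(f"Unexpected fiducial alignment value: {status}")


print(normalize_flag(True))        # FIDUCIAL
print(normalize_flag("fiducial"))  # FIDUCIAL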