Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Epimeta export rewrite #922

Merged
merged 19 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 187 additions & 88 deletions hawc/apps/epimeta/exports.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,158 @@
import pandas as pd

from ..common.exports import Exporter, ModelExport
from ..common.helper import FlatFileExporter
from ..study.models import Study
from . import models
from ..common.models import sql_display, sql_format, str_m2m
from ..epi.exports import ResultMetricExport
from ..study.exports import StudyExport
from . import constants


class MetaProtocolExport(ModelExport):
    """Export module listing the meta-protocol columns and their annotations."""

    def get_value_map(self):
        """Return the column-key -> query-value mapping.

        Every column reads from a query value with the same name. Five of
        those names (``url``, ``protocol_type``, ``lit_search_strategy``,
        ``inclusion_criteria``, ``exclusion_criteria``) are also keys of
        ``get_annotation_map``.
        """
        columns = (
            "pk",
            "url",
            "name",
            "protocol_type",
            "lit_search_strategy",
            "lit_search_notes",
            "lit_search_start_date",
            "lit_search_end_date",
            "total_references",
            "inclusion_criteria",
            "exclusion_criteria",
            "total_studies_identified",
            "notes",
        )
        return {column: column for column in columns}

    def get_annotation_map(self, query_prefix):
        """Return the annotation expressions, keyed by annotation name.

        Args:
            query_prefix: string prepended to every field path used in the
                annotation expressions.
        """
        pk_path = query_prefix + "id"
        protocol_type_path = query_prefix + "protocol_type"
        strategy_path = query_prefix + "lit_search_strategy"
        inclusion_path = query_prefix + "inclusion_criteria__description"
        exclusion_path = query_prefix + "exclusion_criteria__description"
        return {
            "url": sql_format("/epi-meta/protocol/{}/", pk_path),  # hardcoded URL
            "protocol_type": sql_display(protocol_type_path, constants.MetaProtocol),
            "lit_search_strategy": sql_display(strategy_path, constants.MetaLitSearch),
            "inclusion_criteria": str_m2m(inclusion_path),
            "exclusion_criteria": str_m2m(exclusion_path),
        }

    def prepare_df(self, df):
        """Replace non-null literature-search dates with ``isoformat()`` strings.

        Only the start/end date columns that are present in ``df`` are
        touched; null entries are left as they are. Returns ``df``.
        """

        def to_iso(value):
            # nulls pass through untouched; everything else is serialized
            return value if pd.isna(value) else value.isoformat()

        date_columns = (
            self.get_column_name("lit_search_start_date"),
            self.get_column_name("lit_search_end_date"),
        )
        for column in date_columns:
            if column not in df.columns:
                continue
            df.loc[:, column] = df[column].apply(to_iso)
        return df


class MetaResultExport(ModelExport):
    """Export module listing the meta-result columns and their annotations."""

    def get_value_map(self):
        """Return the column-key -> query-value mapping.

        Columns read from a query value of the same name, except the two
        listed in ``renamed``.
        """
        columns = (
            "pk",
            "url",
            "label",
            "data_location",
            "health_outcome",
            "health_outcome_notes",
            "exposure_name",
            "exposure_details",
            "number_studies",
            "statistical_metric",
            "statistical_notes",
            "n",
            "estimate",
            "lower_ci",
            "upper_ci",
            "ci_units",
            "heterogeneity",
            "adjustment_factors",
            "notes",
        )
        # columns whose source value is named differently from the column key
        renamed = {
            "statistical_metric": "metric__metric",
            "adjustment_factors": "adjustment_factors_str",
        }
        return {column: renamed.get(column, column) for column in columns}

    def get_annotation_map(self, query_prefix):
        """Return the annotation expressions, keyed by annotation name.

        Args:
            query_prefix: string prepended to every field path used in the
                annotation expressions.
        """
        pk_path = query_prefix + "id"
        factors_path = query_prefix + "adjustment_factors__description"
        return {
            "url": sql_format("/epi-meta/result/{}/", pk_path),  # hardcoded URL
            "adjustment_factors_str": str_m2m(factors_path),
        }


class SingleResultExport(ModelExport):
    """Export module listing the single-result columns."""

    def get_value_map(self):
        """Return the column-key -> query-value mapping.

        Each column reads from a query value of the same name, except
        ``study``, which reads from ``study_id``.
        """
        columns = (
            "pk",
            "study",
            "exposure_name",
            "weight",
            "n",
            "estimate",
            "lower_ci",
            "upper_ci",
            "ci_units",
            "notes",
        )
        renamed = {"study": "study_id"}
        return {column: renamed.get(column, column) for column in columns}


class EpiMetaExporter(Exporter):
    """Exporter made of study, protocol, meta-result and single-result modules."""

    def build_modules(self) -> list[ModelExport]:
        """Return the export modules, in order.

        Each module is constructed with a key and a query path; the
        meta-result module uses an empty path.
        """
        study = StudyExport("study", "protocol__study")
        protocol = MetaProtocolExport("meta_protocol", "protocol")
        meta_result = MetaResultExport("meta_result", "")
        single_result = SingleResultExport("single_result", "single_results")
        return [study, protocol, meta_result, single_result]


class EpiMetaDataPivotExporter(Exporter):
    """Exporter restricted to the study, protocol, result and metric fields listed below."""

    def build_modules(self) -> list[ModelExport]:
        """Return the export modules, each limited by an ``include`` tuple."""
        study_fields = ("id", "short_citation", "published")
        protocol_fields = (
            "pk",
            "name",
            "protocol_type",
            "total_references",
            "total_studies_identified",
        )
        result_fields = (
            "pk",
            "label",
            "health_outcome",
            "exposure_name",
            "number_studies",
            "n",
            "estimate",
            "lower_ci",
            "upper_ci",
            "ci_units",
            "heterogeneity",
        )
        metric_fields = ("name", "abbreviation")

        return [
            StudyExport("study", "protocol__study", include=study_fields),
            MetaProtocolExport("meta_protocol", "protocol", include=protocol_fields),
            MetaResultExport("meta_result", "", include=result_fields),
            ResultMetricExport("metric", "metric", include=metric_fields),
        ]


class MetaResultFlatComplete(FlatFileExporter):
Expand All @@ -9,36 +161,8 @@ class MetaResultFlatComplete(FlatFileExporter):
epidemiological meta-result study type from scratch.
"""

def _get_header_row(self):
    """Return the study, protocol, meta-result and single-result headers as one list."""
    sources = (
        Study,
        models.MetaProtocol,
        models.MetaResult,
        models.SingleResult,
    )
    return [
        column
        for source in sources
        for column in source.flat_complete_header_row()
    ]

def _get_data_rows(self):
    """Return one flat row per single-result of every meta-result in the queryset.

    A meta-result without single-results still yields one row, with the
    single-result section filled by ten ``None`` values.
    """
    identifiers = Study.identifiers_df(self.queryset, "protocol__study_id")
    collected = []
    for meta_result in self.queryset:
        data = meta_result.get_json(json_encode=False)
        protocol = data["protocol"]

        # columns shared by every row produced for this meta-result
        shared = [
            *Study.flat_complete_data_row(protocol["study"], identifiers),
            *models.MetaProtocol.flat_complete_data_row(protocol),
            *models.MetaResult.flat_complete_data_row(data),
        ]

        # one tail per single-result, or a single blank tail when there are none
        tails = [
            models.SingleResult.flat_complete_data_row(single)
            for single in data["single_results"]
        ] or [[None] * 10]

        collected.extend(shared + list(tail) for tail in tails)

    return collected
def build_df(self) -> pd.DataFrame:
    """Return the dataframe that ``EpiMetaExporter`` produces for ``self.queryset``."""
    exporter = EpiMetaExporter()
    return exporter.get_df(self.queryset)


class MetaResultFlatDataPivot(FlatFileExporter):
Expand All @@ -49,60 +173,35 @@ class MetaResultFlatDataPivot(FlatFileExporter):
Note: data pivot does not currently include study confidence. Could be added if needed.
"""

def _get_header_row(self):
return [
"study id",
"study name",
"study published",
"protocol id",
"protocol name",
"protocol type",
"total references",
"identified references",
"key",
"meta result id",
"meta result label",
"health outcome",
"exposure",
"result references",
"statistical metric",
"statistical metric abbreviation",
"N",
"estimate",
"lower CI",
"upper CI",
"CI units",
"heterogeneity",
]
# Build the data-pivot dataframe from EpiMetaDataPivotExporter for self.queryset.
def build_df(self) -> pd.DataFrame:
df = EpiMetaDataPivotExporter().get_df(self.queryset)

# copy the meta-result primary key into a separate "key" column
df["key"] = df["meta_result-pk"]

def _get_data_rows(self):
rows = []
for obj in self.queryset:
ser = obj.get_json(json_encode=False)
row = [
ser["protocol"]["study"]["id"],
ser["protocol"]["study"]["short_citation"],
ser["protocol"]["study"]["published"],
ser["protocol"]["id"],
ser["protocol"]["name"],
ser["protocol"]["protocol_type"],
ser["protocol"]["total_references"],
ser["protocol"]["total_studies_identified"],
ser["id"], # repeat for data-pivot key
ser["id"],
ser["label"],
ser["health_outcome"],
ser["exposure_name"],
ser["number_studies"],
ser["metric"]["metric"],
ser["metric"]["abbreviation"],
ser["n"],
ser["estimate"],
ser["lower_ci"],
ser["upper_ci"],
ser["ci_units"],
ser["heterogeneity"],
]
rows.append(row)

return rows
# Rename the "<module>-<field>" columns to the data-pivot header labels.
df = df.rename(
columns={
"study-id": "study id",
"study-short_citation": "study name",
"study-published": "study published",
"meta_protocol-pk": "protocol id",
"meta_protocol-name": "protocol name",
"meta_protocol-protocol_type": "protocol type",
"meta_protocol-total_references": "total references",
"meta_protocol-total_studies_identified": "identified references",
"meta_result-pk": "meta result id",
"meta_result-label": "meta result label",
"meta_result-health_outcome": "health outcome",
"meta_result-exposure_name": "exposure",
"meta_result-number_studies": "result references",
"metric-name": "statistical metric",
"metric-abbreviation": "statistical metric abbreviation",
"meta_result-n": "N",
"meta_result-estimate": "estimate",
"meta_result-lower_ci": "lower CI",
"meta_result-upper_ci": "upper CI",
"meta_result-ci_units": "CI units",
"meta_result-heterogeneity": "heterogeneity",
},
# fail with KeyError if any source column above is missing from df
errors="raise",
)
return df
Loading
Loading