From b2e3efdf0e2658b438df0a51ee7ba931384daca1 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Fri, 29 Sep 2023 21:22:43 -0400 Subject: [PATCH 01/15] preliminary epi rewrite --- hawc/apps/epi/exports.py | 261 +++++++++++++++++++++++++++++++++------ 1 file changed, 225 insertions(+), 36 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index befea2868c..e617ff1e0e 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -1,46 +1,235 @@ from ..common.helper import FlatFileExporter from ..materialized.models import FinalRiskOfBiasScore from ..study.models import Study -from . import models +from . import models, constants +from ..common.exports import Exporter, ModelExport +from ..common.models import sql_display, sql_format, str_m2m, to_display_array +from ..study.exports import StudyExport +import pandas as pd +from django.db.models import Q, Case, When + + +class StudyPopulationExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "url":"url", + "name":"name", + "design":"design", + "age_profile":"age_profile", + "source":"source", + "countries":"countries__name", + "region":"region", + "state":"state", + "eligible_n":"eligible_n", + "invited_n":"invited_n", + "participant_n":"participant_n", + "inclusion_criteria":"inclusion_criteria", + "exclusion_criteria":"exclusion_criteria", + "confounding_criteria":"confounding_criteria", + "comments":"comments", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/study-population/{}/", query_prefix + "id"), # hardcoded URL + "countries__name": str_m2m(query_prefix + "countries__name"), + "inclusion_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.I})), + "exclusion_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.E})), + "confounding_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.C})), + } + + +class OutcomeExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "url":"url", + "name":"name", + "effects":"effects__name", + "system":"system", + "effect":"effect", + "effect_subtype":"effect_subtype", + "diagnostic":"diagnostic", + "diagnostic_description":"diagnostic_description", + "age_of_measurement":"age_of_measurement", + "outcome_n":"outcome_n", + "summary":"summary", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/outcome/{}/", query_prefix + "id"), # hardcoded URL + "effects__name": str_m2m(query_prefix + "effects__name"), + } + +class ExposureExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "url":"url", + "name":"name", + "inhalation":"inhalation", + "dermal":"dermal", + "oral":"oral", + "in_utero":"in_utero", + "iv":"iv", + "unknown_route":"unknown_route", + "measured":"measured", + "metric":"metric", + "metric_units_id":"metric_units__id", + "metric_units_name":"metric_units__name", + "metric_description":"metric_description", + "analytical_method":"analytical_method", + "sampling_period":"sampling_period", + "age_of_exposure":"age_of_exposure", + "duration":"duration", + "n":"n", + "exposure_distribution":"exposure_distribution", + "description":"description", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/exposure/{}/", query_prefix + "id"), # hardcoded URL + } + +class ComparisonSetExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "url":"url", + "name":"name", + "description":"description", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi/comparison-set/{}/", query_prefix + "id"), # hardcoded URL + } + +class ResultMetricExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "name":"metric", + "abbreviation":"abbreviation", + } + +class ResultExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "name":"name", + "metric_description":"metric_description", + "metric_units":"metric_units", + "data_location":"data_location", + "population_description":"population_description", + "dose_response":"dose_response", + "dose_response_details":"dose_response_details", + "prevalence_incidence":"prevalence_incidence", + "statistical_power":"statistical_power", + "statistical_power_details":"statistical_power_details", + "statistical_test_results":"statistical_test_results", + "trend_test":"trend_test", + "adjustment_factors":"adjustment_factors", + "adjustment_factors_considered":"adjustment_factors_considered", + "estimate_type":"estimate_type", + "variance_type":"variance_type", + "ci_units":"ci_units", + "comments":"comments", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "adjustment_factors": str_m2m(query_prefix + "resfactors__adjustment_factor__description", filter=Q(**{query_prefix +"resfactors__included_in_final_model":True})), + "adjustment_factors_considered": str_m2m(query_prefix + "resfactors__adjustment_factor__description", filter=Q(**{query_prefix +"resfactors__included_in_final_model":False})), + } + +class GroupExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "group_id":"group_id", + "name":"name", + "numeric":"numeric", + "comparative_name":"comparative_name", + "sex":"sex", + "ethnicities":"ethnicities", + "eligible_n":"eligible_n", + "invited_n":"invited_n", + "participant_n":"participant_n", + "isControl":"isControl", + "comments":"comments", + "created":"created", + "last_updated":"last_updated", + } + +class GroupResultExport(ModelExport): + def get_value_map(self): + return { + "id":"id", + "n":"n", + "estimate":"estimate", + "variance":"variance", + "lower_ci":"lower_ci", + "upper_ci":"upper_ci", + "lower_range":"lower_range", + "upper_range":"upper_range", + "lower_bound_interval":"lower_bound_interval", + "upper_bound_interval":"upper_bound_interval", + "p_value_qualifier":"p_value_qualifier", + "p_value":"p_value", + "is_main_finding":"is_main_finding", + "main_finding_support":"main_finding_support", + "created":"created", + "last_updated":"last_updated", + } + + def get_annotation_map(self, query_prefix): + return { + "lower_bound_interval": Case( + When(**{query_prefix+"lower_ci":None}, then=query_prefix+"lower_range"), + default=query_prefix+"lower_ci", + ), + "upper_bound_interval": Case( + When(**{query_prefix+"upper_ci":None}, then=query_prefix+"upper_range"), + default=query_prefix+"upper_ci", + ),} + +class EpiExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "study_population__study"), + StudyPopulationExport("sp", "study_population"), + OutcomeExport("outcome", ""), + ExposureExport("exposure", "results__comparison_set__exposure"), + ComparisonSetExport("cs", "results__comparison_set"), + ResultMetricExport("metric","results__metric"), + ResultExport("result", "results"), + GroupExport("group", "results__results__group"), + GroupResultExport("result_group", "results__results"), + ] class OutcomeComplete(FlatFileExporter): - def _get_header_row(self): - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.StudyPopulation.flat_complete_header_row()) - header.extend(models.Outcome.flat_complete_header_row()) - header.extend(models.Exposure.flat_complete_header_row()) - header.extend(models.ComparisonSet.flat_complete_header_row()) - header.extend(models.Result.flat_complete_header_row()) - header.extend(models.Group.flat_complete_header_row()) - header.extend(models.GroupResult.flat_complete_header_row()) - return header + """ + Returns a complete export of all data required to rebuild the the + epidemiological meta-result study type from scratch. + """ - def _get_data_rows(self): - rows = [] - identifiers_df = Study.identifiers_df(self.queryset, "study_population__study_id") - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [] - row.extend( - Study.flat_complete_data_row(ser["study_population"]["study"], identifiers_df) - ) - row.extend(models.StudyPopulation.flat_complete_data_row(ser["study_population"])) - row.extend(models.Outcome.flat_complete_data_row(ser)) - for res in ser["results"]: - row_copy = list(row) - row_copy.extend( - models.Exposure.flat_complete_data_row(res["comparison_set"]["exposure"]) - ) - row_copy.extend(models.ComparisonSet.flat_complete_data_row(res["comparison_set"])) - row_copy.extend(models.Result.flat_complete_data_row(res)) - for rg in res["results"]: - row_copy2 = list(row_copy) - row_copy2.extend(models.Group.flat_complete_data_row(rg["group"])) - row_copy2.extend(models.GroupResult.flat_complete_data_row(rg)) - rows.append(row_copy2) - return rows + def build_df(self) -> pd.DataFrame: + return EpiExporter().get_df(self.queryset) class OutcomeDataPivot(FlatFileExporter): From 9686a8e27fd9d04003c5a0f037e0fb488bb92d74 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Tue, 3 Oct 2023 09:35:07 -0400 Subject: [PATCH 02/15] changes --- hawc/apps/epi/exports.py | 342 ++++++++++++++++++++++--------------- hawc/apps/study/exports.py | 4 + 2 files changed, 207 insertions(+), 139 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index e617ff1e0e..aea3eb5973 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -1,114 +1,142 @@ -from ..common.helper import FlatFileExporter -from ..materialized.models import FinalRiskOfBiasScore -from ..study.models import Study -from . import models, constants +import pandas as pd +from django.db.models import Case, CharField, Q, When +from django.db.models.functions import Cast + from ..common.exports import Exporter, ModelExport +from ..common.helper import FlatFileExporter from ..common.models import sql_display, sql_format, str_m2m, to_display_array +from ..materialized.models import FinalRiskOfBiasScore from ..study.exports import StudyExport -import pandas as pd -from django.db.models import Q, Case, When +from ..study.models import Study +from . import constants, models + + +def format_time(model_export: ModelExport, df): + df.loc[:, model_export.get_column_name("created")] = df[ + model_export.get_column_name("created") + ].apply(lambda x: x.isoformat()) + df.loc[:, model_export.get_column_name("last_updated")] = df[ + model_export.get_column_name("last_updated") + ].apply(lambda x: x.isoformat()) + return df + - class StudyPopulationExport(ModelExport): def get_value_map(self): return { - "id":"id", - "url":"url", - "name":"name", - "design":"design", - "age_profile":"age_profile", - "source":"source", - "countries":"countries__name", - "region":"region", - "state":"state", - "eligible_n":"eligible_n", - "invited_n":"invited_n", - "participant_n":"participant_n", - "inclusion_criteria":"inclusion_criteria", - "exclusion_criteria":"exclusion_criteria", - "confounding_criteria":"confounding_criteria", - "comments":"comments", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "url": "url", + "name": "name", + "design": "design_display", + "age_profile": "age_profile", + "source": "source", + "countries": "countries__name", + "region": "region", + "state": "state", + "eligible_n": "eligible_n", + "invited_n": "invited_n", + "participant_n": "participant_n", + "inclusion_criteria": "inclusion_criteria", + "exclusion_criteria": "exclusion_criteria", + "confounding_criteria": "confounding_criteria", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", } - + def get_annotation_map(self, query_prefix): return { "url": sql_format("/epi/study-population/{}/", query_prefix + "id"), # hardcoded URL + "design_display": sql_display(query_prefix + "design", constants.Design), "countries__name": str_m2m(query_prefix + "countries__name"), - "inclusion_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.I})), - "exclusion_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.E})), - "confounding_criteria": str_m2m(query_prefix + "spcriteria__criteria__description", filter=Q(**{query_prefix +"spcriteria__criteria_type":constants.CriteriaType.C})), + "inclusion_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.I}), + ), + "exclusion_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.E}), + ), + "confounding_criteria": str_m2m( + query_prefix + "spcriteria__criteria__description", + filter=Q(**{query_prefix + "spcriteria__criteria_type": constants.CriteriaType.C}), + ), } + def prepare_df(self, df): + return df + class OutcomeExport(ModelExport): def get_value_map(self): return { - "id":"id", - "url":"url", - "name":"name", - "effects":"effects__name", - "system":"system", - "effect":"effect", - "effect_subtype":"effect_subtype", - "diagnostic":"diagnostic", - "diagnostic_description":"diagnostic_description", - "age_of_measurement":"age_of_measurement", - "outcome_n":"outcome_n", - "summary":"summary", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "url": "url", + "name": "name", + "effects": "effects__name", + "system": "system", + "effect": "effect", + "effect_subtype": "effect_subtype", + "diagnostic": "diagnostic_display", + "diagnostic_description": "diagnostic_description", + "age_of_measurement": "age_of_measurement", + "outcome_n": "outcome_n", + "summary": "summary", + "created": "created", + "last_updated": "last_updated", } def get_annotation_map(self, query_prefix): return { "url": sql_format("/epi/outcome/{}/", query_prefix + "id"), # hardcoded URL "effects__name": str_m2m(query_prefix + "effects__name"), + "diagnostic_display": sql_display(query_prefix + "diagnostic", constants.Diagnostic), } + class ExposureExport(ModelExport): def get_value_map(self): return { - "id":"id", - "url":"url", - "name":"name", - "inhalation":"inhalation", - "dermal":"dermal", - "oral":"oral", - "in_utero":"in_utero", - "iv":"iv", - "unknown_route":"unknown_route", - "measured":"measured", - "metric":"metric", - "metric_units_id":"metric_units__id", - "metric_units_name":"metric_units__name", - "metric_description":"metric_description", - "analytical_method":"analytical_method", - "sampling_period":"sampling_period", - "age_of_exposure":"age_of_exposure", - "duration":"duration", - "n":"n", - "exposure_distribution":"exposure_distribution", - "description":"description", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "url": "url", + "name": "name", + "inhalation": "inhalation", + "dermal": "dermal", + "oral": "oral", + "in_utero": "in_utero", + "iv": "iv", + "unknown_route": "unknown_route", + "measured": "measured", + "metric": "metric", + "metric_units_id": "metric_units__id", + "metric_units_name": "metric_units__name", + "metric_description": "metric_description", + "analytical_method": "analytical_method", + "sampling_period": "sampling_period", + "age_of_exposure": "age_of_exposure", + "duration": "duration", + "n": "n", + "exposure_distribution": "exposure_distribution", + "description": "description", + "created": "created", + "last_updated": "last_updated", } - + def get_annotation_map(self, query_prefix): return { "url": sql_format("/epi/exposure/{}/", query_prefix + "id"), # hardcoded URL } + class ComparisonSetExport(ModelExport): def get_value_map(self): return { - "id":"id", - "url":"url", - "name":"name", - "description":"description", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "url": "url", + "name": "name", + "description": "description", + "created": "created", + "last_updated": "last_updated", } def get_annotation_map(self, query_prefix): @@ -116,96 +144,132 @@ def get_annotation_map(self, query_prefix): "url": sql_format("/epi/comparison-set/{}/", query_prefix + "id"), # hardcoded URL } + class ResultMetricExport(ModelExport): def get_value_map(self): return { - "id":"id", - "name":"metric", - "abbreviation":"abbreviation", + "id": "id", + "name": "metric", + "abbreviation": "abbreviation", } - + + class ResultExport(ModelExport): def get_value_map(self): return { - "id":"id", - "name":"name", - "metric_description":"metric_description", - "metric_units":"metric_units", - "data_location":"data_location", - "population_description":"population_description", - "dose_response":"dose_response", - "dose_response_details":"dose_response_details", - "prevalence_incidence":"prevalence_incidence", - "statistical_power":"statistical_power", - "statistical_power_details":"statistical_power_details", - "statistical_test_results":"statistical_test_results", - "trend_test":"trend_test", - "adjustment_factors":"adjustment_factors", - "adjustment_factors_considered":"adjustment_factors_considered", - "estimate_type":"estimate_type", - "variance_type":"variance_type", - "ci_units":"ci_units", - "comments":"comments", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "name": "name", + "metric_description": "metric_description", + "metric_units": "metric_units", + "data_location": "data_location", + "population_description": "population_description", + "dose_response": "dose_response_display", + "dose_response_details": "dose_response_details", + "prevalence_incidence": "prevalence_incidence", + "statistical_power": "statistical_power_display", + "statistical_power_details": "statistical_power_details", + "statistical_test_results": "statistical_test_results", + "trend_test": "trend_test", + "adjustment_factors": "adjustment_factors", + "adjustment_factors_considered": "adjustment_factors_considered", + "estimate_type": "estimate_type_display", + "variance_type": "variance_type_display", + "ci_units": "ci_units", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", } - + def get_annotation_map(self, query_prefix): return { - "adjustment_factors": str_m2m(query_prefix + "resfactors__adjustment_factor__description", filter=Q(**{query_prefix +"resfactors__included_in_final_model":True})), - "adjustment_factors_considered": str_m2m(query_prefix + "resfactors__adjustment_factor__description", filter=Q(**{query_prefix +"resfactors__included_in_final_model":False})), + "dose_response_type": sql_display( + query_prefix + "dose_response", constants.DoseResponse + ), + "adjustment_factors": str_m2m( + query_prefix + "resfactors__adjustment_factor__description", + filter=Q(**{query_prefix + "resfactors__included_in_final_model": True}), + ), + "adjustment_factors_considered": str_m2m( + query_prefix + "resfactors__adjustment_factor__description", + filter=Q(**{query_prefix + "resfactors__included_in_final_model": False}), + ), + "statistical_power_display": sql_display( + query_prefix + "statistical_power", constants.StatisticalPower + ), + "estimate_type_display": sql_display( + query_prefix + "estimate_type", constants.EstimateType + ), + "variance_type_display": sql_display( + query_prefix + "variance_type", constants.VarianceType + ), } + class GroupExport(ModelExport): def get_value_map(self): return { - "id":"id", - "group_id":"group_id", - "name":"name", - "numeric":"numeric", - "comparative_name":"comparative_name", - "sex":"sex", - "ethnicities":"ethnicities", - "eligible_n":"eligible_n", - "invited_n":"invited_n", - "participant_n":"participant_n", - "isControl":"isControl", - "comments":"comments", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "group_id": "group_id", + "name": "name", + "numeric": "numeric", + "comparative_name": "comparative_name", + "sex": "sex_display", + "ethnicities": "ethnicities", + "eligible_n": "eligible_n", + "invited_n": "invited_n", + "participant_n": "participant_n", + "isControl": "isControl", + "comments": "comments", + "created": "created", + "last_updated": "last_updated", } - + + def get_annotation_map(self, query_prefix): + return { + "sex_display": sql_display(query_prefix + "sex", constants.Sex), + "ethnicities": str_m2m(query_prefix + "ethnicities__name"), + } + + class GroupResultExport(ModelExport): def get_value_map(self): return { - "id":"id", - "n":"n", - "estimate":"estimate", - "variance":"variance", - "lower_ci":"lower_ci", - "upper_ci":"upper_ci", - "lower_range":"lower_range", - "upper_range":"upper_range", - "lower_bound_interval":"lower_bound_interval", - "upper_bound_interval":"upper_bound_interval", - "p_value_qualifier":"p_value_qualifier", - "p_value":"p_value", - "is_main_finding":"is_main_finding", - "main_finding_support":"main_finding_support", - "created":"created", - "last_updated":"last_updated", + "id": "id", + "n": "n", + "estimate": "estimate", + "variance": "variance", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "lower_range": "lower_range", + "upper_range": "upper_range", + "lower_bound_interval": "lower_bound_interval", + "upper_bound_interval": "upper_bound_interval", + "p_value_qualifier": "p_value_qualifier_display", + "p_value": "p_value", + "is_main_finding": "is_main_finding", + "main_finding_support": "main_finding_support_display", + "created": "created", + "last_updated": "last_updated", } def get_annotation_map(self, query_prefix): return { "lower_bound_interval": Case( - When(**{query_prefix+"lower_ci":None}, then=query_prefix+"lower_range"), - default=query_prefix+"lower_ci", - ), + When(**{query_prefix + "lower_ci": None}, then=query_prefix + "lower_range"), + default=query_prefix + "lower_ci", + ), "upper_bound_interval": Case( - When(**{query_prefix+"upper_ci":None}, then=query_prefix+"upper_range"), - default=query_prefix+"upper_ci", - ),} + When(**{query_prefix + "upper_ci": None}, then=query_prefix + "upper_range"), + default=query_prefix + "upper_ci", + ), + "p_value_qualifier_display": sql_display( + query_prefix + "p_value_qualifier", constants.PValueQualifier + ), + "main_finding_support_display": sql_display( + query_prefix + "main_finding_support", constants.MainFinding + ), + } + class EpiExporter(Exporter): def build_modules(self) -> list[ModelExport]: @@ -215,7 +279,7 @@ def build_modules(self) -> list[ModelExport]: OutcomeExport("outcome", ""), ExposureExport("exposure", "results__comparison_set__exposure"), ComparisonSetExport("cs", "results__comparison_set"), - ResultMetricExport("metric","results__metric"), + ResultMetricExport("metric", "results__metric"), ResultExport("result", "results"), GroupExport("group", "results__results__group"), GroupResultExport("result_group", "results__results"), diff --git a/hawc/apps/study/exports.py b/hawc/apps/study/exports.py index 3e6f1a4a8b..c721c017b5 100644 --- a/hawc/apps/study/exports.py +++ b/hawc/apps/study/exports.py @@ -3,6 +3,7 @@ from django.db.models import Q from ..common.exports import ModelExport +from ..common.helper import cleanHTML from ..common.models import sql_display, sql_format, str_m2m from ..lit.constants import ReferenceDatabase from .constants import CoiReported @@ -57,4 +58,7 @@ def prepare_df(self, df): df[key] = pd.to_numeric(df[key], errors="coerce") for key in [self.get_column_name("doi")]: df[key] = df[key].replace("", np.nan) + df.loc[:, self.get_column_name("summary")] = df[self.get_column_name("summary")].apply( + cleanHTML + ) return df From 4eabe9e19e2ed435fa67e64e56adead57b209224 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 12:00:22 -0400 Subject: [PATCH 03/15] changes --- hawc/apps/common/exports.py | 38 ++-- hawc/apps/epi/exports.py | 370 ++++++++++++++++++++++++++++++++++-- hawc/apps/study/exports.py | 11 +- 3 files changed, 372 insertions(+), 47 deletions(-) diff --git a/hawc/apps/common/exports.py b/hawc/apps/common/exports.py index 6693a06ad2..08340b0ded 100644 --- a/hawc/apps/common/exports.py +++ b/hawc/apps/common/exports.py @@ -1,8 +1,7 @@ import pandas as pd +from django.conf import settings from django.db.models import QuerySet -from .helper import FlatExport - class ModelExport: """Model level export module for use in Exporter class.""" @@ -11,21 +10,13 @@ def __init__( self, key_prefix: str = "", query_prefix: str = "", - include: tuple[str, ...] | None = None, - exclude: tuple[str, ...] | None = None, + include: tuple | None = None, + exclude: tuple | None = None, ): - """Instantiate an exporter instance for a given django model. - - Args: - key_prefix (str, optional): The model name to prepend to data frame columns. - query_prefix (str, optional): The model prefix in the ORM. - include (tuple | None, optional): If included, only these items are added. - exclude (tuple | None, optional): If specified, items are removed from base. - """ self.key_prefix = key_prefix + "-" if key_prefix else key_prefix self.query_prefix = query_prefix + "__" if query_prefix else query_prefix - self.include = (key_prefix + field for field in include) if include else tuple() - self.exclude = (key_prefix + field for field in exclude) if exclude else tuple() + self.include = tuple(self.key_prefix + field for field in include) if include else tuple() + self.exclude = tuple(self.key_prefix + field for field in exclude) if exclude else tuple() @property def value_map(self) -> dict: @@ -153,6 +144,14 @@ def prepare_df(self, df: pd.DataFrame) -> pd.DataFrame: """ return df + def format_time(self, df: pd.DataFrame) -> pd.DataFrame: + for key in [self.get_column_name("created"), self.get_column_name("last_updated")]: + if key in df.columns: + df.loc[:, key] = df[key].apply( + lambda x: x.tz_convert(settings.TIME_ZONE).isoformat() + ) + return df + def get_df(self, qs: QuerySet) -> pd.DataFrame: """Get dataframe export from queryset. @@ -207,14 +206,3 @@ def get_df(self, qs: QuerySet) -> pd.DataFrame: for module in self._modules: df = module.prepare_df(df) return df - - @classmethod - def flat_export(cls, qs: QuerySet, filename: str) -> FlatExport: - """Return an instance of a FlatExport. - - Args: - qs (QuerySet): the initial QuerySet - filename (str): the filename for the export - """ - df = cls().get_df(qs) - return FlatExport(df=df, filename=filename) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index aea3eb5973..ff4c786695 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -1,26 +1,16 @@ +import math + import pandas as pd -from django.db.models import Case, CharField, Q, When -from django.db.models.functions import Cast +from django.db.models import Case, Q, When from ..common.exports import Exporter, ModelExport from ..common.helper import FlatFileExporter -from ..common.models import sql_display, sql_format, str_m2m, to_display_array +from ..common.models import sql_display, sql_format, str_m2m from ..materialized.models import FinalRiskOfBiasScore from ..study.exports import StudyExport -from ..study.models import Study from . import constants, models -def format_time(model_export: ModelExport, df): - df.loc[:, model_export.get_column_name("created")] = df[ - model_export.get_column_name("created") - ].apply(lambda x: x.isoformat()) - df.loc[:, model_export.get_column_name("last_updated")] = df[ - model_export.get_column_name("last_updated") - ].apply(lambda x: x.isoformat()) - return df - - class StudyPopulationExport(ModelExport): def get_value_map(self): return { @@ -64,7 +54,7 @@ def get_annotation_map(self, query_prefix): } def prepare_df(self, df): - return df + return self.format_time(df) class OutcomeExport(ModelExport): @@ -93,6 +83,9 @@ def get_annotation_map(self, query_prefix): "diagnostic_display": sql_display(query_prefix + "diagnostic", constants.Diagnostic), } + def prepare_df(self, df): + return self.format_time(df) + class ExposureExport(ModelExport): def get_value_map(self): @@ -127,6 +120,9 @@ def get_annotation_map(self, query_prefix): "url": sql_format("/epi/exposure/{}/", query_prefix + "id"), # hardcoded URL } + def prepare_df(self, df): + return self.format_time(df) + class ComparisonSetExport(ModelExport): def get_value_map(self): @@ -144,6 +140,9 @@ def get_annotation_map(self, query_prefix): "url": sql_format("/epi/comparison-set/{}/", query_prefix + "id"), # hardcoded URL } + def prepare_df(self, df): + return self.format_time(df) + class ResultMetricExport(ModelExport): def get_value_map(self): @@ -178,11 +177,12 @@ def get_value_map(self): "comments": "comments", "created": "created", "last_updated": "last_updated", + "tags": "tags", } def get_annotation_map(self, query_prefix): return { - "dose_response_type": sql_display( + "dose_response_display": sql_display( query_prefix + "dose_response", constants.DoseResponse ), "adjustment_factors": str_m2m( @@ -202,8 +202,12 @@ def get_annotation_map(self, query_prefix): "variance_type_display": sql_display( query_prefix + "variance_type", constants.VarianceType ), + "tags": str_m2m(query_prefix + "resulttags__name"), } + def prepare_df(self, df): + return self.format_time(df) + class GroupExport(ModelExport): def get_value_map(self): @@ -230,6 +234,9 @@ def get_annotation_map(self, query_prefix): "ethnicities": str_m2m(query_prefix + "ethnicities__name"), } + def prepare_df(self, df): + return self.format_time(df) + class GroupResultExport(ModelExport): def get_value_map(self): @@ -270,6 +277,37 @@ def get_annotation_map(self, query_prefix): ), } + def prepare_df(self, df): + return self.format_time(df) + + +class CentralTendencyExport(ModelExport): + def get_value_map(self): + return { + "estimate": "estimate", + "estimate_type": "estimate_type", + "variance": "variance", + "variance_type": "variance_type", + "lower_bound_interval": "lower_bound_interval", + "upper_bound_interval": "upper_bound_interval", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "lower_range": "lower_range", + "upper_range": "upper_range", + } + + def get_annotation_map(self, query_prefix): + return { + "lower_bound_interval": Case( + When(**{query_prefix + "lower_ci": None}, then=query_prefix + "lower_range"), + default=query_prefix + "lower_ci", + ), + "upper_bound_interval": Case( + When(**{query_prefix + "upper_ci": None}, then=query_prefix + "upper_range"), + default=query_prefix + "upper_ci", + ), + } + class EpiExporter(Exporter): def build_modules(self) -> list[ModelExport]: @@ -280,7 +318,7 @@ def build_modules(self) -> list[ModelExport]: ExposureExport("exposure", "results__comparison_set__exposure"), ComparisonSetExport("cs", "results__comparison_set"), ResultMetricExport("metric", "results__metric"), - ResultExport("result", "results"), + ResultExport("result", "results", exclude=("tags",)), GroupExport("group", "results__results__group"), GroupResultExport("result_group", "results__results"), ] @@ -296,6 +334,304 @@ def build_df(self) -> pd.DataFrame: return EpiExporter().get_df(self.queryset) +class EpiDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "study_population__study", + include=("id", "short_citation", "study_identifier", "published"), + ), + StudyPopulationExport( + "sp", "study_population", include=("id", "name", "age_profile", "source", "design") + ), + OutcomeExport( + "outcome", + "", + include=( + "id", + "name", + "system", + "effect", + "effect_subtype", + "diagnostic", + "age_of_measurement", + "effects", + ), + ), + ComparisonSetExport("cs", "results__comparison_set", include=("id", "name")), + ExposureExport( + "exposure", + "results__comparison_set__exposure", + include=( + "id", + "name", + "metric", + "measured", + "metric_units_name", + "age_of_exposure", + ), + ), + CentralTendencyExport( + "ct", + "results__comparison_set__exposure__central_tendencies", + include=( + "estimate", + "estimate_type", + "variance", + "variance_type", + "lower_bound_interval", + "upper_bound_interval", + "lower_ci", + "upper_ci", + "lower_range", + "upper_range", + ), + ), + ResultExport( + "result", + "results", + include=( + "id", + "name", + "population_description", + "tags", + "metric_description", + "comments", + "dose_response", + "statistical_power", + "statistical_test_results", + "ci_units", + "estimate_type", + "variance_type", + ), + ), + ResultMetricExport("metric", "results__metric", include=("name", "abbreviation")), + GroupExport( + "group", + "results__results__group", + include=("group_id", "name", "comparative_name", "numeric", "isControl"), + ), + GroupResultExport( + "result_group", + "results__results", + include=( + "id", + "n", + "estimate", + "lower_ci", + "upper_ci", + "lower_range", + "upper_range", + "lower_bound_interval", + "upper_bound_interval", + "variance", + "p_value", + "p_value_qualifier", + "is_main_finding", + "main_finding_support", + ), + ), + ] + + +def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): + mean = low = high = None + + if mu_1 and mu_2 and mu_1 != 0: + mean = (mu_2 - mu_1) / mu_1 * 100.0 + if sd_1 and sd_2 and n_1 and n_2: + sd = math.sqrt( + pow(mu_1, -2) + * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) + ) + ci = (1.96 * sd) * 100 + rng = sorted([mean - ci, mean + ci]) + low = rng[0] + high = rng[1] + + return mean, low, high + + +class OutcomeDataPivot2(FlatFileExporter): + def _add_percent_control(self, df: pd.DataFrame) -> pd.DataFrame: + def _get_stdev(x: pd.Series): + return models.GroupResult.stdev( + x["result-variance_type"], x["result_group-variance"], x["result_group-n"] + ) + + def _apply_results(_df1: pd.DataFrame): + controls = _df1.loc[_df1["group-isControl"] is True] + control = _df1.iloc[0] if controls.empty else controls.iloc[0] + n_1 = control["result_group-n"] + mu_1 = control["result_group-estimate"] + sd_1 = _get_stdev(control) + + def _apply_result_groups(_df2: pd.DataFrame): + row = _df2.iloc[0] + if control["result-estimate_type"] in ["median", "mean"] and control[ + "result-variance_type" + ] in [ + "SD", + "SE", + "SEM", + ]: + n_2 = row["result_group-n"] + mu_2 = row["result_group-estimate"] + sd_2 = _get_stdev(row) + mean, low, high = percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2) + return pd.DataFrame( + [[mean, low, high]], + columns=[ + "percent control mean", + "percent control low", + "percent control high", + ], + index=[row["result_group-id"]], + ) + return pd.DataFrame() + + rgs = _df1.groupby("result_group-id", group_keys=False) + return rgs.apply(_apply_result_groups) + + results = df.groupby("result-id", group_keys=False) + computed_df = results.apply(_apply_results) + return df.join(computed_df, on="result_group-id").drop( + columns=["result-estimate_type", "result-variance_type", "group-isControl"] + ) + + def build_df(self) -> pd.DataFrame: + df = EpiDataPivotExporter().get_df(self.queryset.order_by("id", "results__results")) + outcome_ids = list(df["outcome-id"].unique()) + rob_headers, rob_data = FinalRiskOfBiasScore.get_dp_export( + self.queryset.first().assessment_id, + outcome_ids, + "epi", + ) + rob_df = pd.DataFrame( + data=[ + [rob_data[(outcome_id, metric_id)] for metric_id in rob_headers.keys()] + for outcome_id in outcome_ids + ], + columns=list(rob_headers.values()), + index=outcome_ids, + ) + df = df.join(rob_df, on="outcome-id") + + df["Reference/Exposure group"] = ( + df["study-short_citation"] + + " (" + + df["group-name"] + + ", n=" + + df["result_group-n"].astype(str) + + ")" + ) + df["Result, summary numerical"] = ( + df["result_group-estimate"].astype(str) + + " (" + + df["result_group-lower_ci"].astype(str) + + " - " + + df["result_group-upper_ci"].astype(str) + + ")" + ) + df["key"] = df["result_group-id"] + df["statistical significance"] = df.apply( + lambda x: x["result_group-p_value_qualifier"] + if pd.isna(x["result_group-p_value"]) + else f"{x['result_group-p_value']:g}" + if x["result_group-p_value_qualifier"] in ["=", "-", "n.s."] + else f"{x['result_group-p_value_qualifier']}{x['result_group-p_value']:g}", + axis="columns", + ) + df = df.drop(columns="result_group-p_value_qualifier") + + df = self._add_percent_control(df) + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-study_identifier": "study identifier", + "study-published": "study published", + "sp-id": "study population id", + "sp-name": "study population name", + "sp-age_profile": "study population age profile", + "sp-source": "study population source", + "sp-design": "design", + "outcome-id": "outcome id", + "outcome-name": "outcome name", + "outcome-system": "outcome system", + "outcome-effect": "outcome effect", + "outcome-effect_subtype": "outcome effect subtype", + "outcome-diagnostic": "diagnostic", + "outcome-age_of_measurement": "age of outcome measurement", + "outcome-effects": "tags", + }, + errors="raise", + ) + df = df.rename( + columns={ + "cs-id": "comparison set id", + "cs-name": "comparison set name", + "exposure-id": "exposure id", + "exposure-name": "exposure name", + "exposure-metric": "exposure metric", + "exposure-measured": "exposure measured", + "exposure-metric_units_name": "dose units", + "exposure-age_of_exposure": "age of exposure", + "ct-estimate": "exposure estimate", + "ct-estimate_type": "exposure estimate type", + "ct-variance": "exposure variance", + "ct-variance_type": "exposure variance type", + "ct-lower_bound_interval": "exposure lower bound interval", + "ct-upper_bound_interval": "exposure upper bound interval", + "ct-lower_ci": "exposure lower ci", + "ct-upper_ci": "exposure upper ci", + "ct-lower_range": "exposure lower range", + "ct-upper_range": "exposure upper range", + "result-id": "result id", + "result-name": "result name", + "result-population_description": "result population description", + "result-tags": "result tags", + "metric-name": "statistical metric", + "metric-abbreviation": "statistical metric abbreviation", + "result-metric_description": "statistical metric description", + "result-comments": "result summary", + "result-dose_response": "dose response", + "result-statistical_power": "statistical power", + "result-statistical_test_results": "statistical test results", + "result-ci_units": "CI units", + "group-group_id": "exposure group order", + "group-name": "exposure group name", + "group-comparative_name": "exposure group comparison name", + "group-numeric": "exposure group numeric", + # "Reference/Exposure group", # format string + # "Result, summary numerical", # format string + # "key", # copy of result_group-id + "result_group-id": "result group id", + "result_group-n": "N", + "result_group-estimate": "estimate", + "result_group-lower_ci": "lower CI", + "result_group-upper_ci": "upper CI", + "result_group-lower_range": "lower range", + "result_group-upper_range": "upper range", + "result_group-lower_bound_interval": "lower bound interval", + "result_group-upper_bound_interval": "upper bound interval", + "result_group-variance": "variance", + # "result_group-p_value_text":"statistical significance", # add this; its computed + "result_group-p_value": "statistical significance (numeric)", + "result_group-is_main_finding": "main finding", + "result_group-main_finding_support": "main finding support", + # "percent control mean", # some function + # "percent control low", # some function + # "percent control high", # some function + }, + errors="raise", + ) + + return df + + class OutcomeDataPivot(FlatFileExporter): def _get_header_row(self): if self.queryset.first() is None: diff --git a/hawc/apps/study/exports.py b/hawc/apps/study/exports.py index c721c017b5..8f8cc2289e 100644 --- a/hawc/apps/study/exports.py +++ b/hawc/apps/study/exports.py @@ -55,10 +55,11 @@ def get_annotation_map(self, query_prefix): def prepare_df(self, df): for key in [self.get_column_name("pubmed_id"), self.get_column_name("hero_id")]: - df[key] = pd.to_numeric(df[key], errors="coerce") + if key in df.columns: + df[key] = pd.to_numeric(df[key], errors="coerce") for key in [self.get_column_name("doi")]: - df[key] = df[key].replace("", np.nan) - df.loc[:, self.get_column_name("summary")] = df[self.get_column_name("summary")].apply( - cleanHTML - ) + if key in df.columns: + df[key] = df[key].replace("", np.nan) + if (key := self.get_column_name("summary")) in df.columns: + df.loc[:, key] = df[key].apply(cleanHTML) return df From 1e11a971d1b68e86502387f85d2c4a348f2f01d8 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 12:34:00 -0400 Subject: [PATCH 04/15] changes --- hawc/apps/epi/exports.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index ff4c786695..9dddc93d45 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -285,9 +285,9 @@ class CentralTendencyExport(ModelExport): def get_value_map(self): return { "estimate": "estimate", - "estimate_type": "estimate_type", + "estimate_type": "estimate_type_display", "variance": "variance", - "variance_type": "variance_type", + "variance_type": "variance_type_display", "lower_bound_interval": "lower_bound_interval", "upper_bound_interval": "upper_bound_interval", "lower_ci": "lower_ci", @@ -298,6 +298,12 @@ def get_value_map(self): def get_annotation_map(self, query_prefix): return { + "estimate_type_display": sql_display( + query_prefix + "estimate_type", constants.EstimateType + ), + "variance_type_display": sql_display( + query_prefix + "variance_type", constants.VarianceType + ), "lower_bound_interval": Case( When(**{query_prefix + "lower_ci": None}, then=query_prefix + "lower_range"), default=query_prefix + "lower_ci", @@ -453,7 +459,7 @@ def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): return mean, low, high -class OutcomeDataPivot2(FlatFileExporter): +class OutcomeDataPivot(FlatFileExporter): def _add_percent_control(self, df: pd.DataFrame) -> pd.DataFrame: def _get_stdev(x: pd.Series): return models.GroupResult.stdev( @@ -489,7 +495,14 @@ def _apply_result_groups(_df2: pd.DataFrame): ], index=[row["result_group-id"]], ) - return pd.DataFrame() + return pd.DataFrame( + [], + columns=[ + "percent control mean", + "percent control low", + "percent control high", + ], + ) rgs = _df1.groupby("result_group-id", group_keys=False) return rgs.apply(_apply_result_groups) @@ -632,7 +645,7 @@ def build_df(self) -> pd.DataFrame: return df -class OutcomeDataPivot(FlatFileExporter): +class OutcomeDataPivot2(FlatFileExporter): def _get_header_row(self): if self.queryset.first() is None: self.rob_headers, self.rob_data = {}, {} From 3bed1b656069f49b82c988fcae55a2745d71f7a9 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 12:40:43 -0400 Subject: [PATCH 05/15] fix --- hawc/apps/epi/exports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index 9dddc93d45..6949d6494e 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -467,7 +467,7 @@ def _get_stdev(x: pd.Series): ) def _apply_results(_df1: pd.DataFrame): - controls = _df1.loc[_df1["group-isControl"] is True] + controls = _df1.loc[_df1["group-isControl"] == True] # noqa: E712 control = _df1.iloc[0] if controls.empty else controls.iloc[0] n_1 = control["result_group-n"] mu_1 = control["result_group-estimate"] From 43e9af186a5c92db464ea41543d4bc51036319d4 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 12:45:05 -0400 Subject: [PATCH 06/15] added back something accidentally deleted --- hawc/apps/common/exports.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hawc/apps/common/exports.py b/hawc/apps/common/exports.py index 08340b0ded..ac257e7fe2 100644 --- a/hawc/apps/common/exports.py +++ b/hawc/apps/common/exports.py @@ -2,6 +2,8 @@ from django.conf import settings from django.db.models import QuerySet +from .helper import FlatExport + class ModelExport: """Model level export module for use in Exporter class.""" @@ -206,3 +208,13 @@ def get_df(self, qs: QuerySet) -> pd.DataFrame: for module in self._modules: df = module.prepare_df(df) return df + + @classmethod + def flat_export(cls, qs: QuerySet, filename: str) -> FlatExport: + """Return an instance of a FlatExport. + Args: + qs (QuerySet): the initial QuerySet + filename (str): the filename for the export + """ + df = cls().get_df(qs) + return FlatExport(df=df, filename=filename) From 069c07ed854341e3d164f17761a0410f3742e6ea Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 13:06:04 -0400 Subject: [PATCH 07/15] moved code --- hawc/apps/epi/exports.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index 6949d6494e..ba0dd4a836 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -11,6 +11,24 @@ from . import constants, models +def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): + mean = low = high = None + + if mu_1 and mu_2 and mu_1 != 0: + mean = (mu_2 - mu_1) / mu_1 * 100.0 + if sd_1 and sd_2 and n_1 and n_2: + sd = math.sqrt( + pow(mu_1, -2) + * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) + ) + ci = (1.96 * sd) * 100 + rng = sorted([mean - ci, mean + ci]) + low = rng[0] + high = rng[1] + + return mean, low, high + + class StudyPopulationExport(ModelExport): def get_value_map(self): return { @@ -441,24 +459,6 @@ def build_modules(self) -> list[ModelExport]: ] -def percent_control(n_1, mu_1, sd_1, n_2, mu_2, sd_2): - mean = low = high = None - - if mu_1 and mu_2 and mu_1 != 0: - mean = (mu_2 - mu_1) / mu_1 * 100.0 - if sd_1 and sd_2 and n_1 and n_2: - sd = math.sqrt( - pow(mu_1, -2) - * ((pow(sd_2, 2) / n_2) + (pow(mu_2, 2) * pow(sd_1, 2)) / (n_1 * pow(mu_1, 2))) - ) - ci = (1.96 * sd) * 100 - rng = sorted([mean - ci, mean + ci]) - low = rng[0] - high = rng[1] - - return mean, low, high - - class OutcomeDataPivot(FlatFileExporter): def _add_percent_control(self, df: pd.DataFrame) -> pd.DataFrame: def _get_stdev(x: pd.Series): From 54bf8294e0b552cde2c0b0fa751ab35db520440d Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 13:08:21 -0400 Subject: [PATCH 08/15] fix test --- tests/data/api/api-dp-data-epi.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/data/api/api-dp-data-epi.json b/tests/data/api/api-dp-data-epi.json index 0e90856514..e30adfc574 100644 --- a/tests/data/api/api-dp-data-epi.json +++ b/tests/data/api/api-dp-data-epi.json @@ -52,7 +52,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -68,7 +68,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, @@ -127,7 +127,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -143,7 +143,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, @@ -202,7 +202,7 @@ "result name": "partial PTSD", "result population description": "", "result summary": "", - "result tags": "|tag2|", + "result tags": "tag2", "statistical metric": "other", "statistical metric abbreviation": "oth", "statistical metric description": "count", @@ -218,7 +218,7 @@ "study population name": "Tokyo subway victims", "study population source": "", "study published": true, - "tags": "|tag2|", + "tags": "tag2", "upper CI": null, "upper bound interval": null, "upper range": null, From 3e29c8c9b47f2e0d18f90be325790042ce52734c Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 4 Oct 2023 13:33:17 -0400 Subject: [PATCH 09/15] remove old stuff --- hawc/apps/epi/exports.py | 223 ------------------------- hawc/apps/epi/models.py | 352 --------------------------------------- 2 files changed, 575 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index ba0dd4a836..841bb12e0e 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -643,226 +643,3 @@ def build_df(self) -> pd.DataFrame: ) return df - - -class OutcomeDataPivot2(FlatFileExporter): - def _get_header_row(self): - if self.queryset.first() is None: - self.rob_headers, self.rob_data = {}, {} - else: - outcome_ids = set(self.queryset.values_list("id", flat=True)) - self.rob_headers, self.rob_data = FinalRiskOfBiasScore.get_dp_export( - self.queryset.first().assessment_id, - outcome_ids, - "epi", - ) - - headers = [ - "study id", - "study name", - "study identifier", - "study published", - "study population id", - "study population name", - "study population age profile", - "study population source", - "design", - "outcome id", - "outcome name", - "outcome system", - "outcome effect", - "outcome effect subtype", - "diagnostic", - "age of outcome measurement", - "tags", - ] - - headers.extend(list(self.rob_headers.values())) - - headers.extend( - [ - "comparison set id", - "comparison set name", - "exposure id", - "exposure name", - "exposure metric", - "exposure measured", - "dose units", - "age of exposure", - "exposure estimate", - "exposure estimate type", - "exposure variance", - "exposure variance type", - "exposure lower bound interval", - "exposure upper bound interval", - "exposure lower ci", - "exposure upper ci", - "exposure lower range", - "exposure upper range", - "result id", - "result name", - "result population description", - "result tags", - "statistical metric", - "statistical metric abbreviation", - "statistical metric description", - "result summary", - "dose response", - "statistical power", - "statistical test results", - "CI units", - "exposure group order", - "exposure group name", - "exposure group comparison name", - "exposure group numeric", - "Reference/Exposure group", - "Result, summary numerical", - "key", - "result group id", - "N", - "estimate", - "lower CI", - "upper CI", - "lower range", - "upper range", - "lower bound interval", - "upper bound interval", - "variance", - "statistical significance", - "statistical significance (numeric)", - "main finding", - "main finding support", - "percent control mean", - "percent control low", - "percent control high", - ] - ) - - return headers - - def _get_data_rows(self): - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [ - ser["study_population"]["study"]["id"], - ser["study_population"]["study"]["short_citation"], - ser["study_population"]["study"]["study_identifier"], - ser["study_population"]["study"]["published"], - ser["study_population"]["id"], - ser["study_population"]["name"], - ser["study_population"]["age_profile"], - ser["study_population"]["source"], - ser["study_population"]["design"], - ser["id"], - ser["name"], - ser["system"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - ser["age_of_measurement"], - self.get_flattened_tags(ser, "effects"), - ] - outcome_robs = [ - self.rob_data[(ser["id"], metric_id)] for metric_id in self.rob_headers.keys() - ] - row.extend(outcome_robs) - - for res in ser["results"]: - row_copy = list(row) - - # comparison set - row_copy.extend([res["comparison_set"]["id"], res["comparison_set"]["name"]]) - - # exposure (may be missing) - if res["comparison_set"]["exposure"]: - row_copy.extend( - [ - res["comparison_set"]["exposure"]["id"], - res["comparison_set"]["exposure"]["name"], - res["comparison_set"]["exposure"]["metric"], - res["comparison_set"]["exposure"]["measured"], - res["comparison_set"]["exposure"]["metric_units"]["name"], - res["comparison_set"]["exposure"]["age_of_exposure"], - ] - ) - - num_rows_for_ct = len(res["comparison_set"]["exposure"]["central_tendencies"]) - if num_rows_for_ct == 0: - row_copy.extend(["-"] * 10) - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy) - else: - for ct in res["comparison_set"]["exposure"]["central_tendencies"]: - row_copy_ct = list(row_copy) - row_copy_ct.extend( - [ - ct["estimate"], - ct["estimate_type"], - ct["variance"], - ct["variance_type"], - ct["lower_bound_interval"], - ct["upper_bound_interval"], - ct["lower_ci"], - ct["upper_ci"], - ct["lower_range"], - ct["upper_range"], - ] - ) - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy_ct) - - else: - row_copy.extend(["-"] * (6 + 10)) # exposure + exposure.central_tendencies - self.addOutcomesAndGroupsToRowAndAppend(rows, res, ser, row_copy) - - return rows - - def addOutcomesAndGroupsToRowAndAppend(self, rows, res, ser, row): - # outcome details - row.extend( - [ - res["id"], - res["name"], - res["population_description"], - self.get_flattened_tags(res, "resulttags"), - res["metric"]["metric"], - res["metric"]["abbreviation"], - res["metric_description"], - res["comments"], - res["dose_response"], - res["statistical_power"], - res["statistical_test_results"], - res["ci_units"], - ] - ) - - for rg in res["results"]: - row_copy = list(row) - row_copy.extend( - [ - rg["group"]["group_id"], - rg["group"]["name"], - rg["group"]["comparative_name"], - rg["group"]["numeric"], - f'{ser["study_population"]["study"]["short_citation"]} ({rg["group"]["name"]}, n={rg["n"]})', - f'{rg["estimate"]} ({rg["lower_ci"]} - {rg["upper_ci"]})', - rg["id"], - rg["id"], # repeat for data-pivot key - rg["n"], - rg["estimate"], - rg["lower_ci"], - rg["upper_ci"], - rg["lower_range"], - rg["upper_range"], - rg["lower_bound_interval"], - rg["upper_bound_interval"], - rg["variance"], - rg["p_value_text"], - rg["p_value"], - rg["is_main_finding"], - rg["main_finding_support"], - rg["percentControlMean"], - rg["percentControlLow"], - rg["percentControlHigh"], - ] - ) - rows.append(row_copy) diff --git a/hawc/apps/epi/models.py b/hawc/apps/epi/models.py index b7eab9f6ac..e73b0549b0 100644 --- a/hawc/apps/epi/models.py +++ b/hawc/apps/epi/models.py @@ -206,57 +206,6 @@ class StudyPopulation(models.Model): BREADCRUMB_PARENT = "study" - @staticmethod - def flat_complete_header_row(): - return ( - "sp-id", - "sp-url", - "sp-name", - "sp-design", - "sp-age_profile", - "sp-source", - "sp-countries", - "sp-region", - "sp-state", - "sp-eligible_n", - "sp-invited_n", - "sp-participant_n", - "sp-inclusion_criteria", - "sp-exclusion_criteria", - "sp-confounding_criteria", - "sp-comments", - "sp-created", - "sp-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - def getCriteriaList(lst, filt): - return "|".join( - [d["description"] for d in [d for d in lst if d["criteria_type"] == filt]] - ) - - return ( - ser["id"], - ser["url"], - ser["name"], - ser["design"], - ser["age_profile"], - ser["source"], - "|".join([c["name"] for c in ser["countries"]]), - ser["region"], - ser["state"], - ser["eligible_n"], - ser["invited_n"], - ser["participant_n"], - getCriteriaList(ser["criteria"], "Inclusion"), - getCriteriaList(ser["criteria"], "Exclusion"), - getCriteriaList(ser["criteria"], "Confounding"), - ser["comments"], - ser["created"], - ser["last_updated"], - ) - class Meta: ordering = ("name",) @@ -379,44 +328,6 @@ def get_absolute_url(self): def can_create_sets(self): return not self.study_population.can_create_sets() - @staticmethod - def flat_complete_header_row(): - return ( - "outcome-id", - "outcome-url", - "outcome-name", - "outcome-effects", - "outcome-system", - "outcome-effect", - "outcome-effect_subtype", - "outcome-diagnostic", - "outcome-diagnostic_description", - "outcome-age_of_measurement", - "outcome-outcome_n", - "outcome-summary", - "outcome-created", - "outcome-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - "|".join([str(d["name"]) for d in ser["effects"]]), - ser["system"], - ser["effect"], - ser["effect_subtype"], - ser["diagnostic"], - ser["diagnostic_description"], - ser["age_of_measurement"], - ser["outcome_n"], - ser["summary"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): return self.study_population.get_study() @@ -486,28 +397,6 @@ def get_assessment(self): def __str__(self): return self.name - @staticmethod - def flat_complete_header_row(): - return ( - "cs-id", - "cs-url", - "cs-name", - "cs-description", - "cs-created", - "cs-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["description"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): if self.study_population: return self.study_population.get_study() @@ -590,44 +479,6 @@ def get_assessment(self): def __str__(self): return self.name - @staticmethod - def flat_complete_header_row(): - return ( - "group-id", - "group-group_id", - "group-name", - "group-numeric", - "group-comparative_name", - "group-sex", - "group-ethnicities", - "group-eligible_n", - "group-invited_n", - "group-participant_n", - "group-isControl", - "group-comments", - "group-created", - "group-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["group_id"], - ser["name"], - ser["numeric"], - ser["comparative_name"], - ser["sex"], - "|".join([d["name"] for d in ser["ethnicities"]]), - ser["eligible_n"], - ser["invited_n"], - ser["participant_n"], - ser["isControl"], - ser["comments"], - ser["created"], - ser["last_updated"], - ) - class Exposure(models.Model): objects = managers.ExposureManager() @@ -771,65 +622,6 @@ def get_absolute_url(self): def delete_caches(cls, ids): SerializerHelper.delete_caches(cls, ids) - @staticmethod - def flat_complete_header_row(): - return ( - "exposure-id", - "exposure-url", - "exposure-name", - "exposure-inhalation", - "exposure-dermal", - "exposure-oral", - "exposure-in_utero", - "exposure-iv", - "exposure-unknown_route", - "exposure-measured", - "exposure-metric", - "exposure-metric_units_id", - "exposure-metric_units_name", - "exposure-metric_description", - "exposure-analytical_method", - "exposure-sampling_period", - "exposure-age_of_exposure", - "exposure-duration", - "exposure-n", - "exposure-exposure_distribution", - "exposure-description", - "exposure-created", - "exposure-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - if ser is None: - ser = {} - units = ser.get("metric_units", {}) - return ( - ser.get("id"), - ser.get("url"), - ser.get("name"), - ser.get("inhalation"), - ser.get("dermal"), - ser.get("oral"), - ser.get("in_utero"), - ser.get("iv"), - ser.get("unknown_route"), - ser.get("measured"), - ser.get("metric"), - units.get("id"), - units.get("name"), - ser.get("metric_description"), - ser.get("analytical_method"), - ser.get("sampling_period"), - ser.get("age_of_exposure"), - ser.get("duration"), - ser.get("n"), - ser.get("exposure_distribution"), - ser.get("description"), - ser.get("created"), - ser.get("last_updated"), - ) - def get_study(self): return self.study_population.get_study() @@ -891,42 +683,6 @@ class Meta: def __str__(self): return f"{{CT id={self.id}, exposure={self.exposure}}}" - @staticmethod - def flat_complete_header_row(): - return ( - "central_tendency-id", - "central_tendency-estimate", - "central_tendency-estimate_type", - "central_tendency-variance", - "central_tendency-variance_type", - "central_tendency-lower_ci", - "central_tendency-upper_ci", - "central_tendency-lower_range", - "central_tendency-upper_range", - "central_tendency-description", - "central_tendency-lower_bound_interval", - "central_tendency-upper_bound_interval", - ) - - @staticmethod - def flat_complete_data_row(ser): - if ser is None: - ser = {} - return ( - ser.get("id"), - ser.get("estimate"), - ser.get("estimate_type"), - ser.get("variance"), - ser.get("variance_type"), - ser.get("lower_ci"), - ser.get("upper_ci"), - ser.get("lower_range"), - ser.get("upper_range"), - ser.get("description"), - ser.get("lower_bound_interval"), - ser.get("upper_bound_interval"), - ) - class GroupNumericalDescriptions(models.Model): objects = managers.GroupNumericalDescriptionsManager() @@ -1131,72 +887,6 @@ def get_assessment(self): def get_absolute_url(self): return reverse("epi:result_detail", args=(self.pk,)) - @staticmethod - def flat_complete_header_row(): - return ( - "metric-id", - "metric-name", - "metric-abbreviation", - "result-id", - "result-name", - "result-metric_description", - "result-metric_units", - "result-data_location", - "result-population_description", - "result-dose_response", - "result-dose_response_details", - "result-prevalence_incidence", - "result-statistical_power", - "result-statistical_power_details", - "result-statistical_test_results", - "result-trend_test", - "result-adjustment_factors", - "result-adjustment_factors_considered", - "result-estimate_type", - "result-variance_type", - "result-ci_units", - "result-comments", - "result-created", - "result-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - def getFactorList(lst, isIncluded): - return "|".join( - [ - d["description"] - for d in [d for d in lst if d["included_in_final_model"] == isIncluded] - ] - ) - - return ( - ser["metric"]["id"], - ser["metric"]["metric"], - ser["metric"]["abbreviation"], - ser["id"], - ser["name"], - ser["metric_description"], - ser["metric_units"], - ser["data_location"], - ser["population_description"], - ser["dose_response"], - ser["dose_response_details"], - ser["prevalence_incidence"], - ser["statistical_power"], - ser["statistical_power_details"], - ser["statistical_test_results"], - ser["trend_test"], - getFactorList(ser["factors"], True), - getFactorList(ser["factors"], False), - ser["estimate_type"], - ser["variance_type"], - ser["ci_units"], - ser["comments"], - ser["created"], - ser["last_updated"], - ) - def get_study(self): return self.outcome.get_study() @@ -1426,48 +1116,6 @@ def lower_bound_interval(self): def upper_bound_interval(self): return self.upper_range if self.upper_ci is None else self.upper_ci - @staticmethod - def flat_complete_header_row(): - return ( - "result_group-id", - "result_group-n", - "result_group-estimate", - "result_group-variance", - "result_group-lower_ci", - "result_group-upper_ci", - "result_group-lower_range", - "result_group-upper_range", - "result_group-lower_bound_interval", - "result_group-upper_bound_interval", - "result_group-p_value_qualifier", - "result_group-p_value", - "result_group-is_main_finding", - "result_group-main_finding_support", - "result_group-created", - "result_group-last_updated", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["n"], - ser["estimate"], - ser["variance"], - ser["lower_ci"], - ser["upper_ci"], - ser["lower_range"], - ser["upper_range"], - ser["lower_bound_interval"], - ser["upper_bound_interval"], - ser["p_value_qualifier_display"], - ser["p_value"], - ser["is_main_finding"], - ser["main_finding_support"], - ser["created"], - ser["last_updated"], - ) - @staticmethod def stdev(variance_type, variance, n): # calculate stdev given re From f6bd1459d346c3aadf5fdf9f9cd3c8c7bc571596 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Thu, 5 Oct 2023 15:48:01 -0400 Subject: [PATCH 10/15] cleanup: --- hawc/apps/epi/exports.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/hawc/apps/epi/exports.py b/hawc/apps/epi/exports.py index 841bb12e0e..4656266a41 100644 --- a/hawc/apps/epi/exports.py +++ b/hawc/apps/epi/exports.py @@ -579,8 +579,7 @@ def build_df(self) -> pd.DataFrame: "outcome-diagnostic": "diagnostic", "outcome-age_of_measurement": "age of outcome measurement", "outcome-effects": "tags", - }, - errors="raise", + } ) df = df.rename( columns={ @@ -618,9 +617,6 @@ def build_df(self) -> pd.DataFrame: "group-name": "exposure group name", "group-comparative_name": "exposure group comparison name", "group-numeric": "exposure group numeric", - # "Reference/Exposure group", # format string - # "Result, summary numerical", # format string - # "key", # copy of result_group-id "result_group-id": "result group id", "result_group-n": "N", "result_group-estimate": "estimate", @@ -631,15 +627,10 @@ def build_df(self) -> pd.DataFrame: "result_group-lower_bound_interval": "lower bound interval", "result_group-upper_bound_interval": "upper bound interval", "result_group-variance": "variance", - # "result_group-p_value_text":"statistical significance", # add this; its computed "result_group-p_value": "statistical significance (numeric)", "result_group-is_main_finding": "main finding", "result_group-main_finding_support": "main finding support", - # "percent control mean", # some function - # "percent control low", # some function - # "percent control high", # some function - }, - errors="raise", + } ) return df From 07e9bf77c0bdc2287ea26f5b443666dc2801e3f9 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 11 Oct 2023 14:03:24 -0400 Subject: [PATCH 11/15] epimeta export rewrite --- hawc/apps/epimeta/exports.py | 234 ++++++++++++++++++++++++++++++----- 1 file changed, 204 insertions(+), 30 deletions(-) diff --git a/hawc/apps/epimeta/exports.py b/hawc/apps/epimeta/exports.py index 1c29ede1b2..3d73cc922c 100644 --- a/hawc/apps/epimeta/exports.py +++ b/hawc/apps/epimeta/exports.py @@ -1,6 +1,166 @@ +import pandas as pd + +from ..common.exports import Exporter, ModelExport from ..common.helper import FlatFileExporter -from ..study.models import Study -from . import models +from ..common.models import sql_display, sql_format, str_m2m +from ..study.exports import StudyExport +from . import constants + + +class MetaProtocolExport(ModelExport): + def get_value_map(self): + return { + "pk": "pk", + "url": "url", + "name": "name", + "protocol_type": "protocol_type", + "lit_search_strategy": "lit_search_strategy", + "lit_search_notes": "lit_search_notes", + "lit_search_start_date": "lit_search_start_date", + "lit_search_end_date": "lit_search_end_date", + "total_references": "total_references", + "inclusion_criteria": "inclusion_criteria", + "exclusion_criteria": "exclusion_criteria", + "total_studies_identified": "total_studies_identified", + "notes": "notes", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi-meta/protocol/{}/", query_prefix + "id"), # hardcoded URL + "protocol_type": sql_display(query_prefix + "protocol_type", constants.MetaProtocol), + "lit_search_strategy": sql_display( + query_prefix + "lit_search_strategy", constants.MetaLitSearch + ), + "inclusion_criteria": str_m2m(query_prefix + "inclusion_criteria__description"), + "exclusion_criteria": str_m2m(query_prefix + "exclusion_criteria__description"), + } + + def prepare_df(self, df): + for key in [ + self.get_column_name("lit_search_start_date"), + self.get_column_name("lit_search_end_date"), + ]: + if key in df.columns: + df.loc[:, key] = df[key].apply(lambda x: x.isoformat() if not pd.isna(x) else x) + return df + + +class MetaResultExport(ModelExport): + def get_value_map(self): + return { + "pk": "pk", + "url": "url", + "label": "label", + "data_location": "data_location", + "health_outcome": "health_outcome", + "health_outcome_notes": "health_outcome_notes", + "exposure_name": "exposure_name", + "exposure_details": "exposure_details", + "number_studies": "number_studies", + "statistical_metric": "metric__metric", + "statistical_notes": "statistical_notes", + "n": "n", + "estimate": "estimate", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "ci_units": "ci_units", + "heterogeneity": "heterogeneity", + "adjustment_factors": "adjustment_factors_str", + "notes": "notes", + } + + def get_annotation_map(self, query_prefix): + return { + "url": sql_format("/epi-meta/result/{}/", query_prefix + "id"), # hardcoded URL + "adjustment_factors_str": str_m2m(query_prefix + "adjustment_factors__description"), + } + + +class SingleResultExport(ModelExport): + def get_value_map(self): + return { + "pk": "pk", + "study": "study_id", + "exposure_name": "exposure_name", + "weight": "weight", + "n": "n", + "estimate": "estimate", + "lower_ci": "lower_ci", + "upper_ci": "upper_ci", + "ci_units": "ci_units", + "notes": "notes", + } + + +class EpiMetaExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport("study", "protocol__study"), + MetaProtocolExport("meta_protocol", "protocol"), + MetaResultExport("meta_result", ""), + SingleResultExport("single_result", "single_results"), + ] + + +class ResultMetricExport(ModelExport): + def get_value_map(self): + return { + "id": "id", + "name": "metric", + "abbreviation": "abbreviation", + } + + +class EpiMetaDataPivotExporter(Exporter): + def build_modules(self) -> list[ModelExport]: + return [ + StudyExport( + "study", + "protocol__study", + include=( + "id", + "short_citation", + "published", + ), + ), + MetaProtocolExport( + "meta_protocol", + "protocol", + include=( + "pk", + "name", + "protocol_type", + "total_references", + "total_studies_identified", + ), + ), + MetaResultExport( + "meta_result", + "", + include=( + "pk", + "label", + "health_outcome", + "exposure_name", + "number_studies", + "n", + "estimate", + "lower_ci", + "upper_ci", + "ci_units", + "heterogeneity", + ), + ), + ResultMetricExport( + "metric", + "metric", + include=( + "name", + "abbreviation", + ), + ), + ] class MetaResultFlatComplete(FlatFileExporter): @@ -9,36 +169,50 @@ class MetaResultFlatComplete(FlatFileExporter): epidemiological meta-result study type from scratch. """ - def _get_header_row(self): - header = [] - header.extend(Study.flat_complete_header_row()) - header.extend(models.MetaProtocol.flat_complete_header_row()) - header.extend(models.MetaResult.flat_complete_header_row()) - header.extend(models.SingleResult.flat_complete_header_row()) - return header + def build_df(self) -> pd.DataFrame: + return EpiMetaExporter().get_df(self.queryset) - def _get_data_rows(self): - rows = [] - identifiers_df = Study.identifiers_df(self.queryset, "protocol__study_id") - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [] - row.extend(Study.flat_complete_data_row(ser["protocol"]["study"], identifiers_df)) - row.extend(models.MetaProtocol.flat_complete_data_row(ser["protocol"])) - row.extend(models.MetaResult.flat_complete_data_row(ser)) - - if len(ser["single_results"]) == 0: - # print one-row with no single-results - row.extend([None] * 10) - rows.append(row) - else: - # print each single-result as a new row - for sr in ser["single_results"]: - row_copy = list(row) # clone - row_copy.extend(models.SingleResult.flat_complete_data_row(sr)) - rows.append(row_copy) - return rows +class MetaResultFlatDataPivot2(FlatFileExporter): + """ + Return a subset of frequently-used data for generation of data-pivot + visualizations. + + Note: data pivot does not currently include study confidence. Could be added if needed. + """ + + def build_df(self) -> pd.DataFrame: + df = EpiMetaDataPivotExporter().get_df(self.queryset) + + df["key"] = df["meta_result-pk"] + + df = df.rename( + columns={ + "study-id": "study id", + "study-short_citation": "study name", + "study-published": "study published", + "meta_protocol-pk": "protocol id", + "meta_protocol-name": "protocol name", + "meta_protocol-protocol_type": "protocol type", + "meta_protocol-total_references": "total references", + "meta_protocol-total_studies_identified": "identified references", + "meta_result-pk": "meta result id", + "meta_result-label": "meta result label", + "meta_result-health_outcome": "health outcome", + "meta_result-exposure_name": "exposure", + "meta_result-number_studies": "result references", + "metric-name": "statistical metric", + "metric-abbreviation": "statistical metric abbreviation", + "meta_result-n": "N", + "meta_result-estimate": "estimate", + "meta_result-lower_ci": "lower CI", + "meta_result-upper_ci": "upper CI", + "meta_result-ci_units": "CI units", + "meta_result-heterogeneity": "heterogeneity", + }, + errors="raise", + ) + return df class MetaResultFlatDataPivot(FlatFileExporter): From 8b534d03767d89fbad1c4966a5fdd69882b3d4fd Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 11 Oct 2023 14:09:10 -0400 Subject: [PATCH 12/15] remove obsolete code --- hawc/apps/epimeta/exports.py | 69 +------------------- hawc/apps/epimeta/models.py | 120 ----------------------------------- 2 files changed, 1 insertion(+), 188 deletions(-) diff --git a/hawc/apps/epimeta/exports.py b/hawc/apps/epimeta/exports.py index 3d73cc922c..229f133511 100644 --- a/hawc/apps/epimeta/exports.py +++ b/hawc/apps/epimeta/exports.py @@ -173,7 +173,7 @@ def build_df(self) -> pd.DataFrame: return EpiMetaExporter().get_df(self.queryset) -class MetaResultFlatDataPivot2(FlatFileExporter): +class MetaResultFlatDataPivot(FlatFileExporter): """ Return a subset of frequently-used data for generation of data-pivot visualizations. @@ -213,70 +213,3 @@ def build_df(self) -> pd.DataFrame: errors="raise", ) return df - - -class MetaResultFlatDataPivot(FlatFileExporter): - """ - Return a subset of frequently-used data for generation of data-pivot - visualizations. - - Note: data pivot does not currently include study confidence. Could be added if needed. - """ - - def _get_header_row(self): - return [ - "study id", - "study name", - "study published", - "protocol id", - "protocol name", - "protocol type", - "total references", - "identified references", - "key", - "meta result id", - "meta result label", - "health outcome", - "exposure", - "result references", - "statistical metric", - "statistical metric abbreviation", - "N", - "estimate", - "lower CI", - "upper CI", - "CI units", - "heterogeneity", - ] - - def _get_data_rows(self): - rows = [] - for obj in self.queryset: - ser = obj.get_json(json_encode=False) - row = [ - ser["protocol"]["study"]["id"], - ser["protocol"]["study"]["short_citation"], - ser["protocol"]["study"]["published"], - ser["protocol"]["id"], - ser["protocol"]["name"], - ser["protocol"]["protocol_type"], - ser["protocol"]["total_references"], - ser["protocol"]["total_studies_identified"], - ser["id"], # repeat for data-pivot key - ser["id"], - ser["label"], - ser["health_outcome"], - ser["exposure_name"], - ser["number_studies"], - ser["metric"]["metric"], - ser["metric"]["abbreviation"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["heterogeneity"], - ] - rows.append(row) - - return rows diff --git a/hawc/apps/epimeta/models.py b/hawc/apps/epimeta/models.py index 16b1c2c7e7..f9951f0e8b 100644 --- a/hawc/apps/epimeta/models.py +++ b/hawc/apps/epimeta/models.py @@ -69,42 +69,6 @@ def get_absolute_url(self): def get_json(self, json_encode=True): return SerializerHelper.get_serialized(self, json=json_encode, from_cache=False) - @staticmethod - def flat_complete_header_row(): - return ( - "meta_protocol-pk", - "meta_protocol-url", - "meta_protocol-name", - "meta_protocol-protocol_type", - "meta_protocol-lit_search_strategy", - "meta_protocol-lit_search_notes", - "meta_protocol-lit_search_start_date", - "meta_protocol-lit_search_end_date", - "meta_protocol-total_references", - "meta_protocol-inclusion_criteria", - "meta_protocol-exclusion_criteria", - "meta_protocol-total_studies_identified", - "meta_protocol-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["name"], - ser["protocol_type"], - ser["lit_search_strategy"], - ser["lit_search_notes"], - ser["lit_search_start_date"], - ser["lit_search_end_date"], - ser["total_references"], - "|".join(ser["inclusion_criteria"]), - "|".join(ser["exclusion_criteria"]), - ser["total_studies_identified"], - ser["notes"], - ) - def get_study(self): return self.study @@ -191,54 +155,6 @@ def get_qs_json(queryset, json_encode=True): else: return results - @staticmethod - def flat_complete_header_row(): - return ( - "meta_result-pk", - "meta_result-url", - "meta_result-label", - "meta_result-data_location", - "meta_result-health_outcome", - "meta_result-health_outcome_notes", - "meta_result-exposure_name", - "meta_result-exposure_details", - "meta_result-number_studies", - "meta_result-statistical_metric", - "meta_result-statistical_notes", - "meta_result-n", - "meta_result-estimate", - "meta_result-lower_ci", - "meta_result-upper_ci", - "meta_result-ci_units", - "meta_result-heterogeneity", - "meta_result-adjustment_factors", - "meta_result-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - return ( - ser["id"], - ser["url"], - ser["label"], - ser["data_location"], - ser["health_outcome"], - ser["health_outcome_notes"], - ser["exposure_name"], - ser["exposure_details"], - ser["number_studies"], - ser["metric"]["metric"], - ser["statistical_notes"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["heterogeneity"], - "|".join(ser["adjustment_factors"]), - ser["notes"], - ) - def get_study(self): if self.protocol is not None: return self.protocol.get_study() @@ -317,42 +233,6 @@ def estimate_formatted(self): txt += f" ({self.lower_ci}, {self.upper_ci})" return txt - @staticmethod - def flat_complete_header_row(): - return ( - "single_result-pk", - "single_result-study", - "single_result-exposure_name", - "single_result-weight", - "single_result-n", - "single_result-estimate", - "single_result-lower_ci", - "single_result-upper_ci", - "single_result-ci_units", - "single_result-notes", - ) - - @staticmethod - def flat_complete_data_row(ser): - study = None - try: - study = ser["study"]["id"] - except TypeError: - pass - - return ( - ser["id"], - study, - ser["exposure_name"], - ser["weight"], - ser["n"], - ser["estimate"], - ser["lower_ci"], - ser["upper_ci"], - ser["ci_units"], - ser["notes"], - ) - def get_study(self): if self.meta_result is not None: return self.meta_result.get_study() From 19f7b8aa8b9b6db5044e97c42037117e96c1899b Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 11 Oct 2023 14:33:44 -0400 Subject: [PATCH 13/15] changes --- hawc/apps/epimeta/exports.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/hawc/apps/epimeta/exports.py b/hawc/apps/epimeta/exports.py index 229f133511..426a17b1d4 100644 --- a/hawc/apps/epimeta/exports.py +++ b/hawc/apps/epimeta/exports.py @@ -3,6 +3,7 @@ from ..common.exports import Exporter, ModelExport from ..common.helper import FlatFileExporter from ..common.models import sql_display, sql_format, str_m2m +from ..epi.exports import ResultMetricExport from ..study.exports import StudyExport from . import constants @@ -103,15 +104,6 @@ def build_modules(self) -> list[ModelExport]: ] -class ResultMetricExport(ModelExport): - def get_value_map(self): - return { - "id": "id", - "name": "metric", - "abbreviation": "abbreviation", - } - - class EpiMetaDataPivotExporter(Exporter): def build_modules(self) -> list[ModelExport]: return [ From a2f64a21fffbca633e339a2d3aed05b4820685b8 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Thu, 12 Oct 2023 10:22:11 -0400 Subject: [PATCH 14/15] merge fix --- hawc/apps/common/exports.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hawc/apps/common/exports.py b/hawc/apps/common/exports.py index 553213aed6..35da26388d 100644 --- a/hawc/apps/common/exports.py +++ b/hawc/apps/common/exports.py @@ -1,5 +1,4 @@ import pandas as pd -from django.conf import settings from django.db.models import QuerySet from django.utils import timezone @@ -13,8 +12,8 @@ def __init__( self, key_prefix: str = "", query_prefix: str = "", - include: tuple | None = None, - exclude: tuple | None = None, + include: tuple[str, ...] | None = None, + exclude: tuple[str, ...] | None = None, ): self.key_prefix = key_prefix + "-" if key_prefix else key_prefix self.query_prefix = query_prefix + "__" if query_prefix else query_prefix From 8d75187f6e76de969178da3d4eb89077db264063 Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 12 Oct 2023 19:13:26 -0400 Subject: [PATCH 15/15] update admin site to browse data pivot by evidence type --- hawc/apps/summary/admin.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hawc/apps/summary/admin.py b/hawc/apps/summary/admin.py index a384ffece6..0f8ecfd38d 100644 --- a/hawc/apps/summary/admin.py +++ b/hawc/apps/summary/admin.py @@ -29,7 +29,6 @@ def show_url(self, obj): return format_html(f"{obj.id}") -@admin.register(models.DataPivotUpload, models.DataPivotQuery) class DataPivotAdmin(admin.ModelAdmin): list_display = ( "title", @@ -40,7 +39,7 @@ class DataPivotAdmin(admin.ModelAdmin): "created", "last_updated", ) - list_filter = ("published", ("assessment", admin.RelatedOnlyFieldListFilter)) + list_filter = ("published", ("evidence_type", admin.RelatedOnlyFieldListFilter)) search_fields = ("assessment__name", "title") @admin.display(description="URL") @@ -48,6 +47,10 @@ def show_url(self, obj): return format_html(f"{obj.id}") +class DataPivotQueryAdmin(DataPivotAdmin): + list_filter = ("published", "evidence_type") + + @admin.register(models.SummaryText) class SummaryTextAdmin(TreeAdmin): list_display = ( @@ -69,3 +72,7 @@ class SummaryTableAdmin(VersionAdmin): ) list_filter = ("table_type", "published", ("assessment", admin.RelatedOnlyFieldListFilter)) + + +admin.site.register(models.DataPivotUpload, DataPivotAdmin) +admin.site.register(models.DataPivotQuery, DataPivotQueryAdmin)