@pytest.fixture
def get_specified_xml_loadfile():
    """
    Provide a factory that builds an XmlLoadFile for a chosen XML load specification file.

    The fixture itself takes no arguments; it returns a callable so that a test can pick
    which specification file to load (for example a recursive payload, an oversized
    payload, or a known-good file).

    Returns:
        A function ``get_xml_spec_file(xml_path, xml_filename)`` that joins the two
        arguments into a full path and returns ``XmlLoadFile(full_path)``. The file is
        only wrapped here, not read; callers invoke ``read_xml()`` themselves.
    """
    def get_xml_spec_file(xml_path: str, xml_filename: str) -> XmlLoadFile:
        # xml_path: directory holding the specification file
        # xml_filename: name of the XML specification file of interest
        full_xml_filename = os.path.join(xml_path, xml_filename)
        return XmlLoadFile(full_xml_filename)

    return get_xml_spec_file
+ + /home/runner/work/METdataio/METdataio/metdata/met_out/{met_tool} + + + ensemble_stat + grid_stat + mode + point_stat + stat_analysis + wavelet_stat + + + diff --git a/METdbLoad/test/load_met_gha_prod.xml b/METdbLoad/test/load_met_gha_prod.xml index 0298553..1bcfb21 100644 --- a/METdbLoad/test/load_met_gha_prod.xml +++ b/METdbLoad/test/load_met_gha_prod.xml @@ -1,42 +1,42 @@ - - mysql - localhost:3306 - mv_ci_prod - root - root - + + mysql + localhost:3306 + mv_ci_prod + root + root + - /home/runner/work/METdataio/METdataio/metdata/met_out/{met_tool} - + - true - 1 - true - true - true - false - false + true + 1 + true + true + true + false + false - true - true - true - false - true + true + true + true + false + true + METplus-Training + MET output generated by make test. - - - ensemble_stat - grid_stat - mode - point_stat - stat_analysis - wavelet_stat - - + /home/runner/work/METdataio/METdataio/metdata/met_out/{met_tool} + + + ensemble_stat + grid_stat + mode + point_stat + stat_analysis + wavelet_stat + + - METplus-Training - MET output generated by make test. diff --git a/METdbLoad/test/modified_example.xml b/METdbLoad/test/modified_example.xml new file mode 100644 index 0000000..4f0fa8d --- /dev/null +++ b/METdbLoad/test/modified_example.xml @@ -0,0 +1,58 @@ + + + mohawk.rap.ucar.edu:3306 + mv_rtps_href_spring_2022 + mvuser + mvuser + + + + 2022050100 + 2022051200 + 86400 + yyyyMMddHH + + + false + 1 + False + false + FALSE + true + True + false + false + Regional Ensemble + + /var/autofs/mnt/mandan_d2/projects/RRFS/prototype/met_out/{config}/{mem}/{fcst_init}/{met_out} + + + + HREF_lag_offset + RTPS + + + mem01 + mem02 + mem03 + mem04 + mem05 + mem06 + mem07 + mem08 + mem09 + mem10 + + + grid_stat_cmn + point_stat_cmn + + + + + + + true + Load HREF and RTPS data for Spring 2022. 
+ + diff --git a/METdbLoad/test/test_load_specification.xml b/METdbLoad/test/test_load_specification.xml index 8556b16..8b8f49f 100644 --- a/METdbLoad/test/test_load_specification.xml +++ b/METdbLoad/test/test_load_specification.xml @@ -7,7 +7,7 @@ root_password - /METdataio/METreformat/test/data/point_stat + true 1 true @@ -20,11 +20,13 @@ true true true + false + Testing + testing DB load + /METdataio/METreformat/test/data/{met_tool} - point_stat + mode - Testing - testing DB load - + diff --git a/METdbLoad/test/test_recursive_payload_fields.xml b/METdbLoad/test/test_recursive_payload_fields.xml new file mode 100644 index 0000000..4febe60 --- /dev/null +++ b/METdbLoad/test/test_recursive_payload_fields.xml @@ -0,0 +1,130 @@ + + + mariadb + localhost:3306 + mv_test + root + root_password + + + true + 1 + True + false + false + false + false + true + true + true + true + true + Testing + testing DB load + + /METdataio/METreformat/test/data/{config}/{mem}/{met_out}/{text}/{blah}/{foo} + + + HREF_lag_offset + RTPS + + + HREF_lag_offset + RTPS + + + mem01 + mem02 + mem03 + mem04 + mem05 + mem06 + mem07 + mem08 + mem09 + mem10 + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + HREF_lag_offset + RTPS + + + mem01 + mem02 + mem03 + mem04 + mem05 + mem06 + mem07 + mem08 + mem09 + mem10 + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + grid_stat_cmn + point_stat_cmn + + + + + diff --git a/METdbLoad/test/test_recursive_payload_vals.xml b/METdbLoad/test/test_recursive_payload_vals.xml new file mode 100644 index 
0000000..a42334f --- /dev/null +++ b/METdbLoad/test/test_recursive_payload_vals.xml @@ -0,0 +1,141 @@ + + + mariadb + localhost:3306 + mv_test + root + root_password + + + true + 1 + True + false + false + false + false + true + true + true + true + true + Testing + testing DB load + + /METdataio/METreformat/test/data/{config}/{mem}/{met_out} + + + HREF_lag_offset + RTPS + + + mem01 + mem02 + mem03 + mem04 + mem05 + mem06 + mem07 + mem08 + mem09 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + mem10 + + + grid_stat_cmn + point_stat_cmn + + + + + diff --git a/METdbLoad/test/test_size_payload.xml b/METdbLoad/test/test_size_payload.xml new file mode 100644 index 0000000..9b52887 --- /dev/null +++ b/METdbLoad/test/test_size_payload.xml @@ -0,0 +1,32 @@ + + + mariadb + + localhost-abcdefghijklmnopqrstuvwxyz-zyxwvutsrqponmlkjihgfedcba_999_123_rst_puytrewq:3306 + mv_test + root + root_password + + + /METdataio/METreformat/test/data/point_stat + + true + 1 + True + false + false + false + false + true + true + true + true + true + + + point_stat + + + Testing + testing DB load + diff --git a/METdbLoad/test/test_xml.py b/METdbLoad/test/test_xml.py index c303c89..4857def 100644 --- a/METdbLoad/test/test_xml.py +++ b/METdbLoad/test/test_xml.py @@ -1,6 +1,15 @@ #!/usr/bin/env python3 + +import os +import 
pytest + +import METdbLoad.test.utils as utils + """Test reading XML file.""" +# Location of the XML specification files that are used to test XML validation +TEST_XML_SPECIFICATION_FILEPATH = os.path.join(os.path.dirname(os.path.abspath(__file__))) + def test_loadflags(tmp_path, get_xml_loadfile): """Read various flags from XML file.""" XML_LOADFILE = get_xml_loadfile(tmp_path) @@ -37,3 +46,108 @@ def test_insertsize(tmp_path, get_xml_loadfile): """Read insert_size from XML file.""" XML_LOADFILE = get_xml_loadfile(tmp_path) assert XML_LOADFILE.insert_size == 1 + +# @pytest.mark.skip +def test_validation_recursive_payload_fields(get_specified_xml_loadfile): + """ + Test validation against attempted recursive payload, ValueError should be raised for + the test_recursive_payload_fields.xml XML-specification file because the max allowed number + of field elements is 5 and the test config file has 6. + """ + # Get the XML specification file that has a recursive payload + xml_spec_filename = "test_recursive_payload_fields.xml" + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + with pytest.raises(ValueError): + xml_load_file_obj.read_xml() + + +def test_validation_recursive_payload_vals(get_specified_xml_loadfile): + """ + Test validation against attempted recursive payload, ValueError should be raised for + the test_recursive_payload.xml XML-specification file. + """ + # Get the XML specification file that has a recursive payload + xml_spec_filename = "test_recursive_payload_vals.xml" + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + with pytest.raises(ValueError): + xml_load_file_obj.read_xml() + + +def test_validation_large_payload(get_specified_xml_loadfile): + """ + Test validation against attempted "large" payload, ValueError should be raised for + the test_size_payload.xml XML specification file. 
+ """ + # Get the XML specification file that has a recursive payload + xml_spec_filename = "test_size_payload.xml" + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + with pytest.raises(ValueError): + xml_load_file_obj.read_xml() + + +def test_validation_simple_xml(get_specified_xml_loadfile): + """ + Test validation against a simple, valid XML specification file. + ValueError should be NOT be raised for + the full_example.xml specification file which has been used on real data. + """ + + xml_spec_filename = "test_load_specification.xml" + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + try: + xml_load_file_obj.read_xml() + except ValueError: + msg = f"Unexpected ValueError when validating {os.path.join(TEST_XML_SPECIFICATION_FILEPATH,xml_spec_filename)}" + pytest.fail(msg) + +@pytest.mark.parametrize("xmlconfig", ["modified_example.xml", "full_example.xml"]) +def test_validation_real_xml(get_specified_xml_loadfile, xmlconfig): + """ + Test validation against an XML specification file that is in use by a project. + ValueError should be NOT be raised for + the specification file that has been used on real data. + """ + + xml_spec_filename = xmlconfig + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + try: + xml_load_file_obj.read_xml() + except ValueError: + msg = f"Unexpected ValueError when validating {os.path.join(TEST_XML_SPECIFICATION_FILEPATH,xml_spec_filename)}" + pytest.fail(msg) + +def test_tmp_xml(get_specified_xml_loadfile): + """ + Test validation against an XML specification file that was created as a fixture. + ValueError should be NOT be raised for + the full_example.xml specification file which has been used on real data. 
+ """ + # Get the XML specification file that has a recursive payload + + xml_spec_filename = "tmp.xml" + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + try: + xml_load_file_obj.read_xml() + except ValueError: + msg = (f"Unexpected ValueError when validating {os.path.join(TEST_XML_SPECIFICATION_FILEPATH,xml_spec_filename)} against " + f"schema {utils.LOAD_SPECIFICATION_SCHEMA}") + pytest.fail(msg) + +@pytest.mark.parametrize("xml_config", ["load_met_gha_prod.xml", "load_met_gha_new.xml"]) +def test_db_xml(get_specified_xml_loadfile, xml_config): + """ + Test validation against an XML specification file that is used in another test that compares two + databases. + ValueError should be NOT be raised for + the load_met_gha_prod/new specification files which are used on real data. + """ + + xml_spec_filename = xml_config + xml_load_file_obj = get_specified_xml_loadfile(TEST_XML_SPECIFICATION_FILEPATH, xml_spec_filename) + try: + xml_load_file_obj.read_xml() + except ValueError: + msg = (f"Unexpected ValueError when validating {os.path.join(TEST_XML_SPECIFICATION_FILEPATH,xml_spec_filename)} against " + f"schema {utils.LOAD_SPECIFICATION_SCHEMA}") + pytest.fail(msg) + diff --git a/METdbLoad/test/tmp.xml b/METdbLoad/test/tmp.xml new file mode 100644 index 0000000..e1e3f4a --- /dev/null +++ b/METdbLoad/test/tmp.xml @@ -0,0 +1,33 @@ + + + mysql + localhost:3306 + mv_test + root + root_password + True + + + + true + 1 + true + false + false + false + false + true + true + true + true + true + Testing + testing with pytest + /Users/minnawin/feature_internal_56_METdataio_validate_payloads/METdataio/METreformat/test/data/{met_tool} + + + point_stat + grod_stat + + + \ No newline at end of file diff --git a/METdbLoad/test/utils.py b/METdbLoad/test/utils.py index d587021..8e5360b 100644 --- a/METdbLoad/test/utils.py +++ b/METdbLoad/test/utils.py @@ -1,11 +1,14 @@ from pathlib import Path from argparse import Namespace +import 
def abs_path(rel_path):
    """Return rel_path joined onto the directory two levels above this module, as a string."""
    base_dir = Path(__file__).parents[2]
    joined = base_dir / rel_path
    return str(joined)
def validate_xml(self):
    """
    Validate the XML specification file against the load-specification XML schema.

    The schema is the mechanism intended to reject excessively large payloads and
    recursive payloads; this method only reports whether the document conforms.

    Args: None

    Returns: True if self.xmlfilename validates against the schema, False otherwise.
        On failure every entry of the schema's error log is printed and logged.

    Raises: nothing is caught here, so errors from opening the schema file or from
        lxml while parsing either file propagate to the caller.
    """
    start = datetime.datetime.now()
    self.logger.info(f"Validating against schema: {dbload_util.LOAD_SPECIFICATION_SCHEMA}")

    # os.path.join returns its last argument unchanged when that argument is an
    # absolute path, so the module directory is only a fallback base for a
    # relative schema path.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    xsd_file = os.path.join(module_dir, dbload_util.LOAD_SPECIFICATION_SCHEMA)
    with open(xsd_file, 'rb') as schema_file:
        xmlschema_doc = etree.parse(schema_file)
    xmlschema = etree.XMLSchema(xmlschema_doc)

    xml_document = etree.parse(self.xmlfilename)
    # Validate the XML document against the schema
    is_valid = xmlschema.validate(xml_document)

    # Report a plain number of seconds rather than a timedelta repr
    elapsed_seconds = (datetime.datetime.now() - start).total_seconds()
    self.logger.info(f"Validation complete, took {elapsed_seconds} seconds")

    if is_valid:
        self.logger.info(f"{self.xmlfilename} is valid")
        # Report the schema path, not the (already closed) file object
        print(f"xml file {self.xmlfilename} is valid against {xsd_file}")
        return True

    self.logger.info(f"{self.xmlfilename} NOT VALID")
    for error in xmlschema.error_log:
        detail = f"{error.message} at line: {error.line} and column: {error.column}"
        # Keep the console output and also record the violation in the log
        self.logger.error(detail)
        print(detail)
    return False