Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature #318 tests for read data files #329

Merged
merged 2 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions METdbLoad/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
TOP_DIR = str(Path(__file__).parents[1])
sys.path.insert(0, os.path.abspath(TOP_DIR))


def parse_sql(filename):
"""Parse a .sql file and return a list of SQL statements"""
data = open(filename, "r").readlines()
Expand Down Expand Up @@ -112,18 +113,13 @@ def testRunSql():


@pytest.fixture
def point_stat_xml_file(tmp_path):
"""Get xml load file for point_stat test data."""
data_path = Path(TOP_DIR) / POINT_STAT_DATA_DIR
return get_xml_test_file(tmp_path, data_path, "point_stat")


@pytest.fixture
def get_xml_loadfile(point_stat_xml_file):
def load_and_read_xml():
def get_xml_loadfile():
def load_and_read_xml(
tmp_path, data_dir=POINT_STAT_DATA_DIR, met_tool="point_stat"
):
from METdataio.METdbLoad.ush.read_load_xml import XmlLoadFile

XML_FILE = point_stat_xml_file
XML_FILE = get_xml_test_file(tmp_path, data_dir, met_tool)
XML_LOADFILE = XmlLoadFile(XML_FILE)
XML_LOADFILE.read_xml()
return XML_LOADFILE
Expand Down
24 changes: 24 additions & 0 deletions METdbLoad/test/data/vsdb/gfs_20140802.vsdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
V01 GFS 45 2014080221 STAGE2 G218/LMV FHO>.50 APCP/03 SFC = 5789 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/APL FHO>.50 APCP/03 SFC = 2814 0.00000 0.00000 0.00249
V01 GFS 45 2014080221 STAGE2 G218/NEC FHO>.50 APCP/03 SFC = 3052 0.00197 0.00000 0.00229
V01 GFS 45 2014080221 STAGE2 G218/SEC FHO>.50 APCP/03 SFC = 3524 0.03490 0.00028 0.00738
V01 GFS 45 2014080221 STAGE2 G218/GMC FHO>.50 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE4 G218/RFC FHO>.50 APCP/03 SFC = 60582 0.00213 0.00002 0.00271
V01 GFS 45 2014080221 STAGE2 G218/NWC FHO>.75 APCP/03 SFC = 2556 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SWC FHO>.75 APCP/03 SFC = 1422 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/NMT FHO>.75 APCP/03 SFC = 6251 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/GRB FHO>.75 APCP/03 SFC = 3400 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SMT FHO>.75 APCP/03 SFC = 3802 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/SPL FHO>1.0 APCP/03 SFC = 5484 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/MDW FHO>1.0 APCP/03 SFC = 10240 0.00000 0.00000 0.00283
V01 GFS 45 2014080221 STAGE2 G218/LMV FHO>1.0 APCP/03 SFC = 5789 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/APL FHO>1.0 APCP/03 SFC = 2814 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/NEC FHO>1.0 APCP/03 SFC = 3052 0.00000 0.00000 0.00033
V01 GFS 45 2014080221 STAGE2 G218/SEC FHO>1.0 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE2 G218/GMC FHO>1.0 APCP/03 SFC = 3524 0.00000 0.00000 0.00000
V01 GFS 45 2014080221 STAGE4 G218/RFC FHO>1.0 APCP/03 SFC = 60582 0.00000 0.00000 0.00050
V01 GFS 45 2014080221 STAGE2 G218/NWC SL1L2 APCP/03 SFC= 2556 0.2401E-01 0.6913E-01 0.0000E+00 0.4112E-01 0.1017E+00
V01 GFS 45 2014080221 STAGE2 G218/SWC SL1L2 APCP/03 SFC= 1422 0.7749E-01 0.2632E+00 0.2126E-01 0.1783E+00 0.8898E+00
V01 GFS 45 2014080221 STAGE2 G218/NMT SL1L2 APCP/03 SFC= 6251 0.6989E-01 0.3756E-01 0.1510E-02 0.1398E+00 0.6695E-01
V01 GFS 45 2014080221 STAGE2 G218/GRB SL1L2 APCP/03 SFC= 3400 0.1266E-01 0.4206E-02 0.1994E-04 0.1205E-01 0.3085E-02
V01 GFS 45 2014080221 STAGE2 G218/SMT SL1L2 APCP/03 SFC= 3802 0.6522E+00 0.2165E+00 0.4879E+00 0.1883E+01 0.7141E+00
13 changes: 9 additions & 4 deletions METdbLoad/test/test_met_db_load.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import argparse
import pytest
from pathlib import Path
from METdbLoad.conftest import TOP_DIR
from METdbLoad.ush.met_db_load import main as load_main

from METdataio.METdbLoad.test.utils import (
Expand All @@ -13,6 +11,7 @@
MTD_DATA_DIR,
MODE_DATA_DIR,
TCDIAG_DATA_DIR,
VSDB_DATA_DIR,
)


Expand Down Expand Up @@ -94,6 +93,14 @@ def assert_count_rows(cur, table, expected_count):
"mode_obj_single": 6,
},
),
(
VSDB_DATA_DIR,
"vsdb",
{
"line_data_ctc": 19,
"line_data_sl1l2": 5,
},
),
],
)
def test_met_db_table_counts(
Expand All @@ -104,8 +111,6 @@ def test_met_db_table_counts(
met_tool,
expected_counts,
):

met_data_dir = str(Path(TOP_DIR) / met_data_dir)
test_data = {
"xmlfile": str(get_xml_test_file(tmp_path, met_data_dir, met_tool)),
"index": "true",
Expand Down
68 changes: 62 additions & 6 deletions METdbLoad/test/test_read_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,79 @@
import pytest

from METdataio.METdbLoad.ush.read_data_files import ReadDataFiles
from METdataio.METdbLoad.test.utils import (
POINT_STAT_DATA_DIR,
MTD_DATA_DIR,
)


def test_counts(get_xml_loadfile):
def test_counts(tmp_path, get_xml_loadfile):
"""Count parts of the files loaded in."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path, POINT_STAT_DATA_DIR)

# Read all of the data from the data files into a dataframe
FILE_DATA = ReadDataFiles()

# read in the data files, with options specified by XML flags
FILE_DATA.read_data(XML_LOADFILE.flags,
XML_LOADFILE.load_files,
XML_LOADFILE.line_types)
FILE_DATA.read_data(
XML_LOADFILE.flags, XML_LOADFILE.load_files, XML_LOADFILE.line_types
)

# number of files
assert len(XML_LOADFILE.load_files) == 2
# number of lines of data
assert FILE_DATA.stat_data.shape[0] == 94
# number of line types
assert FILE_DATA.stat_data.line_type.unique().size == 7


def test_mtd_loads(tmp_path, get_xml_loadfile):
    """Read the sample MTD data directory and check the loaded table shapes."""
    load_spec = get_xml_loadfile(tmp_path, MTD_DATA_DIR)

    # Read every data file listed in the load spec, using the flags and
    # line types that came from the XML.
    reader = ReadDataFiles()
    reader.read_data(
        load_spec.flags,
        load_spec.load_files,
        load_spec.line_types,
    )

    # The load spec is expected to list exactly two files.
    file_count = len(load_spec.load_files)
    assert file_count == 2

    # Expected (rows, columns) of the MTD 2D and 3D-single tables.
    assert reader.mtd_2d_data.shape == (278, 43)
    assert reader.mtd_3d_single_data.shape == (8, 48)


def test_mtd_loads_revision(tmp_path, get_xml_loadfile):
    """Write a small MTD 2D file and check how its revision rows are loaded."""
    # Header line followed by six data rows; the last row has no trailing
    # newline.
    mtd_text = (
        """VERSION MODEL DESC FCST_LEAD FCST_VALID OBS_LEAD OBS_VALID T_DELTA FCST_T_BEG FCST_T_END FCST_RAD FCST_THR OBS_T_BEG OBS_T_END OBS_RAD OBS_THR FCST_VAR FCST_UNITS FCST_LEV OBS_VAR OBS_UNITS OBS_LEV OBJECT_ID OBJECT_CAT TIME_INDEX AREA CENTROID_X CENTROID_Y CENTROID_LAT CENTROID_LON AXIS_ANG INTENSITY_10 INTENSITY_25 INTENSITY_50 INTENSITY_75 INTENSITY_90 INTENSITY_99\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 F001 CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF001 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.10 0.99 2.91 5.59 20.83\n"""
        """V12.0.0 FCST NA 010000 20100517_010000 010000 20100517_010000 010000 -1 1 2 >=0.5 -1 1 2 >=0.5 APCP_01 kg/m^2 A01 APCP_01 kg/m^2 A01 new CF002 0 3640 420.52 167.55 35.53 -85.21 5.46 0.00 0.99 0.99 2.99 5.99 99.00"""
    )

    # Put the file in its own directory so it is the only data file there.
    tmp_mtd_dir = tmp_path / "mtd_revision"
    tmp_mtd_dir.mkdir()
    revision_file = tmp_mtd_dir / "mtd_REVISION_TEST_2d.txt"
    revision_file.write_text(mtd_text)

    load_spec = get_xml_loadfile(tmp_path, tmp_mtd_dir)
    reader = ReadDataFiles()
    reader.read_data(
        load_spec.flags,
        load_spec.load_files,
        load_spec.line_types,
    )

    # Exactly one file was written, so exactly one should be listed.
    assert len(load_spec.load_files) == 1
    # Expected (rows, columns) of the 2D table; the 3D-single table is empty.
    assert reader.mtd_2d_data.shape == (10, 43)
    assert reader.mtd_3d_single_data.shape == (0, 0)

    # Check that the revisions have been labelled in both variable columns.
    for column in ("fcst_var", "obs_var"):
        is_revision = reader.mtd_2d_data[column] == "REV_APCP_01"
        assert sum(is_revision) == 4
16 changes: 8 additions & 8 deletions METdbLoad/test/test_xml.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
"""Test reading XML file."""

def test_loadflags(get_xml_loadfile):
def test_loadflags(tmp_path, get_xml_loadfile):
"""Read various flags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.flags['load_stat']
assert XML_LOADFILE.flags['load_mode']
assert XML_LOADFILE.flags['load_mtd']
Expand All @@ -18,22 +18,22 @@ def test_loadflags(get_xml_loadfile):
assert XML_LOADFILE.flags['force_dup_file']
assert XML_LOADFILE.flags['load_xml']

def test_loadgroup(get_xml_loadfile):
def test_loadgroup(tmp_path, get_xml_loadfile):
"""Read group and description from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.group == "Testing"
assert XML_LOADFILE.description == "testing with pytest"

def test_connection(get_xml_loadfile):
def test_connection(tmp_path, get_xml_loadfile):
"""Read connection tags from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.connection['db_host'] == "localhost"
assert XML_LOADFILE.connection['db_port'] == 3306
assert XML_LOADFILE.connection['db_database'] == "mv_test"
assert XML_LOADFILE.connection['db_user'] == "root"
assert XML_LOADFILE.connection['db_management_system'] == "mysql"

def test_insertsize(get_xml_loadfile):
def test_insertsize(tmp_path, get_xml_loadfile):
"""Read insert_size from XML file."""
XML_LOADFILE = get_xml_loadfile()
XML_LOADFILE = get_xml_loadfile(tmp_path)
assert XML_LOADFILE.insert_size == 1
28 changes: 19 additions & 9 deletions METdbLoad/test/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
from pathlib import Path


def abs_path(rel_path):
    """Join ``rel_path`` onto this file's third ancestor directory.

    Args:
        rel_path: Path to append to ``Path(__file__).parents[2]``.

    Returns:
        str: The joined path as a string.
    """
    base_dir = Path(__file__).parents[2]
    return str(base_dir / rel_path)


# Use data from METreformat where available
ENSEMBLE_STAT_DATA_DIR = "METreformat/test/data/ensemble_stat"
GRID_STAT_DATA_DIR = "METreformat/test/data/grid_stat/mctc_mcts"
MPR_DATA_DIR = "METreformat/test/data/mpr/climo_data"
POINT_STAT_DATA_DIR = "METreformat/test/data/point_stat"
TCDIAG_DATA_DIR = "METreformat/test/data/tcdiag_tcmpr"
ENSEMBLE_STAT_DATA_DIR = abs_path("METreformat/test/data/ensemble_stat")
GRID_STAT_DATA_DIR = abs_path("METreformat/test/data/grid_stat/mctc_mcts")
MPR_DATA_DIR = abs_path("METreformat/test/data/mpr/climo_data")
POINT_STAT_DATA_DIR = abs_path("METreformat/test/data/point_stat")
TCDIAG_DATA_DIR = abs_path("METreformat/test/data/tcdiag_tcmpr")

# This data is copied from MET test data
# https://hub.docker.com/r/dtcenter/met-data-output
MTD_DATA_DIR = "METdbLoad/test/data/mtd/"
MODE_DATA_DIR = "METdbLoad/test/data/mode/"
MTD_DATA_DIR = abs_path("METdbLoad/test/data/mtd/")
MODE_DATA_DIR = abs_path("METdbLoad/test/data/mode/")

# Very small data sample for testing
VSDB_DATA_DIR = abs_path("METdbLoad/test/data/vsdb/")

DEFAULT_LOAD_FLAGS = {
"stat_header_db_check": "true",
Expand Down Expand Up @@ -72,9 +82,9 @@ def populate_xml_load_spec(met_data_dir, met_tool, load_flags=DEFAULT_LOAD_FLAGS

def get_xml_test_file(tmp_path, met_data_dir, met_tool, load_flags={}):
"""Write test_load_specification.xml and return path

Args:
tmp_path (Path): Path to write test file to.
tmp_path (Path): Path to write test file to.
met_data_dir (str): directory containing MET files to load
met_tool (str): Name of MET tool that generated files, e.g. "point_stat"
load_flags (dict): Optional.
Expand Down
Loading