diff --git a/tools/landusedata/README.md b/tools/landusedata/README.md
new file mode 100644
index 0000000000..60305c7ad1
--- /dev/null
+++ b/tools/landusedata/README.md
@@ -0,0 +1,55 @@
+# FATES LUH2 data tool README
+
+## Purpose
+
+This tool takes the raw Land Use Harmonization (https://luh.umd.edu/), or LUH2, data files as
+input and prepares them for use with FATES. The tool concatenates the various raw data sets into
+a single file and provides the ability to regrid the source data resolution to a target
+resolution that the user designates. The output data is then usable by FATES, mediated through
+a host land model (currently either CTSM or E3SM).
+
+For more information on how FATES utilizes this information see https://github.com/NGEET/fates/pull/1040.
+
+## Installation
+
+This tool requires conda with python3. See https://docs.conda.io/en/latest/miniconda.html#installing
+for information on installing conda on your system. To install the conda environment necessary to run the tool,
+execute the following command:
+
+conda env create -f conda-luh2.yml
+
+This will create a conda environment named "luh2". To activate this environment run:
+
+conda activate luh2
+
+For more information on creating conda environments see
+https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file
+
+Note that a subset of host land models (HLMs) and HLM-supported machines are planned to incorporate this tool into the surface dataset workflow.
+As such, if you are working on one of these machines, the output from this tool may be precomputed and available for the grid resolution of interest.
+
+## Usage
+
+After activating the "luh2" environment the tool can be run from the command line with the following minimum required inputs:
+
+python luh2.py -l <raw-luh2-datafile> -s <luh2-static-datafile> -r <regrid-targetfile> -w <regridder-output> -o <outputfile>
+
+The description of the minimum required input arguments is as follows:
+- raw-luh2-datafile: one of the three raw LUH2 datafiles (states, transitions, or management). This is the data to be regridded and used by FATES.
+- luh2-static-datafile: supplementary 0.25 deg resolution static data used in the construction of the raw LUH2 datafiles. This is used to help set the gridcell mask for the output file.
+- regrid-targetfile: host land model surface data file intended to be used in conjunction with the FATES run at a specific grid resolution. This is used as the regridder target resolution.
+- regridder-output: the path and filename at which to write the regridding weights file, or of an existing regridding weights file to reuse.
+- outputfile: the path and filename to which the output is written.
+
+The tool is intended to be run three times, sequentially, to concatenate the raw states, transitions, and management data into a single file. After the first run of
+the tool, a merge option should also be included in the argument list pointing to the most recent output file. This ensures that the output of the previous regridding run
+is merged into the current run and that the previously written regridding weights file is reused (to avoid duplicate computation).
+The luh2.sh file in this directory provides an example shell script that uses the python tool in this sequential manner. The python tool itself prints additional
+usage help when passed the `--help` option on the command line.
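+
+The packaged form of this tool under src/landusedata exposes the same workflow as a
+single `luh2` subcommand that ingests all three raw files in one call; a minimal
+sketch (the raw LUH2 filenames below are placeholders, and the positional order
+follows the subparser defined in src/landusedata/_main.py):
+
+```python
+from landusedata._main import main
+
+main(['luh2',
+      'surfdata_4x5.nc',           # regrid target surface dataset
+      'staticData_quarterdeg.nc',  # LUH2 static data file
+      'states.nc',                 # placeholder raw states file
+      'transitions.nc',            # placeholder raw transitions file
+      'management.nc',             # placeholder raw management file
+      '-w', 'regridder.nc',        # regridder weights output
+      '-o', 'LUH2_timeseries.nc']) # concatenated timeseries output
+```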
+
+## Description of directory contents
+
+- luh2.py: main luh2 python script
+- luh2mod.py: python module source file for the functions called in luh2.py
+- luh2.sh: example bash shell script file demonstrating how to call luh2.py
+- conda-luh2.yml: conda environment yaml file which defines the minimum set of package dependencies for luh2.py
diff --git a/tools/landusedata/pyproject.toml b/tools/landusedata/pyproject.toml
new file mode 100644
index 0000000000..70ca046c63
--- /dev/null
+++ b/tools/landusedata/pyproject.toml
@@ -0,0 +1,20 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "landusedata"
+version = "0.0.0"
+
+# Package discovery for the hatchling backend declared above
+[tool.hatch.build.targets.wheel]
+packages = ["src/landusedata"]
+
+# [tool.pytest.ini_options]
+# addopts = "-ra -q"
+# testpaths = [
+#     "tests",
+#     "integration",
+# ]
diff --git a/tools/landusedata/pytest.ini b/tools/landusedata/pytest.ini
new file mode 100644
index 0000000000..7e0cc474e3
--- /dev/null
+++ b/tools/landusedata/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+testpaths = tests
+addopts = --verbose --durations=10 --color=yes
+# addopts = --pep8 --flakes --verbose --durations=10 --color=yes
diff --git a/tools/landusedata/src/landusedata/__init__.py b/tools/landusedata/src/landusedata/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tools/landusedata/src/landusedata/__main__.py b/tools/landusedata/src/landusedata/__main__.py
new file mode 100644
index 0000000000..a1d5e07628
--- /dev/null
+++ b/tools/landusedata/src/landusedata/__main__.py
@@ -0,0 +1,5 @@
+from landusedata._main import main
+
+# Guard against import time side effects
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/tools/landusedata/src/landusedata/_main.py b/tools/landusedata/src/landusedata/_main.py
new file mode 100644
index 0000000000..552e38e3a3
--- /dev/null
+++ b/tools/landusedata/src/landusedata/_main.py
@@ -0,0 +1,80 @@
+import argparse
+
+from landusedata.luh2 import main as luh2main
+from landusedata.landusepft import main as lupftmain
+
+def main(argv=None):
+
+    # Define top level parser
+    parser = argparse.ArgumentParser(description="FATES landuse data tool")
+
+    # Target regrid file - is there a nice way to share this between subparsers?
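+    # (One possibility, not wired up here: declare the shared positional once
+    #  on a parent parser and hand it to each subparser via argparse's
+    #  parents mechanism, e.g.
+    #      common = argparse.ArgumentParser(add_help=False)
+    #      common.add_argument('regrid_target_file')
+    #      subparsers.add_parser('luh2', parents=[common], ...)
+    #  )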
+    # parser.add_argument('regrid_target_file', help='target surface data file with desired grid resolution')
+
+    # Define subparser option for luh2 or landuse x pft data tool subcommands
+    subparsers = parser.add_subparsers(required=True, title='subcommands',
+                                       help='landuse data tool subcommand options')
+    luh2_parser = subparsers.add_parser('luh2', prog='luh2',
+                                        help='generate landuse harmonization timeseries data output')
+    lupft_parser = subparsers.add_parser('lupft', prog='lupft',
+                                         help='generate landuse x pft static data map output')
+
+    # Set the default called function for the subparser command
+    luh2_parser.set_defaults(func=luh2main)
+    lupft_parser.set_defaults(func=lupftmain)
+
+    # LUH2 subparser arguments
+    luh2_parser.add_argument('regrid_target_file',
+                             help='target surface data file with desired grid resolution')
+    luh2_parser.add_argument("luh2_static_file",
+                             help = "luh2 static data file")
+    luh2_parser.add_argument('luh2_states_file',
+                             help = "full path of luh2 raw states file")
+    luh2_parser.add_argument('luh2_transitions_file',
+                             help = "full path of luh2 raw transitions file")
+    luh2_parser.add_argument('luh2_management_file',
+                             help = "full path of luh2 raw management file")
+    luh2_parser.add_argument("-w", "--regridder_weights",
+                             default = 'regridder.nc',
+                             help = "filename of regridder weights to save")
+    luh2_parser.add_argument("-b","--begin",
+                             type = int,
+                             default = None,
+                             help = "beginning of the date range of interest")
+    luh2_parser.add_argument("-e","--end",
+                             type = int,
+                             default = None,
+                             help = "end of the date range of interest")
+    luh2_parser.add_argument("-o","--output",
+                             default = 'LUH2_timeseries.nc',
+                             help = "output filename")
+
+    # Landuse x pft subparser arguments
+    lupft_parser.add_argument('regrid_target_file',
+                              help='target surface data file with desired grid resolution')
+    lupft_parser.add_argument('luh2_static_file',
+                              help = "luh2 static data file")
+    lupft_parser.add_argument('clm_luhforest_file',
+                              help = "CLM5 landuse x pft forest data file (CLM5_current_luhforest_deg025.nc)")
+    lupft_parser.add_argument('clm_luhpasture_file',
+                              help = "CLM5 landuse x pft pasture data file (CLM5_current_luhpasture_deg025.nc)")
+    lupft_parser.add_argument('clm_luhother_file',
+                              help = "CLM5 landuse x pft other data file (CLM5_current_luhother_deg025.nc)")
+    lupft_parser.add_argument('clm_surface_file',
+                              help = "CLM5 surface data file (CLM5_current_surf_deg025.nc)")
+    lupft_parser.add_argument("-o","--output",
+                              default = 'fates_landuse_pft_map.nc',
+                              help = "output filename")
+
+    # Parse the arguments
+    args = parser.parse_args(argv)
+
+    # Call the default function for the given subcommand
+    args.func(args)
+
+    # Return successful completion
+    return 0
+
+# Guard against import time side effects
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/tools/landusedata/src/landusedata/conda-luh2.yml b/tools/landusedata/src/landusedata/conda-luh2.yml
new file mode 100644
index 0000000000..12d4a35c65
--- /dev/null
+++ b/tools/landusedata/src/landusedata/conda-luh2.yml
@@ -0,0 +1,11 @@
+# This yaml file is intended for users who wish to run the luh2.py tool on their own machines.
+# It is not yet tested regularly against the latest versions of the dependencies;
+# such regular testing is expected to be implemented in the future.
+name: luh2
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - xesmf
+  # xarray, installed automatically as an xesmf dependency, uses scipy, which needs netcdf4 to open datasets
+  - netcdf4
diff --git a/tools/landusedata/src/landusedata/landusepft.py b/tools/landusedata/src/landusedata/landusepft.py
new file mode 100644
index 0000000000..6a396163e7
--- /dev/null
+++ b/tools/landusedata/src/landusedata/landusepft.py
@@ -0,0 +1,96 @@
+import os
+import xarray as xr
+import xesmf as xe
+
+from landusedata.landusepftmod import ImportLandusePFTFile, AddLatLonCoordinates, RenormalizePFTs
+from landusedata.utils import ImportLUH2StaticFile, ImportRegridTarget
+from landusedata.utils import SetMaskRegridTarget, DefineStaticMask
+
+def main(args):
+
+    # Open the files
+    ds_static = ImportLUH2StaticFile(args.luh2_static_file)
+    filelist = [args.clm_surface_file,
+                args.clm_luhforest_file,
+                args.clm_luhpasture_file,
+                args.clm_luhother_file]
+    ds_landusepfts = []
+    for filename in filelist:
+        ds_landusepfts.append(ImportLandusePFTFile(filename))
+
+    # Add lat/lon coordinates to the CLM5 landuse data
+    for dataset in ds_landusepfts:
+        AddLatLonCoordinates(dataset)
+
+    # Define static luh2 ice/water mask
+    mask_icwtr = DefineStaticMask(ds_static)
+
+    # Initialize the data array list and calculate the normalized,
+    # masked bareground percentage
+    percent = []
+    percent_bareground = ds_landusepfts[0].PCT_NAT_PFT.isel(natpft=0)
+    percent_bareground = (percent_bareground / 100.0) * mask_icwtr
+    percent.append(percent_bareground)
+
+    # Renormalize the PCT_NAT_PFT for each dataset using the mask
+    for dataset in ds_landusepfts:
+        percent.append(RenormalizePFTs(dataset))
+
+    # Calculate the primary and secondary PFT fractions as the forest
+    # and nonforest-weighted averages of the forest and other PFT datasets.
+    percent[2] = ds_static.fstnf * percent[2] + (1. - ds_static.fstnf) * percent[-1]
+
+    # Note that the list order is:
+    # bareground, surface data, primary, pasture, rangeland (other)
+    ds_var_names = ['frac_brgnd','frac_csurf','frac_primr','frac_pastr','frac_range']
+
+    # Open and prepare the target dataset
+    ds_target = ImportRegridTarget(args.regrid_target_file)
+
+    # Set the mask for the regrid target
+    ds_target = SetMaskRegridTarget(ds_target)
+
+    # Create an output dataset to contain individually regridded landuse percent datasets
+    ds_output = xr.Dataset()
+
+    # Loop through percentage list and regrid each entry
+    for index,data_array in enumerate(percent):
+
+        # Get the name for the new variable
+        varname = ds_var_names[index]
+
+        # Convert current percent data array into temporary dataset
+        ds_percent = data_array.to_dataset(name=varname)
+
+        # Apply mask for the current dataset
+        ds_percent['mask'] = mask_icwtr
+        if (varname != 'frac_brgnd'):
+            mask_zeropercent = xr.where(ds_percent[varname].sum(dim='natpft') == 0.,0,1)
+            ds_percent['mask'] = ds_percent.mask * mask_zeropercent
+
+        # Regrid current dataset
+        print('Regridding {}'.format(varname))
+        regridder = xe.Regridder(ds_percent, ds_target, "conservative_normed")
+        ds_regrid = regridder(ds_percent)
+
+        # Drop mask to avoid conflicts when merging
+        if (varname != 'frac_brgnd'):
+            # There is no mask currently on the bareground
+            ds_regrid = ds_regrid.drop_vars(['mask'])
+
+        # Append the new dataset to the output dataset
+        ds_output = ds_output.merge(ds_regrid)
+
+    # Duplicate the 'primary' data array into a 'secondary' data array. Eventually
+    # this will contain different data from a future CLM landuse x pft update
+    ds_output['frac_secnd'] = ds_output.frac_primr.copy(deep=True)
+
+    # ds_regrid = ds_regrid.rename_dims(dims_dict={'lat':'lsmlat','lon':'lsmlon'})
+
+    # Output dataset to netcdf file
+    print('Writing fates landuse x pft dataset to file')
+    output_file = os.path.join(os.getcwd(),args.output)
+    ds_output.to_netcdf(output_file)
+
+# Note: no standalone entry point; main(args) is invoked through the
+# top-level parser in landusedata._main, which supplies the args namespace.
diff --git a/tools/landusedata/src/landusedata/landusepftmod.py b/tools/landusedata/src/landusedata/landusepftmod.py
new file mode 100644
index 0000000000..5eea90e9f7
--- /dev/null
+++ b/tools/landusedata/src/landusedata/landusepftmod.py
@@ -0,0 +1,31 @@
+import xarray as xr
+
+# Open the CLM5 landuse x pft data file
+def ImportLandusePFTFile(filename):
+    dataset = xr.open_dataset(filename)
+
+    # Check to see if the imported dataset has the percent
+    # natural pft variable present.
+    if 'PCT_NAT_PFT' not in list(dataset.var()):
+        raise TypeError("incorrect file, must be CLM5 landuse file")
+
+    # Change the percent natural pft from single to double precision
+    # for downstream calculations
+    dataset['PCT_NAT_PFT'] = dataset.PCT_NAT_PFT.astype('float64')
+    return dataset
+
+# Add lat/lon coordinates to the CLM5 landuse dataset.
+# While the lat and lon are available as variables, they are
+# not defined as 'coords' in the imported dataset.
+def AddLatLonCoordinates(dataset):
+    dataset['lon'] = dataset.LON
+    dataset['lat'] = dataset.LAT
+    return dataset
+
+# Renormalize the pft percentages without the bareground
+def RenormalizePFTs(dataset):
+    # Remove the bareground pft index from the dataset
+    percent = dataset.PCT_NAT_PFT.isel(natpft=slice(1,None))
+    # Normalize
+    percent = percent / percent.sum(dim='natpft')
+    return percent
diff --git a/tools/landusedata/src/landusedata/luh2.py b/tools/landusedata/src/landusedata/luh2.py
new file mode 100644
index 0000000000..0ed8edf47c
--- /dev/null
+++ b/tools/landusedata/src/landusedata/luh2.py
@@ -0,0 +1,105 @@
+import os
+
+import xarray as xr
+
+from landusedata.luh2mod import ImportLUH2TimeSeries, CorrectStateSum
+from landusedata.regrid import RegridConservative
+from landusedata.utils import ImportLUH2StaticFile, ImportRegridTarget
+from landusedata.utils import SetMaskRegridTarget, DefineStaticMask
+
+
+# Add version checking here in case environment.yml not used
+def main(args):
+
+    # Import and prep the LUH2 datasets and regrid target
+    filelist = [args.luh2_states_file,
+                args.luh2_transitions_file,
+                args.luh2_management_file]
+    ds_luh2 = []
+    for filename in filelist:
+        ds_luh2.append(ImportLUH2TimeSeries(filename,start=args.begin,stop=args.end))
+
+    # Import the LUH2 static data to use for masking
+    ds_luh2_static = ImportLUH2StaticFile(args.luh2_static_file)
+
+    # Create a new variable in which the ice/water fraction is inverted,
+    # i.e. the land fraction
+    ds_luh2_static["landfrac"] = 1 - ds_luh2_static.icwtr
+
+    # Define static luh2 ice/water mask
+    mask_icwtr = DefineStaticMask(ds_luh2_static)
+
+    # Mask all LUH2 input data using the ice/water fraction for the LUH2 static data
+    ds_luh2_static['mask'] = mask_icwtr
+    for dataset in ds_luh2:
+        dataset['mask'] = mask_icwtr
+
+    # Import and prep the regrid target surface dataset
+    ds_regrid_target = ImportRegridTarget(args.regrid_target_file)
+
+    # Mask the regrid target
+    ds_regrid_target = SetMaskRegridTarget(ds_regrid_target)
+
+    # Create an output dataset to contain individually regridded landuse percent datasets
+    ds_output = xr.Dataset()
+
+    # Loop through the data set list, regrid and merge the sets.
+    # At the beginning of the loop, there is no regrid weights file to reuse
+    regrid_reuse = False
+    for dataset in ds_luh2:
+
+        # Regrid the luh2 data to the target grid
+        # TO DO: provide a check for the save argument based on the input arguments
+        regrid_luh2,regridder_luh2 = RegridConservative(dataset, ds_regrid_target,
+                                                        args.regridder_weights, regrid_reuse)
+
+        # Regrid the inverted ice/water fraction data to the target grid
+        regrid_land_fraction = regridder_luh2(ds_luh2_static)
+
+        # Adjust the luh2 data by the land fraction
+        # TO DO: determine if this is necessary for the transitions and management data
+        regrid_luh2 = regrid_luh2 / regrid_land_fraction.landfrac
+
+        # Correct the state sum (checks if argument passed is state file in the function)
+        regrid_luh2 = CorrectStateSum(regrid_luh2)
+
+        # Add additional required variables for the host land model.
+        # Add 'YEAR' as a variable.
+        # If we are merging, we might not need to do this, so check to see if it is there already.
+        # This is a requirement of the HLM dyn_subgrid module and should be the actual year.
+        # Note that the time variable from the LUH2 data is 'years since ...' so we need to
+        # add the input data year
+        if (not "YEAR" in list(regrid_luh2.variables)):
+            regrid_luh2["YEAR"] = regrid_luh2.time + dataset.timesince
+            regrid_luh2["LONGXY"] = ds_regrid_target["LONGXY"] # TO DO: double check if this is strictly necessary
+            regrid_luh2["LATIXY"] = ds_regrid_target["LATIXY"] # TO DO: double check if this is strictly necessary
+
+        # Rename the dimensions for the output. This needs to happen after the "LONGXY/LATIXY" assignment
+        if (not 'lsmlat' in list(regrid_luh2.dims)):
+            regrid_luh2 = regrid_luh2.rename_dims({'lat':'lsmlat','lon':'lsmlon'})
+
+        # Reapply the coordinate attributes. This is a workaround for an xarray bug (#8047)
+        # Currently only need time
+        regrid_luh2.time.attrs = dataset.time.attrs
+        regrid_luh2.lat.attrs = dataset.lat.attrs
+        regrid_luh2.lon.attrs = dataset.lon.attrs
+
+        # Merge the current regridded luh2 data into the running output dataset
+        # TO DO: check that the grid resolution matches
+        # We could do this with an append during the write phase instead of the merge.
+        # What is the performance impact of writing to disk between dataset regrids
+        # versus holding the output in memory?
+        ds_output = ds_output.merge(regrid_luh2)
+
+        # Reuse the weights file generated on the first pass of the loop
+        # for all subsequent regrids
+        regrid_reuse = True
+
+    # Write the files
+    # TO DO: add check to handle if the user enters the full path
+    output_file = os.path.join(os.getcwd(),args.output)
+    print("generating output: {}".format(output_file))
+    ds_output.to_netcdf(output_file)
+
+# Note: no standalone entry point; main(args) is invoked through the
+# top-level parser in landusedata._main, which supplies the args namespace.
diff --git a/tools/landusedata/src/landusedata/luh2mod.py b/tools/landusedata/src/landusedata/luh2mod.py
new file mode 100644
index 0000000000..c4c3743a33
--- /dev/null
+++ b/tools/landusedata/src/landusedata/luh2mod.py
@@ -0,0 +1,110 @@
+import sys
+import numpy as np
+import xarray as xr
+
+# Import luh2 or surface data sets
+def ImportLUH2TimeSeries(input_file,start=None,stop=None):
+
+    # Open files
+    # Set decode_times to false as the luh2 raw data is outside the range
+    # of the standard NetCDF datetime format.
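+    # With decode_times=False the time coordinate is read as raw numeric
+    # offsets; PrepDataset below recovers the 'years since' reference year
+    # and slices the requested date range against it.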
+    datasetout = xr.open_dataset(input_file, cache=False, decode_times=False)
+    print("Input file dataset opened: {}".format(input_file))
+
+    # Prep the input data for use
+    datasetout = PrepDataset(datasetout,start,stop)
+
+    return(datasetout)
+
+# Prepare the input_file to be used for regridding
+def PrepDataset(input_dataset,start=None,stop=None):
+
+    # Use the maximum span if start and stop are not present.
+    # This assumes that the luh2 raw data will always use a
+    # 'years since' style format.
+
+    # Get the units to determine the file time.
+    # It is expected that the time units are 'years since ...'
+    time_since_array = input_dataset.time.units.split()
+    if (time_since_array[0] != 'years'):
+        sys.exit("FileTimeUnitsError: input file units of time is not 'years since ...'")
+
+    # Note that the datetime package is not used as the date range might
+    # be beyond the package's applicable bounds
+    time_since = int(time_since_array[2].split('-')[0])
+
+    # Get the time bounds of the input file
+    start_bound = input_dataset.time.values[0]
+    stop_bound = input_dataset.time.values[-1]
+
+    # If no input provided, simply get the bounds of the time
+    if start is None:
+        start = start_bound + time_since
+
+    if stop is None:
+        stop = stop_bound + time_since
+
+    # Convert the input dates to years since the file's reference year
+    years_since_start = start - time_since
+    years_since_stop = stop - time_since
+
+    # Abort if the times provided are outside the applicable range
+    if (years_since_start < start_bound or years_since_stop < start_bound or
+        years_since_start > stop_bound or years_since_stop > stop_bound):
+        sys.exit("StartStopBoundError: the input start or stop date is outside the applicable range of {} to {}".format(time_since+start_bound,time_since+stop_bound))
+
+    # Truncate the data to the user defined range.
+    # This might need some more error handling for when
+    # the start/stop is out of range
+    input_dataset = input_dataset.sel(time=slice(years_since_start,years_since_stop))
+
+    # Save the timesince as a variable for future use
+    input_dataset["timesince"] = time_since
+
+    # Correct the boundary variables needed for regridding
+    input_dataset = _BoundsVariableFixLUH2(input_dataset)
+
+    return(input_dataset)
+
+# Create the necessary variables "lat_b" and "lon_b" for xESMF conservative regridding.
+# Each lat/lon boundary array is a 2D array corresponding to the bounds of each
+# coordinate position (e.g. lat_boundary would be 90.0 and 89.75 for lat coordinate
+# of 89.875).
+def _BoundsVariableFixLUH2(input_dataset):
+
+    # Create lat and lon bounds as a single dimension array out of the LUH2 two-dimensional bounds array.
+    # Future todo: is it possible to have xESMF recognize and use the original 2D array?
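+    # Each cell contributes its upper bound; prepending the first cell's
+    # lower bound yields the N+1 edge values that xESMF expects for
+    # conservative regridding.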
+ input_dataset["lat_b"] = np.insert(input_dataset.lat_bounds[:,1].data,0,input_dataset.lat_bounds[0,0].data) + input_dataset["lon_b"] = np.insert(input_dataset.lon_bounds[:,1].data,0,input_dataset.lon_bounds[0,0].data) + + # Drop the old boundary names to avoid confusion + input_dataset = input_dataset.drop(labels=['lat_bounds','lon_bounds']) + + print("LUH2 dataset lat/lon boundary variables formatted and added as new variable for xESMF") + + return(input_dataset) + +# Temporary: Add minor correction factor to assure states sum to one +def CorrectStateSum(input_dataset): + + # Only calculate the state sum to unity correction for the appropiate dataset + # TO DO: Update this to use the check function + if (not(any('irrig' in var for var in input_dataset) or + any('_to_' in var for var in input_dataset))): + + # Drop the secma and secmb variables temporarily + temp_dataset = input_dataset.drop({'secma','secmb'}) + + # Sum the remaining state variables and normalize + state_sum = temp_dataset.to_array().sum(dim='variable') + state_sum = state_sum.where(state_sum != 0) + temp_dataset = temp_dataset / state_sum + + # Update dataset with new scaled values + input_dataset.update(temp_dataset) + + # Save the correction value + input_dataset["stscf"] = 1.0 / state_sum + + return(input_dataset) diff --git a/tools/landusedata/src/landusedata/regrid.py b/tools/landusedata/src/landusedata/regrid.py new file mode 100644 index 0000000000..7074a05ed9 --- /dev/null +++ b/tools/landusedata/src/landusedata/regrid.py @@ -0,0 +1,64 @@ +import xesmf as xe + +def RegridConservative(ds_to_regrid, ds_regrid_target, regridder_weights, regrid_reuse): + + # define the regridder transformation + regridder = GenerateRegridder(ds_to_regrid, ds_regrid_target, regridder_weights, regrid_reuse) + + # Loop through the variables to regrid + ds_regrid = RegridLoop(ds_to_regrid, regridder) + + return (ds_regrid, regridder) + +def GenerateRegridder(ds_to_regrid, ds_regrid_target, regridder_weights_file, regrid_reuse): + + regrid_method = "conservative" + print("\nDefining regridder, method: ", regrid_method) + + if (regrid_reuse): + regridder = xe.Regridder(ds_to_regrid, ds_regrid_target, + regrid_method, weights=regridder_weights_file) + else: + regridder = xe.Regridder(ds_to_regrid, ds_regrid_target, regrid_method) + + # If we are not reusing the regridder weights file, then save the regridder + filename = regridder.to_netcdf(regridder_weights_file) + print("regridder saved to file: ", filename) + + return(regridder) + +def RegridLoop(ds_to_regrid, regridder): + + # To Do: implement this with dask + print("\nRegridding") + + # Loop through the variables one at a time to conserve memory + ds_varnames = list(ds_to_regrid.variables.keys()) + varlen = len(ds_to_regrid.variables) + first_var = False + for i in range(varlen-1): + + # Skip time variable + if (not "time" in ds_varnames[i]): + + # Only regrid variables that match the lat/lon shape. + if (ds_to_regrid[ds_varnames[i]][0].shape == (ds_to_regrid.lat.shape[0], ds_to_regrid.lon.shape[0])): + print("regridding variable {}/{}: {}".format(i+1, varlen, ds_varnames[i])) + + # For the first non-coordinate variable, copy and regrid the dataset as a whole. + # This makes sure to correctly include the lat/lon in the regridding. 
+                if (not(first_var)):
+                    ds_regrid = ds_to_regrid[ds_varnames[i]].to_dataset() # convert data array to dataset
+                    ds_regrid = regridder(ds_regrid)
+                    first_var = True
+
+                # Once the first variable has been included, then we can regrid by variable
+                else:
+                    ds_regrid[ds_varnames[i]] = regridder(ds_to_regrid[ds_varnames[i]])
+            else:
+                print("skipping variable {}/{}: {}".format(i+1, varlen, ds_varnames[i]))
+        else:
+            print("skipping variable {}/{}: {}".format(i+1, varlen, ds_varnames[i]))
+
+    print("\n")
+    return(ds_regrid)
diff --git a/tools/landusedata/src/landusedata/utils.py b/tools/landusedata/src/landusedata/utils.py
new file mode 100644
index 0000000000..addb5076b1
--- /dev/null
+++ b/tools/landusedata/src/landusedata/utils.py
@@ -0,0 +1,65 @@
+import xarray as xr
+
+def ImportRegridTarget(filename):
+    dataset = xr.open_dataset(filename)
+
+    # Check the file type
+    if 'lsmlat' not in dataset.dims:
+        raise TypeError("incorrect file, must be surface dataset")
+
+    # Prepare the regrid dataset
+    dataset = _RegridTargetPrep(dataset)
+
+    return dataset
+
+def _RegridTargetPrep(regrid_target):
+    """
+    Rename the CLM surface data file regrid target lat/lon dimensions
+
+    The CLM surface datasets use dimension names that are not recognizable
+    by xesmf. As such, this function renames the dimensions. It also adds
+    lat/lon coordinates based on the LONGXY and LATIXY variables.
+    """
+    regrid_target = regrid_target.rename_dims(dims_dict={'lsmlat':'lat','lsmlon':'lon'})
+    regrid_target['lon'] = regrid_target.LONGXY.isel(lat=0)
+    regrid_target['lat'] = regrid_target.LATIXY.isel(lon=0)
+
+    return regrid_target
+
+# Open the LUH2 static data file
+def ImportLUH2StaticFile(filename):
+    dataset = xr.open_dataset(filename)
+
+    # Check to see if the imported dataset has the correct variables
+    listcheck = ['ptbio', 'fstnf', 'carea', 'icwtr', 'ccode', 'lat_bounds', 'lon_bounds']
+    if list(dataset.var()) != listcheck:
+        raise TypeError("incorrect file, must be LUH2 static file")
+
+    # Convert all data from single to double precision
+    dataset = dataset.astype('float64')
+    return dataset
+
+# Define the land/ocean mask based on the ice/water data
+# from the LUH2 static data set
+def DefineStaticMask(dataset):
+    try:
+        mask = xr.where(dataset.icwtr != 1, 1, 0)
+        # mask = (1.-dataset.icwtr) / (1.-dataset.icwtr)
+    except AttributeError:
+        raise AttributeError("incorrect dataset, must be static luh2 dataset")
+    else:
+        return mask
+
+# Surface dataset specific masking sub-function
+def SetMaskRegridTarget(dataset):
+    try:
+        dataset["mask"] = xr.where(dataset.PCT_NATVEG > 0, 1, 0)
+    except AttributeError:
+        raise AttributeError("incorrect dataset, must be CLM surface dataset")
+    else:
+        return(dataset)
+
+# TODO: add the following common functions:
+# - write to files
+# - read dataset into list of datasets
diff --git a/tools/landusedata/tests/conftest.py b/tools/landusedata/tests/conftest.py
new file mode 100644
index 0000000000..78d647af13
--- /dev/null
+++ b/tools/landusedata/tests/conftest.py
@@ -0,0 +1,44 @@
+import pytest
+import xarray as xr
+
+@pytest.fixture(scope="module")
+def target_file_location():
+    return 'tests/resources/surfdata_4x5.nc'
+
+@pytest.fixture(scope="module")
+def static_file_location():
+    return 'tests/resources/staticData_quarterdeg.nc'
+
+@pytest.fixture(scope="module")
+def landusepft_file_location():
+    return 'tests/resources/CLM5_current_luhforest_deg025.nc'
+
+@pytest.fixture(scope="module")
+def lupftsurf_file_location():
+    return 'tests/resources/CLM5_current_surf_deg025.nc'
+
+@pytest.fixture(scope="function")
+def static_dataset(static_file_location):
+    dataset = xr.open_dataset(static_file_location)
+    dataset = dataset.astype('float64')
+    return dataset
+
+@pytest.fixture(scope="function")
+def target_dataset(target_file_location):
+    dataset = xr.open_dataset(target_file_location)
+    dataset = dataset.astype('float64')
+    return dataset
+
+@pytest.fixture(scope="function")
+def landusepft_dataset(landusepft_file_location):
+    dataset = xr.open_dataset(landusepft_file_location)
+    dataset = dataset.astype('float64',casting='safe')
+    return dataset
+
+@pytest.fixture(scope="function")
+def mock_mask(static_file_location):
+    dataset = xr.open_dataset(static_file_location)
+    # This duplicates the NaN-based variant of utils.DefineStaticMask. Should that simply be called?
+    mask = (1.-dataset.icwtr) / (1.-dataset.icwtr)
+    return mask
diff --git a/tools/landusedata/tests/landusepft_test.py b/tools/landusedata/tests/landusepft_test.py
new file mode 100644
index 0000000000..ee08c4f47b
--- /dev/null
+++ b/tools/landusedata/tests/landusepft_test.py
@@ -0,0 +1,91 @@
+import pytest
+import numpy as np
+
+from landusedata import landusepftmod
+from landusedata import utils
+
+# Positive test case for importing landuse x pft data file
+# TODO: parameterize this call to run all the landusepft files
+def test_posImportLandusePFTFile(landusepft_file_location):
+    data = landusepftmod.ImportLandusePFTFile(landusepft_file_location)
+    landusepft_variables = list(data.var())
+    assert landusepft_variables == ['EDGEN',
+                                    'EDGEE',
+                                    'EDGES',
+                                    'EDGEW',
+                                    'LAT',
+                                    'LATIXY',
+                                    'LON',
+                                    'LONGXY',
+                                    'LANDMASK',
+                                    'LANDFRAC',
+                                    'AREA',
+                                    'PCT_NAT_PFT']
+
+# Negative test case for importing incorrect file via CLM5 landuse file open function
+def test_negImportLandusePFTFile(static_file_location):
+    with pytest.raises(TypeError) as exp:
+        landusepftmod.ImportLandusePFTFile(static_file_location)
+    assert str(exp.value) == "incorrect file, must be CLM5 landuse file"
+
+# Confirm that the data type has been converted upon import from single to double precision
+def test_pos_ImportLandusePFTFileDtypeConversion(landusepft_file_location):
+    dataset = landusepftmod.ImportLandusePFTFile(landusepft_file_location)
+    assert np.dtype('float64') == dataset['PCT_NAT_PFT'].dtype
+
+# Unit tests to update CLM5 landuse datasets with lat/lon coordinates
+def test_latlon_posAddLatLonCoordinates(landusepft_dataset):
+    dataset_modified = landusepftmod.AddLatLonCoordinates(landusepft_dataset)
+    assert 'lat' in list(dataset_modified.coords) and 'lon' in list(dataset_modified.coords)
+
+# Test that the math of the function makes sense. We can sum the lat/lon variables
+# to make sure that the update is correct
+def test_latlonsum_posAddLatLonCoordinates(landusepft_dataset):
+    dataset_modified = landusepftmod.AddLatLonCoordinates(landusepft_dataset)
+    assert (sum(abs(dataset_modified.lon)).values.item() == 129600.
+            and sum(abs(dataset_modified.lat)).values.item() == 32400.)
+
+# Static mask function (utils.DefineStaticMask) unit tests
+# - Is there a better way to simply test that the mask matches all gridcells?
+# - Should we test what happens when the wrong dataset is used (i.e. landuse x pft)?
+# - Should this test that the mask is truly one or zero everywhere?
+
+# Make sure that the generated mask makes sense in that a known ice/water gridcell is 0
+def test_posDefineStaticMask(static_dataset):
+    maskoutput = utils.DefineStaticMask(static_dataset)
+    assert maskoutput[0][0] == 0
+
+# Make sure that the generated mask is 1 everywhere it isn't 0
+def test_pos_ones_DefineStaticMask(static_dataset):
+    maskoutput = utils.DefineStaticMask(static_dataset)
+    assert ((maskoutput == 0) | (maskoutput == 1)).all().values.item()
+
+# Make sure a known gridcell that should be land is not masked out
+# TODO: change this to use sel indexing
+def test_neg_output_DefineStaticMask(static_dataset):
+    maskoutput = utils.DefineStaticMask(static_dataset)
+    assert maskoutput[360][850] == 1
+
+def test_neg_input_DefineStaticMask(landusepft_dataset):
+    with pytest.raises(AttributeError) as exp:
+        maskoutput = utils.DefineStaticMask(landusepft_dataset)
+    assert str(exp.value) == "incorrect dataset, must be static luh2 dataset"
+
+# Bare ground fraction removal unit test
+# Make sure that the pft fractions sum to 100% on a known land gridcell
+def test_posRenormalizePFTs(landusepft_dataset):
+    percent = landusepftmod.RenormalizePFTs(landusepft_dataset)
+
+    # Sum along the natpft dimension only. Use min_count to avoid
+    # NaNs from masking turning into zeros
+    percent = percent.sum(dim='natpft',min_count=1)
+
+    # Convert to a dataframe to stack lat and lon in one dimension
+    # and drop the NaNs. Note that since percent is unassociated
+    # with a dataset we need to pass the name argument when converting
+    # to a dataframe.
+    percent = percent.to_dataframe(name="testpercent").dropna(how='all')
+
+    # Check that all the summations are unity within a specific tolerance
+    tolerance = 3.0e-16
+    assert (abs(percent - 1.0) < tolerance).all().values.item()
diff --git a/tools/landusedata/tests/luh2_test.py b/tools/landusedata/tests/luh2_test.py
new file mode 100644
index 0000000000..a5da26ae81
--- /dev/null
+++ b/tools/landusedata/tests/luh2_test.py
@@ -0,0 +1,3 @@
+import pytest
+
+from landusedata import luh2mod
diff --git a/tools/landusedata/tests/main_test.py b/tools/landusedata/tests/main_test.py
new file mode 100644
index 0000000000..c43be66323
--- /dev/null
+++ b/tools/landusedata/tests/main_test.py
@@ -0,0 +1,30 @@
+import pytest
+
+from landusedata._main import main
+
+# TODO: parameterize this
+def test_main(capsys):
+    """
+    Positive case testing for choice input
+    Note that the regridfile is a global argument to all landusedata subcommands
+    and thus must come last.
+    """
+    statefile = 'statefile'
+    regridfile = 'regridfile'
+    main(['luh2',statefile,regridfile])
+    out, err = capsys.readouterr()
+    assert out == 'calling luh2 code: {},{}\n'.format(regridfile,statefile)
+    assert err == ''
+
+def test_main_neg(capsys):
+    """
+    Negative case testing for choice input
+
+    This only tests the system exit. Testing the ArgumentError
+    is a little tricky to catch as it is upstream of SystemExit.
+ """ + with pytest.raises(SystemExit) as exp: + main(['notallowed']) + assert str(exp.value) == "2" diff --git a/tools/landusedata/tests/resources/README.md b/tools/landusedata/tests/resources/README.md new file mode 100644 index 0000000000..6bb3681a1e --- /dev/null +++ b/tools/landusedata/tests/resources/README.md @@ -0,0 +1,2 @@ +This resources directory is used to temporarily store +mock data for unit testing during test development diff --git a/tools/landusedata/tests/utils_test.py b/tools/landusedata/tests/utils_test.py new file mode 100644 index 0000000000..1e6130a222 --- /dev/null +++ b/tools/landusedata/tests/utils_test.py @@ -0,0 +1,47 @@ +import pytest + +from landusedata import utils + +def test_RegridTargetPrep_dims(target_dataset): + target_dataset = utils._RegridTargetPrep(target_dataset) + dim_list = list(target_dataset.dims) + latlon_list = ['lat','lon'] + test_list = [dim for latlon in latlon_list for dim in dim_list + if latlon in dim] + assert latlon_list == test_list + +def test_RegridTargetPrep_coords(target_dataset): + target_dataset = utils._RegridTargetPrep(target_dataset) + coords_list = list(target_dataset.coords) + latlon_list = ['lat','lon'] + test_list = [coord for latlon in latlon_list for coord in coords_list + if latlon in coord] + assert latlon_list == test_list + +def test_negImportRegridTarget(landusepft_file_location): + with pytest.raises(TypeError) as exp: + utils.ImportRegridTarget(landusepft_file_location) + assert str(exp.value) == "incorrect file, must be surface dataset" + +def test_negSetMaskRegridTarget(landusepft_dataset): + with pytest.raises(AttributeError) as exp: + maskoutput = utils.SetMaskRegridTarget(landusepft_dataset) + assert str(exp.value) == "incorrect dataset, must be CLM surface dataset" + +# Postive test case for importing LUH2 static data file +def test_posImportLUH2StaticFile(static_file_location): + data = utils.ImportLUH2StaticFile(static_file_location) + static_variables = list(data.var()) + assert static_variables == ['ptbio', + 'fstnf', + 'carea', + 'icwtr', + 'ccode', + 'lat_bounds', + 'lon_bounds'] + +# Negative test case for importing incorrect file via static luh2 file open function +def test_negImportLUH2StaticFile(landusepft_file_location): + with pytest.raises(TypeError) as exp: + utils.ImportLUH2StaticFile(landusepft_file_location) + assert str(exp.value) == "incorrect file, must be LUH2 static file"