Skip to content

Commit

Permalink
MTL Text representation parsing (#58)
Browse files Browse the repository at this point in the history
* Adding facilities to parse MTL metadata from MTL text format

* Logging parsing error

* Adding tests and data files for MTL text parsing

* Fixing MTL text parsing

* Splitting off MTL text based creation from the `create_item` function

* Fixing typing issues in MTL text parsing
Fixed formatting

* Fixing test to parse from MTL text

---------

Co-authored-by: Pete Gadomski <pete.gadomski@gmail.com>
  • Loading branch information
constantinius and gadomski authored Aug 21, 2023
1 parent 05ffe3a commit b842e9c
Show file tree
Hide file tree
Showing 6 changed files with 1,178 additions and 5 deletions.
69 changes: 67 additions & 2 deletions src/stactools/landsat/mtl_metadata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from collections import defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast

from lxml import etree
from lxml.etree import _Element as lxmlElement
from pyproj import Geod
from pystac.utils import map_opt, str_to_datetime
from stactools.core.io import ReadHrefModifier
from stactools.core.io import ReadHrefModifier, read_text
from stactools.core.io.xml import XmlElement
from stactools.core.projection import transform_from_bbox

Expand All @@ -13,6 +15,9 @@ class MTLError(Exception):
pass


# Recursive type for parsed MTL text: each key maps either to a plain string
# value or to a nested mapping of the same shape (a sub-group).
MTLGroup = Dict[str, Union[str, "MTLGroup"]]


class MtlMetadata:
"""Parses a Collection 2 MTL XML file.
Expand Down Expand Up @@ -322,3 +327,63 @@ def from_file(
XmlElement.from_file(href, read_href_modifier),
href=href,
)

@classmethod
def from_text_file(
    cls,
    href: str,
    read_href_modifier: Optional[ReadHrefModifier] = None,
) -> "MtlMetadata":
    """Creates an MtlMetadata object from an MTL text metadata file.

    The text is read from ``href``, parsed into nested groups, converted
    into an equivalent element tree and wrapped in an ``XmlElement`` that
    is handed to the regular constructor.

    Args:
        href (str): An href to an MTL text metadata file.
        read_href_modifier (Optional[ReadHrefModifier]): An optional
            modifier that is passed along to ``read_text`` together with
            the href.

    Returns:
        MtlMetadata: The metadata object built from the parsed text.

    Raises:
        MTLError: If the text contains no entries at all, or if its first
            entry is a plain value instead of a group.
    """
    text = read_text(href, read_href_modifier)
    mtl = _parse_mtl_group(iter(text.split("\n")))
    if not mtl:
        # Without this guard the next() call below would leak a bare
        # StopIteration for empty or unparseable input.
        raise MTLError(f"No MTL metadata found in text file: {href}")

    # Only the first top-level entry is used as the root of the tree.
    root_name, root_group = next(iter(mtl.items()))
    if not isinstance(root_group, dict):
        # A plain "KEY = value" first line cannot serve as the root group.
        raise MTLError(
            f"Expected a top-level GROUP in MTL text file, "
            f"found '{root_name}': {href}"
        )
    return cls(
        XmlElement(_mtl_group_to_element(root_name, cast(MTLGroup, root_group))),
        href=href,
    )


def _parse_mtl_group(lines: Iterator[str]) -> MTLGroup:
    """Parses the entries of one MTL group from an iterator of text lines.

    Lines are consumed from ``lines`` until the iterator is exhausted or a
    line yields an empty key, ``END`` or ``END_GROUP``. A ``GROUP`` line
    starts a nested group, which is parsed recursively from the same
    iterator and stored under the group's name.

    Args:
        lines (Iterator[str]): The remaining lines of the MTL text. The
            iterator is shared with nested calls, so it is advanced past
            everything that belongs to this group.

    Returns:
        MTLGroup: Mapping of keys to string values or nested groups.
    """
    entries: MTLGroup = {}
    for raw_line in lines:
        name, text = _parse_mtl_line(raw_line)
        if name in ("", "END", "END_GROUP"):
            # Terminator for the current group (or for the whole file).
            return entries
        if name == "GROUP":
            # For GROUP lines the parsed value is the name of the sub-group.
            entries[text] = _parse_mtl_group(lines)
        else:
            entries[name] = text
    return entries


def _parse_mtl_line(line: str) -> Tuple[str, str]:
    """Splits a single line of MTL text into a ``(key, value)`` pair.

    Surrounding whitespace is ignored. A blank line and a bare ``END`` line
    are both reported as ``("END", "")``. Otherwise the line is split at the
    first ``" = "``; if that separator is missing, the whole line becomes
    the key and the value is empty. One pair of enclosing double quotes is
    removed from the value.

    Args:
        line (str): A raw line of MTL text.

    Returns:
        Tuple[str, str]: The key and the (unquoted) value.
    """
    stripped = line.strip()
    if stripped in ("", "END"):
        return "END", ""

    name, _separator, raw = stripped.partition(" = ")
    is_quoted = raw[:1] == '"' and raw[-1:] == '"'
    return name, (raw[1:-1] if is_quoted else raw)


def _mtl_value_element(tag: str, value: str) -> lxmlElement:
    """Builds a childless element named ``tag`` whose text is ``value``.

    Args:
        tag (str): The tag name of the new element.
        value (str): The text content assigned to the element.

    Returns:
        lxmlElement: The newly created element.
    """
    leaf: lxmlElement = etree.Element(tag)
    leaf.text = value
    return leaf


def _mtl_group_to_element(name: str, group: MTLGroup) -> lxmlElement:
    """Converts a parsed MTL group into an element tree.

    Every entry of ``group`` becomes one child element, in the mapping's
    iteration order: nested groups are converted recursively, string values
    become elements carrying the value as text.

    Args:
        name (str): The tag name of the element created for this group.
        group (MTLGroup): The parsed entries of the group.

    Returns:
        lxmlElement: The element representing the group and its children.
    """
    parent: lxmlElement = etree.Element(name)
    for child_name, child in group.items():
        if isinstance(child, dict):
            parent.append(_mtl_group_to_element(child_name, child))
        else:
            parent.append(_mtl_value_element(child_name, child))
    return parent
71 changes: 69 additions & 2 deletions src/stactools/landsat/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,73 @@ def create_item(
Item: A STAC Item representing the Landsat scene.
"""
base_href = "_".join(mtl_xml_href.split("_")[:-1])
return create_item_from_mtl_metadata(
base_href,
MtlMetadata.from_file(mtl_xml_href, read_href_modifier),
use_usgs_geometry,
antimeridian_strategy,
read_href_modifier,
)


def create_item_from_mtl_text(
    mtl_text_href: str,
    use_usgs_geometry: bool = True,
    antimeridian_strategy: Strategy = Strategy.SPLIT,
    read_href_modifier: Optional[ReadHrefModifier] = None,
) -> Item:
    """Creates a STAC Item from an MTL text metadata file.

    Intended for Landsat 1-5 Collection 2 Level-1 or Landsat 4-5, 7-9
    Collection 2 Level-2 scene data. The MTL text is parsed into an
    ``MtlMetadata`` object and the Item creation itself is delegated to
    ``create_item_from_mtl_metadata``.

    Args:
        mtl_text_href (str): An href to an MTL text metadata file.
        use_usgs_geometry (bool): Use the geometry from a USGS STAC file that
            is stored alongside the text metadata file or pulled from the
            USGS STAC API.
        antimeridian_strategy (Strategy): Either split on -180 or normalize
            geometries so all longitudes are either positive or negative.
        read_href_modifier (Callable[[str], str]): An optional function to
            modify the MTL and USGS STAC hrefs (e.g., to add a token to a
            url).

    Returns:
        Item: A STAC Item representing the Landsat scene.
    """
    # The base href is everything in front of the last underscore.
    base_href, _, _ = mtl_text_href.rpartition("_")
    mtl_metadata = MtlMetadata.from_text_file(mtl_text_href, read_href_modifier)
    return create_item_from_mtl_metadata(
        base_href=base_href,
        mtl_metadata=mtl_metadata,
        use_usgs_geometry=use_usgs_geometry,
        antimeridian_strategy=antimeridian_strategy,
        read_href_modifier=read_href_modifier,
    )


def create_item_from_mtl_metadata(
base_href: str,
mtl_metadata: MtlMetadata,
use_usgs_geometry: bool = True,
antimeridian_strategy: Strategy = Strategy.SPLIT,
read_href_modifier: Optional[ReadHrefModifier] = None,
) -> Item:
"""Creates a STAC Item for Landsat 1-5 Collection 2 Level-1 or Landsat
4-5, 7-9 Collection 2 Level-2 scene data.
Args:
base_href (str):
mtl_metadata (MtlMetadata): The parsed MTL metadata.
use_usgs_geometry (bool): Use the geometry from a USGS STAC file that is
stored alongside the XML metadata file or pulled from the USGS STAC
API.
antimeridian_strategy (Antimeridian): Either split on -180 or
normalize geometries so all longitudes are either positive or
negative.
read_href_modifier (Callable[[str], str]): An optional function to
modify the MTL and USGS STAC hrefs (e.g., to add a token to a url).
Returns:
Item: A STAC Item representing the Landsat scene.
"""
sensor = Sensor(mtl_metadata.item_id[1])
satellite = int(mtl_metadata.item_id[2:4])
level = int(mtl_metadata.item_id[6])
Expand Down Expand Up @@ -163,7 +227,10 @@ def create_item(
projection.shape = mtl_metadata.sr_shape
projection.transform = mtl_metadata.sr_transform
centroid = shapely.geometry.shape(item.geometry).centroid
projection.centroid = {"lat": round(centroid.y, 5), "lon": round(centroid.x, 5)}
projection.centroid = {
"lat": round(centroid.y, 5),
"lon": round(centroid.x, 5),
}

item.stac_extensions.append(LANDSAT_EXTENSION_SCHEMA)
item.properties.update(**mtl_metadata.landsat_metadata)
Expand Down
Loading

0 comments on commit b842e9c

Please sign in to comment.