Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MTL Text representation parsing #58

Merged
merged 8 commits into from
Aug 21, 2023
Merged
69 changes: 67 additions & 2 deletions src/stactools/landsat/mtl_metadata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from collections import defaultdict
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast

from lxml import etree
from lxml.etree import _Element as lxmlElement
from pyproj import Geod
from pystac.utils import map_opt, str_to_datetime
from stactools.core.io import ReadHrefModifier
from stactools.core.io import ReadHrefModifier, read_text
from stactools.core.io.xml import XmlElement
from stactools.core.projection import transform_from_bbox

Expand All @@ -13,6 +15,9 @@ class MTLError(Exception):
pass


# Recursive mapping type for a parsed MTL text group: every key maps either to
# a plain string value or to another (nested) group of the same shape.
MTLGroup = Dict[str, Union[str, "MTLGroup"]]


class MtlMetadata:
"""Parses a Collection 2 MTL XML file.

Expand Down Expand Up @@ -322,3 +327,63 @@ def from_file(
XmlElement.from_file(href, read_href_modifier),
href=href,
)

@classmethod
def from_text_file(
    cls,
    href: str,
    read_href_modifier: Optional[ReadHrefModifier] = None,
) -> "MtlMetadata":
    """Creates an MtlMetadata from the text representation of an MTL file.

    The text is read with ``read_text``, parsed into nested groups, and the
    first top-level group is converted into an XML element tree that is
    wrapped in an ``XmlElement`` and handed to the class constructor.

    Args:
        href (str): An href to an MTL text metadata file.
        read_href_modifier (Callable[[str], str]): An optional function to
            modify the href before it is read.

    Returns:
        MtlMetadata: The metadata built from the first top-level group.

    Raises:
        MTLError: If no top-level group could be parsed from the text, or
            if the first top-level entry is a plain value instead of a
            group.
    """
    text = read_text(href, read_href_modifier)
    mtl = _parse_mtl_group(iter(text.split("\n")))
    if not mtl:
        # Without this guard, next() below would leak a bare StopIteration.
        raise MTLError(f"No MTL metadata groups found in {href}")

    root_name, root_group = next(iter(mtl.items()))
    if not isinstance(root_group, dict):
        # A plain "KEY = value" first entry cannot be turned into a tree.
        raise MTLError(
            f"Expected a top-level GROUP in {href}, found value '{root_name}'"
        )

    return cls(
        XmlElement(_mtl_group_to_element(root_name, root_group)),
        href=href,
    )


def _parse_mtl_group(lines: Iterator[str]) -> MTLGroup:
    """Parses one MTL group from an iterator of text lines.

    Lines are consumed from ``lines`` until a terminator is reached: an
    ``END`` or ``END_GROUP`` key, or an empty key. A ``GROUP`` line starts a
    nested group, which is parsed recursively from the same iterator and
    stored under the group's name. Every other line is stored as a
    key/value pair.

    Args:
        lines (Iterator[str]): The remaining lines of the MTL text. The
            iterator is advanced in place and shared with nested calls.

    Returns:
        MTLGroup: The parsed entries, in the order they were read.
    """
    terminators = ("", "END", "END_GROUP")
    parsed: MTLGroup = {}
    for raw_line in lines:
        name, text = _parse_mtl_line(raw_line)
        if name in terminators:
            break
        if name == "GROUP":
            # The value of a GROUP line is the name of the nested group.
            parsed[text] = _parse_mtl_group(lines)
        else:
            parsed[name] = text
    return parsed


def _parse_mtl_line(line: str) -> Tuple[str, str]:
    """Splits a single MTL text line into a key and a value.

    Surrounding whitespace is ignored. A blank line and a bare ``END`` line
    are both reported as ``("END", "")``. Otherwise the line is split on the
    first ``" = "``; a line without that separator yields the whole line as
    the key and an empty value. One pair of enclosing double quotes is
    removed from the value.

    Args:
        line (str): A raw line of MTL text.

    Returns:
        Tuple[str, str]: The key and the unquoted value.
    """
    stripped = line.strip()
    if stripped in ("", "END"):
        return ("END", "")

    key, _, value = stripped.partition(" = ")
    # Require two characters so that a lone '"' is not mistaken for a pair of
    # quotes and silently reduced to an empty string.
    if len(value) >= 2 and value.startswith('"') and value.endswith('"'):
        value = value[1:-1]

    return key, value


def _mtl_value_element(tag: str, value: str) -> lxmlElement:
    """Builds a leaf XML element named ``tag`` whose text is ``value``."""
    leaf: lxmlElement = etree.Element(tag)
    leaf.text = value
    return leaf


def _mtl_group_to_element(name: str, group: MTLGroup) -> lxmlElement:
    """Converts a parsed MTL group into an XML element tree.

    Args:
        name (str): The tag of the element created for this group.
        group (MTLGroup): The parsed entries of the group. Nested groups
            become child elements built recursively; string values become
            leaf elements carrying the value as text.

    Returns:
        lxmlElement: The element for this group, with one child per entry
            in the group's iteration order.
    """
    parent: lxmlElement = etree.Element(name)
    for child_name, child in group.items():
        if isinstance(child, dict):
            node = _mtl_group_to_element(child_name, child)
        else:
            node = _mtl_value_element(child_name, child)
        parent.append(node)
    return parent
71 changes: 69 additions & 2 deletions src/stactools/landsat/stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,73 @@ def create_item(
Item: A STAC Item representing the Landsat scene.
"""
base_href = "_".join(mtl_xml_href.split("_")[:-1])
return create_item_from_mtl_metadata(
base_href,
MtlMetadata.from_file(mtl_xml_href, read_href_modifier),
use_usgs_geometry,
antimeridian_strategy,
read_href_modifier,
)


def create_item_from_mtl_text(
    mtl_text_href: str,
    use_usgs_geometry: bool = True,
    antimeridian_strategy: Strategy = Strategy.SPLIT,
    read_href_modifier: Optional[ReadHrefModifier] = None,
) -> Item:
    """Creates a STAC Item for Landsat 1-5 Collection 2 Level-1 or Landsat
    4-5, 7-9 Collection 2 Level-2 scene data from an MTL text file.

    Args:
        mtl_text_href (str): An href to an MTL text metadata file.
        use_usgs_geometry (bool): Use the geometry from a USGS STAC file that is
            stored alongside the text metadata file or pulled from the USGS STAC
            API.
        antimeridian_strategy (Strategy): Either split on -180 or
            normalize geometries so all longitudes are either positive or
            negative.
        read_href_modifier (Callable[[str], str]): An optional function to
            modify the MTL and USGS STAC hrefs (e.g., to add a token to a url).

    Returns:
        Item: A STAC Item representing the Landsat scene.
    """
    # Everything before the last underscore of the MTL href is the base href.
    base_href = mtl_text_href.rpartition("_")[0]
    mtl_metadata = MtlMetadata.from_text_file(mtl_text_href, read_href_modifier)
    return create_item_from_mtl_metadata(
        base_href,
        mtl_metadata,
        use_usgs_geometry,
        antimeridian_strategy,
        read_href_modifier,
    )


def create_item_from_mtl_metadata(
base_href: str,
mtl_metadata: MtlMetadata,
use_usgs_geometry: bool = True,
antimeridian_strategy: Strategy = Strategy.SPLIT,
read_href_modifier: Optional[ReadHrefModifier] = None,
) -> Item:
"""Creates a STAC Item for Landsat 1-5 Collection 2 Level-1 or Landsat
4-5, 7-9 Collection 2 Level-2 scene data.

Args:
base_href (str):
mtl_metadata (MtlMetadata): The parsed MTL metadata.
use_usgs_geometry (bool): Use the geometry from a USGS STAC file that is
stored alongside the XML metadata file or pulled from the USGS STAC
API.
antimeridian_strategy (Antimeridian): Either split on -180 or
normalize geometries so all longitudes are either positive or
negative.
read_href_modifier (Callable[[str], str]): An optional function to
modify the MTL and USGS STAC hrefs (e.g., to add a token to a url).

Returns:
Item: A STAC Item representing the Landsat scene.
"""
sensor = Sensor(mtl_metadata.item_id[1])
satellite = int(mtl_metadata.item_id[2:4])
level = int(mtl_metadata.item_id[6])
Expand Down Expand Up @@ -163,7 +227,10 @@ def create_item(
projection.shape = mtl_metadata.sr_shape
projection.transform = mtl_metadata.sr_transform
centroid = shapely.geometry.shape(item.geometry).centroid
projection.centroid = {"lat": round(centroid.y, 5), "lon": round(centroid.x, 5)}
projection.centroid = {
"lat": round(centroid.y, 5),
"lon": round(centroid.x, 5),
}

item.stac_extensions.append(LANDSAT_EXTENSION_SCHEMA)
item.properties.update(**mtl_metadata.landsat_metadata)
Expand Down
Loading
Loading