Skip to content

Commit

Permalink
feat(ingest/glue): add catalog alias to the urn
Browse files Browse the repository at this point in the history
  • Loading branch information
maiarareinaldo committed Mar 4, 2024
1 parent 290ff47 commit 9b8933b
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@


# Default platform identifier; also the first entry of VALID_PLATFORMS below.
DEFAULT_PLATFORM = "glue"
# Default value of GlueSourceConfig.catalog_alias, which is prepended
# ("<alias>.<full_table_name>") to the dataset name used when building URNs.
AWS_DATA_CATALOG = "awsdatacatalog"
# Platform names treated as valid.
# NOTE(review): presumably checked against the configured platform — confirm
# where this list is validated.
VALID_PLATFORMS = [DEFAULT_PLATFORM, "athena"]


Expand Down Expand Up @@ -161,6 +162,10 @@ class GlueSourceConfig(
stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
default=None, description=""
)
catalog_alias: str = Field(
default=AWS_DATA_CATALOG,
description="The catalog alias to be used in the dataset URN.",
)

def is_profiling_enabled(self) -> bool:
return self.profiling is not None and is_profiling_enabled(
Expand Down Expand Up @@ -424,7 +429,7 @@ def process_dataflow_node(
# we know that the table will already be covered when ingesting Glue tables
node_urn = make_dataset_urn_with_platform_instance(
platform=self.platform,
name=full_table_name,
name=f"{self.source_config.catalog_alias}.{full_table_name}",
env=self.env,
platform_instance=self.source_config.platform_instance,
)
Expand Down Expand Up @@ -953,7 +958,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

dataset_urn = make_dataset_urn_with_platform_instance(
platform=self.platform,
name=full_table_name,
name=f"{self.source_config.catalog_alias}.{full_table_name}",
env=self.env,
platform_instance=self.source_config.platform_instance,
)
Expand Down

0 comments on commit 9b8933b

Please sign in to comment.