-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ingest): support stdin in datahub put
#9359
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from expandvars import UnboundVariable, expandvars | ||
|
||
from datahub.configuration.common import ConfigurationError, ConfigurationMechanism | ||
from datahub.configuration.json_loader import JsonConfigurationMechanism | ||
from datahub.configuration.toml import TomlConfigurationMechanism | ||
from datahub.configuration.yaml import YamlConfigurationMechanism | ||
|
||
|
@@ -100,33 +101,42 @@ def load_config_file( | |
squirrel_original_config: bool = False, | ||
squirrel_field: str = "__orig_config", | ||
allow_stdin: bool = False, | ||
resolve_env_vars: bool = True, | ||
process_directives: bool = True, | ||
allow_remote: bool = True, # TODO: Change the default to False. | ||
resolve_env_vars: bool = True, # TODO: Change the default to False. | ||
Comment on lines
+104
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's blocking us from doing this now? Especially There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need to tweak datahub-actions to explicitly set resolve_env_vars=True before we can change the default there we could change it for allow_remote right now, but didn't want to have too many unrelated changes in this PR right now |
||
process_directives: bool = False, | ||
) -> dict: | ||
config_mech: ConfigurationMechanism | ||
if allow_stdin and config_file == "-": | ||
# If we're reading from stdin, we assume that the input is a YAML file. | ||
# Note that YAML is a superset of JSON, so this will also read JSON files. | ||
config_mech = YamlConfigurationMechanism() | ||
raw_config_file = sys.stdin.read() | ||
else: | ||
config_file_path = pathlib.Path(config_file) | ||
if config_file_path.suffix in {".yaml", ".yml"}: | ||
config_mech = YamlConfigurationMechanism() | ||
elif config_file_path.suffix == ".json": | ||
config_mech = JsonConfigurationMechanism() | ||
elif config_file_path.suffix == ".toml": | ||
config_mech = TomlConfigurationMechanism() | ||
else: | ||
raise ConfigurationError( | ||
f"Only .toml and .yml are supported. Cannot process file type {config_file_path.suffix}" | ||
f"Only .toml, .yml, and .json are supported. Cannot process file type {config_file_path.suffix}" | ||
) | ||
|
||
url_parsed = parse.urlparse(str(config_file)) | ||
if url_parsed.scheme in ("http", "https"): # URLs will return http/https | ||
if allow_remote and url_parsed.scheme in ( | ||
"http", | ||
"https", | ||
): # URLs will return http/https | ||
# If the URL is remote, we need to fetch it. | ||
try: | ||
response = requests.get(str(config_file)) | ||
raw_config_file = response.text | ||
except Exception as e: | ||
raise ConfigurationError( | ||
f"Cannot read remote file {config_file_path}, error:{e}" | ||
) | ||
f"Cannot read remote file {config_file_path}: {e}" | ||
) from e | ||
else: | ||
if not config_file_path.is_file(): | ||
raise ConfigurationError(f"Cannot open config file {config_file_path}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import json | ||
from typing import IO | ||
|
||
from datahub.configuration import ConfigurationMechanism | ||
|
||
|
||
class JsonConfigurationMechanism(ConfigurationMechanism): | ||
"""Ability to load configuration from json files""" | ||
|
||
def load_config(self, config_fp: IO) -> dict: | ||
return json.load(config_fp) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very nice