Skip to content

Commit

Permalink
Bigquery fixes (#32780)
Browse files Browse the repository at this point in the history
* Bigquery fixes

* Remove unnecessary comprehension loop

---------

Co-authored-by: Claude <cvandermerwe@google.com>
  • Loading branch information
claudevdm and Claude authored Oct 16, 2024
1 parent 7d0bfd0 commit 6b3a1b2
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 120 deletions.
19 changes: 14 additions & 5 deletions sdks/python/apache_beam/transforms/enrichment_handlers/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,14 @@ def _execute_query(self, query: str):
except RuntimeError as e:
raise RuntimeError(f"Could not complete the query request: {query}. {e}")

def create_row_key(self, row: beam.Row):
if self.condition_value_fn:
return tuple(self.condition_value_fn(row))
if self.fields:
row_dict = row._asdict()
return (tuple(row_dict[field] for field in self.fields))
raise ValueError("Either fields or condition_value_fn must be specified")

def __call__(self, request: Union[beam.Row, List[beam.Row]], *args, **kwargs):
if isinstance(request, List):
values = []
Expand All @@ -180,7 +188,7 @@ def __call__(self, request: Union[beam.Row, List[beam.Row]], *args, **kwargs):
raw_query = self.query_template
if batch_size > 1:
batched_condition_template = ' or '.join(
[self.row_restriction_template] * batch_size)
[fr'({self.row_restriction_template})'] * batch_size)
raw_query = self.query_template.replace(
self.row_restriction_template, batched_condition_template)
for req in request:
Expand All @@ -194,14 +202,15 @@ def __call__(self, request: Union[beam.Row, List[beam.Row]], *args, **kwargs):
"Make sure the values passed in `fields` are the "
"keys in the input `beam.Row`." + str(e))
values.extend(current_values)
requests_map.update((val, req) for val in current_values)
requests_map[self.create_row_key(req)] = req
query = raw_query.format(*values)

responses_dict = self._execute_query(query)
for response in responses_dict:
for value in response.values():
if value in requests_map:
responses.append((requests_map[value], beam.Row(**response)))
response_row = beam.Row(**response)
response_key = self.create_row_key(response_row)
if response_key in requests_map:
responses.append((requests_map[response_key], response_row))
return responses
else:
request_dict = request._asdict()
Expand Down
Loading

0 comments on commit 6b3a1b2

Please sign in to comment.