Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Document.from_batch_process_operation() fails on running operation #285

Open
holtskinner opened this issue Mar 14, 2024 · 2 comments
Open

Comments

@holtskinner
Copy link
Member

Output from a Jupyter Notebook

{
	"name": "TypeError",
	"message": "'NoneType' object is not iterable",
	"stack": "---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[29], line 1
----> 1 wrapped_documents = Document.from_batch_process_operation(location=\"us\", operation_name=operation.operation.name)

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:623, in Document.from_batch_process_operation(cls, location, operation_name, timeout)
    586 @classmethod
    587 def from_batch_process_operation(
    588     cls: Type[\"Document\"],
   (...)
    591     timeout: Optional[float] = None,
    592 ) -> List[\"Document\"]:
    593     r\"\"\"Loads Documents from Cloud Storage, using the operation name returned from `batch_process_documents()`.
    594 
    595         .. code-block:: python
   (...)
    620             A list of wrapped documents from gcs. Each document corresponds to an input file.
    621     \"\"\"
    622     return cls.from_batch_process_metadata(
--> 623         metadata=_get_batch_process_metadata(
    624             operation_name=operation_name,
    625             location=location,
    626             timeout=timeout,
    627         )
    628     )

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:190, in _get_batch_process_metadata(operation_name, location, timeout)
    182 # Poll Operation until complete.
    183 operation = operation_from_gapic(
    184     operation=client.get_operation(
    185         request=GetOperationRequest(name=operation_name),
   (...)
    188     result_type=documentai.BatchProcessResponse,
    189 )
--> 190 operation.result(timeout=timeout)
    192 operation_pb = operation.operation
    194 # Get Operation metadata.

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:256, in PollingFuture.result(self, timeout, retry, polling)
    144 def result(self, timeout=_DEFAULT_VALUE, retry=None, polling=None):
    145     \"\"\"Get the result of the operation.
    146 
    147     This method will poll for operation status periodically, blocking if
   (...)
    253             the timeout is reached before the operation completes.
    254     \"\"\"
--> 256     self._blocking_poll(timeout=timeout, retry=retry, polling=polling)
    258     if self._exception is not None:
    259         # pylint: disable=raising-bad-type
    260         # Pylint doesn't recognize that this is valid in this case.
    261         raise self._exception

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:137, in PollingFuture._blocking_poll(self, timeout, retry, polling)
    134     polling = polling.with_timeout(timeout)
    136 try:
--> 137     polling(self._done_or_raise)(retry=retry)
    138 except exceptions.RetryError:
    139     raise concurrent.futures.TimeoutError(
    140         f\"Operation did not complete within the designated timeout of \"
    141         f\"{polling.timeout} seconds.\"
    142     )

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:293, in Retry.__call__.<locals>.retry_wrapped_func(*args, **kwargs)
    289 target = functools.partial(func, *args, **kwargs)
    290 sleep_generator = exponential_sleep_generator(
    291     self._initial, self._maximum, multiplier=self._multiplier
    292 )
--> 293 return retry_target(
    294     target,
    295     self._predicate,
    296     sleep_generator,
    297     timeout=self._timeout,
    298     on_error=on_error,
    299 )

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:153, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
    149 # pylint: disable=broad-except
    150 # This function explicitly must deal with broad exceptions.
    151 except Exception as exc:
    152     # defer to shared logic for handling errors
--> 153     _retry_error_helper(
    154         exc,
    155         deadline,
    156         sleep,
    157         error_list,
    158         predicate,
    159         on_error,
    160         exception_factory,
    161         timeout,
    162     )
    163     # if exception not raised, sleep before next attempt
    164     time.sleep(sleep)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_base.py:212, in _retry_error_helper(exc, deadline, next_sleep, error_list, predicate_fn, on_error_fn, exc_factory_fn, original_timeout)
    206 if not predicate_fn(exc):
    207     final_exc, source_exc = exc_factory_fn(
    208         error_list,
    209         RetryFailureReason.NON_RETRYABLE_ERROR,
    210         original_timeout,
    211     )
--> 212     raise final_exc from source_exc
    213 if on_error_fn is not None:
    214     on_error_fn(exc)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/retry/retry_unary.py:144, in retry_target(target, predicate, sleep_generator, timeout, on_error, exception_factory, **kwargs)
    142 for sleep in sleep_generator:
    143     try:
--> 144         result = target()
    145         if inspect.isawaitable(result):
    146             warnings.warn(_ASYNC_RETRY_WARNING)

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/future/polling.py:119, in PollingFuture._done_or_raise(self, retry)
    117 def _done_or_raise(self, retry=None):
    118     \"\"\"Check if the future is done and raise if it's not.\"\"\"
--> 119     if not self.done(retry=retry):
    120         raise _OperationNotComplete()

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:174, in Operation.done(self, retry)
    165 def done(self, retry=None):
    166     \"\"\"Checks to see if the operation is complete.
    167 
    168     Args:
   (...)
    172         bool: True if the operation is complete, False otherwise.
    173     \"\"\"
--> 174     self._refresh_and_update(retry)
    175     return self._operation.done

File /opt/homebrew/lib/python3.11/site-packages/google/api_core/operation.py:162, in Operation._refresh_and_update(self, retry)
    159 # If the currently cached operation is done, no need to make another
    160 # RPC as it will not change once done.
    161 if not self._operation.done:
--> 162     self._operation = self._refresh(retry=retry) if retry else self._refresh()
    163     self._set_result_from_operation()

File /opt/homebrew/lib/python3.11/site-packages/google/cloud/documentai_v1/services/document_processor_service/client.py:3572, in DocumentProcessorServiceClient.get_operation(self, request, retry, timeout, metadata)
   3564 rpc = gapic_v1.method.wrap_method(
   3565     self._transport.get_operation,
   3566     default_timeout=None,
   3567     client_info=DEFAULT_CLIENT_INFO,
   3568 )
   3570 # Certain fields should be provided within the metadata header;
   3571 # add these here.
-> 3572 metadata = tuple(metadata) + (
   3573     gapic_v1.routing_header.to_grpc_metadata(((\"name\", request.name),)),
   3574 )
   3576 # Validate the universe domain.
   3577 self._validate_universe_domain()

TypeError: 'NoneType' object is not iterable"
}
@spencerbraun
Copy link

Hi, wondering if there is any update or fix here?

@holtskinner
Copy link
Member Author

holtskinner commented Jul 2, 2024

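Workaround for now: wait for the batch operation to complete yourself, then use Document.from_batch_process_metadata() instead of Document.from_batch_process_operation():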

from google.cloud import documentai
from google.cloud.documentai_toolbox import document

operation = client.batch_process_documents(request)
operation.result(timeout=timeout)
metadata = documentai.BatchProcessMetadata(operation.metadata)
wrapped_documents = document.Document.from_batch_process_metadata(metadata)

holtskinner added a commit to GoogleCloudPlatform/document-ai-samples that referenced this issue Jul 2, 2024
holtskinner added a commit to GoogleCloudPlatform/document-ai-samples that referenced this issue Jul 2, 2024
…a()` (#849)

- Workaround for googleapis/python-documentai-toolbox#285

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants