问题描述
我按照 https://cloud.google.com/document-ai/docs/ocr 中的说明,尝试在 Python 中使用 Google Cloud 实现文档 OCR 时出现了此错误。
当我运行以下代码时
result = client.process_document(request=request)
我收到此错误
Traceback (most recent call last):
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py",line 73,in error_remapped_callable
return callable_(*args,**kwargs)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py",line 923,in __call__
return _end_unary_response_blocking(state,call,False,None)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/grpc/_channel.py",line 826,in _end_unary_response_blocking
raise _InactiveRpcError(state)
grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that terminated with:
status = StatusCode.INVALID_ARGUMENT
details = "Request contains an invalid argument."
debug_error_string = "{"created":"@1614769280.332675000","description":"Error received from peer ipv4:142.250.180.138:443","file":"src/core/lib/surface/call.cc","file_line":1068,"grpc_message":"Request contains an invalid argument.","grpc_status":3}"
>
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<input>",line 1,in <module>
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/cloud/documentai_v1beta3/services/document_processor_service/client.py",line 327,in process_document
response = rpc(request,retry=retry,timeout=timeout,metadata=metadata,)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/gapic_v1/method.py",line 145,in __call__
return wrapped_func(*args,**kwargs)
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py",line 281,in retry_wrapped_func
return retry_target(
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/retry.py",line 184,in retry_target
return target()
File "/Users/Niolo/Desktop/untitled/Desktop/lib/python3.8/site-packages/google/api_core/grpc_helpers.py",line 75,in error_remapped_callable
six.raise_from(exceptions.from_grpc_error(exc),exc)
File "<string>",line 3,in raise_from
google.api_core.exceptions.InvalidArgument: 400 Request contains an invalid argument.
我的完整代码:
import os
# Sample configuration: replace these placeholder values before running.
project_id= 'your-project-id'
location = 'eu' # Format is 'us' or 'eu'
processor_id = 'your-processor-id' # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'
# Set GOOGLE_APPLICATION_CREDENTIALS to the path of the credentials JSON file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"
def process_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str
):
    """Send a local PDF to a Document AI processor and print its paragraphs.

    Args:
        project_id: Project that owns the processor.
        location: Processor location, 'us' or 'eu'.
        processor_id: Identifier of a processor created in the Cloud Console.
        file_path: Path of the PDF file to process.
    """
    from google.cloud import documentai_v1beta3 as documentai

    # NOTE: no client_options are passed, so the client talks to its
    # default endpoint regardless of `location`.
    docai_client = documentai.DocumentProcessorServiceClient()

    # Full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processors/processor-id
    # The processor must already exist (create it in the Cloud Console).
    processor_name = (
        f"projects/{project_id}/locations/{location}/processors/{processor_id}"
    )

    # Load the whole file into memory as raw bytes.
    with open(file_path, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    # Build the request inline and run text recognition on the PDF.
    response = docai_client.process_document(
        request={
            "name": processor_name,
            "document": {"content": pdf_bytes, "mime_type": "application/pdf"},
        }
    )
    processed = response.document
    print("Document processing complete.")

    # For a full list of Document object attributes, see:
    # https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
    print("The document contains the following paragraphs:")
    for page in processed.pages:
        for paragraph in page.paragraphs:
            # get_text is not defined in this snippet; it must be provided
            # elsewhere in the module.
            print(f"Paragraph text: {get_text(paragraph.layout, processed)}")
解决方法
client = documentai.DocumentProcessorServiceClient()
默认指向美国(US)端点。
in: client = documentai.DocumentProcessorServiceClient()
in: print(client.DEFAULT_ENDPOINT)
out: us-documentai.googleapis.com
您需要将 api_endpoint 覆盖为 EU 端点,才能使其正常工作。
from google.api_core.client_options import ClientOptions
# Override the client's default endpoint with the EU endpoint
options = ClientOptions(api_endpoint="eu-documentai.googleapis.com:443")
# Instantiate a client that uses the overridden endpoint
client = documentai.DocumentProcessorServiceClient(client_options=options)
完整代码如下:
import os
# TODO(developer): Replace these placeholder values before running the sample.
project_id= 'your-project-id'
location = 'eu' # Format is 'us' or 'eu'
processor_id = 'your-processor-id' # Create processor in Cloud Console
file_path = '/file_path/invoice.pdf'
# Set GOOGLE_APPLICATION_CREDENTIALS to the path of the credentials JSON file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/full_path/your_credentials.json"
def process_document_sample(
    project_id: str, location: str, processor_id: str, file_path: str
):
    """Send a local PDF to a Document AI processor and print its paragraphs.

    The client's default endpoint is the US one
    (``us-documentai.googleapis.com``). A request that names a processor
    in another location is rejected there with ``400 Request contains an
    invalid argument``, so the endpoint is derived from ``location`` to
    keep it in step with the processor's resource name.

    Args:
        project_id: Project that owns the processor.
        location: Processor location, 'us' or 'eu'.
        processor_id: Identifier of a processor created in the Cloud Console.
        file_path: Path of the PDF file to process.
    """
    from google.api_core.client_options import ClientOptions
    from google.cloud import documentai_v1beta3 as documentai

    # Point the client at the endpoint matching the processor's location
    # instead of hard-coding one region.
    options = ClientOptions(
        api_endpoint=f"{location}-documentai.googleapis.com:443"
    )
    # Instantiates a client
    client = documentai.DocumentProcessorServiceClient(client_options=options)

    # The full resource name of the processor, e.g.:
    # projects/project-id/locations/location/processors/processor-id
    # You must create new processors in the Cloud Console first
    name = f"projects/{project_id}/locations/{location}/processors/{processor_id}"

    # Read the file into memory
    with open(file_path, "rb") as image:
        image_content = image.read()

    document = {"content": image_content, "mime_type": "application/pdf"}

    # Configure the process request
    request = {"name": name, "document": document}

    # Recognizes text entities in the PDF document
    result = client.process_document(request=request)
    document = result.document
    print("Document processing complete.")

    # For a full list of Document object attributes, please reference this page:
    # https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
    # Read the text recognition output from the processor
    print("The document contains the following paragraphs:")
    for page in document.pages:
        for paragraph in page.paragraphs:
            # get_text is not defined in this snippet; it must be provided
            # elsewhere in the module.
            paragraph_text = get_text(paragraph.layout, document)
            print(f"Paragraph text: {paragraph_text}")