# google_docai.py
"""Thin helpers around the Google Cloud Document AI client.

``process_document_from_content`` sends raw file bytes to a Document AI
processor and returns the resulting document;
``extract_and_convert_to_json`` serializes the entities of such a document
to a JSON string.
"""
import json
import os
from typing import Optional, Sequence

from google.api_core.client_options import ClientOptions
from google.cloud import documentai

# Only fall back to the hard-coded credentials file when the environment
# does not already provide one.
if not os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
        # This path must be valid at the time the API server is started.
        "/home/jackjack/test/doc_ai/workspace/drawingpdfocr-461103-2441e0b34216.json"
    )


def process_document_from_content(
    project_id: str,
    location: str,
    processor_id: str,
    file_content: bytes,
    mime_type: str,
    field_mask: Optional[str] = None,
    processor_version_id: Optional[str] = None,
    pages: Optional[Sequence[int]] = (1,),
) -> documentai.Document:
    """Process in-memory file content with a Document AI processor.

    Args:
        project_id: Google Cloud project that owns the processor.
        location: Processor location; also used to build the regional API
            endpoint ``{location}-documentai.googleapis.com``.
        processor_id: ID of the processor to call.
        file_content: Raw bytes of the document (no file is read from disk).
        mime_type: MIME type describing ``file_content``.
        field_mask: Optional field mask forwarded to the request as-is.
        processor_version_id: When given, the request targets this specific
            processor version instead of the processor itself.
        pages: Page numbers to process, forwarded to the
            ``IndividualPageSelector``. Defaults to ``(1,)`` (first page
            only). Pass ``None`` or an empty sequence to send the request
            without any page selection.

    Returns:
        The ``document`` field of the ``process_document`` response.
    """
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    if processor_version_id:
        name = client.processor_version_path(
            project_id, location, processor_id, processor_version_id
        )
    else:
        name = client.processor_path(project_id, location, processor_id)

    # The bytes are passed straight through; nothing is read from disk here.
    raw_document = documentai.RawDocument(content=file_content, mime_type=mime_type)

    request_kwargs = {
        "name": name,
        "raw_document": raw_document,
        "field_mask": field_mask,
    }
    if pages:
        # Restrict processing to the requested pages. The option is left out
        # of the request entirely when no selection was asked for.
        request_kwargs["process_options"] = documentai.ProcessOptions(
            individual_page_selector=documentai.ProcessOptions.IndividualPageSelector(
                pages=list(pages)
            )
        )

    request = documentai.ProcessRequest(**request_kwargs)
    result = client.process_document(request=request)
    return result.document


def extract_and_convert_to_json(
    document: documentai.Document,
) -> str:
    """Serialize the entities of ``document`` to a JSON string.

    Each entity that has both a non-empty ``type_`` and a non-empty
    ``mention_text`` becomes ``{"type": ..., "mention_text": ...}``. Entities
    missing either value are skipped. A falsy document, or one without
    entities, yields an empty JSON list.

    Args:
        document: Document whose ``entities`` are read.

    Returns:
        A JSON array string, indented by two spaces, with non-ASCII
        characters written unescaped.
    """
    entities = document.entities if document and document.entities else []
    extracted_entities = [
        {"type": entity.type_, "mention_text": entity.mention_text}
        for entity in entities
        # getattr with a None default covers both "attribute missing" and
        # "attribute empty" in a single truthiness check.
        if getattr(entity, "type_", None) and getattr(entity, "mention_text", None)
    ]
    return json.dumps(extracted_entities, ensure_ascii=False, indent=2)