|
| 1 | +import os |
| 2 | +import base64 |
| 3 | +from typing import * |
| 4 | + |
| 5 | +from veryfi.client_base import Client |
| 6 | + |
| 7 | + |
class PDFSplit:
    """Wrapper around the Veryfi ``/documents-set/`` (PDF split) endpoints.

    Each method builds its arguments and hands them to ``client._request``,
    returning whatever that call returns.
    """

    def __init__(self, client: Client):
        # The client object is stored as-is; all HTTP work is delegated to it.
        self.client = client

    def get_pdf(self, **kwargs):
        """
        Retrieve the collection of previously submitted PDFs.
        https://docs.veryfi.com/api/receipts-invoices/get-submitted-pdf/

        :param kwargs: Additional query parameters.
        :return: The response produced by the client for this GET request.
        """
        return self.client._request("GET", "/documents-set/", {}, kwargs)

    def get_documents_from_pdf(self, document_id: int):
        """
        Retrieve the documents that belong to one previously submitted PDF.
        https://docs.veryfi.com/api/receipts-invoices/get-documents-from-pdf/

        :param document_id: ID of the document you'd like to retrieve
        :return: The response produced by the client for this GET request.
        """
        # Note: unlike the collection endpoint, this path has no trailing slash.
        target = "/documents-set/{}".format(document_id)
        return self.client._request("GET", target, {})

    def split_and_process_pdf(
        self,
        file_path: str,
        categories: Optional[List] = None,
        **kwargs,
    ) -> Dict:
        """
        Submit a PDF stored on disk to be split and processed.
        https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

        The file is read in binary mode and sent base64-encoded, together with
        its base name.

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document
        :param kwargs: Additional body parameters; a key that matches one of the
            built-in body fields replaces that field's value
        :return: Data extracted from the document
        """
        pdf_name = os.path.basename(file_path)
        with open(file_path, "rb") as pdf_file:
            raw_bytes = pdf_file.read()
        encoded_pdf = base64.b64encode(raw_bytes).decode("utf-8")

        # kwargs are merged last so callers can add or override body fields.
        payload = {
            **{
                "file_name": pdf_name,
                "file_data": encoded_pdf,
                "categories": categories or [],
            },
            **kwargs,
        }
        return self.client._request("POST", "/documents-set/", payload)

    def split_and_process_pdf_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """Submit a PDF referenced by URL to be split and processed.
        https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param kwargs: Additional body parameters
        :return: Data extracted from the document.
        """
        # Arguments left at None are still included in the body as None.
        payload = dict(
            categories=categories or [],
            file_url=file_url,
            file_urls=file_urls,
            max_pages_to_process=max_pages_to_process,
        )
        payload.update(kwargs)
        return self.client._request("POST", "/documents-set/", payload)
0 commit comments