Skip to content

Commit

Permalink
[documentintelligence] Code fixes (Azure#38920)
Browse files Browse the repository at this point in the history
* fix samples

* fix content type

* remove models patch

* fix content type logic + update tests

* update docs

* pylint

* support bufferedreader input

---------

Co-authored-by: catalinaperalta <[email protected]>
  • Loading branch information
catalinaperalta and cperaltah authored Dec 17, 2024
1 parent da4f140 commit 13e01ff
Show file tree
Hide file tree
Showing 15 changed files with 38 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

### Other Changes

- Changed the default service API version to `2024-11-30`.
- Passing `content-type` is no longer required when `body` is a stream or bytes in `begin_analyze_document()` and `begin_classify_document()`.

## 1.0.0b4 (2024-09-05)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,7 @@ client = DocumentIntelligenceAdministrationClient(endpoint=endpoint, credential=
# The `send_request` method sends custom HTTP requests through the client's existing pipeline.
# Now let's use the `send_request` method to make a request that fetches the resource details.
# The request URL should be absolute and should include the API version used for the request.
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-07-31-preview")
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-11-30")
response = client.send_request(request)
response.raise_for_status()
response_body = response.json()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/documentintelligence/azure-ai-documentintelligence",
"Tag": "python/documentintelligence/azure-ai-documentintelligence_faf458f6e7"
"Tag": "python/documentintelligence/azure-ai-documentintelligence_bfcdb2d242"
}
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ def begin_analyze_document(
:param model_id: Unique document model name. Required.
:type model_id: str
:param body: Analyze request parameters. Default value is None.
:param body: Analyze request parameters. Required.
:type body: JSON
:keyword pages: 1-based page numbers to analyze. Ex. "1-3,5,7-9". Default value is None.
:paramtype pages: str
Expand Down Expand Up @@ -501,7 +501,7 @@ def begin_analyze_document(
:param model_id: Unique document model name. Required.
:type model_id: str
:param body: Analyze request parameters. Default value is None.
:param body: Analyze request parameters. Required.
:type body: IO[bytes]
:keyword pages: 1-based page numbers to analyze. Ex. "1-3,5,7-9". Default value is None.
:paramtype pages: str
Expand Down Expand Up @@ -553,7 +553,7 @@ def begin_analyze_document(
:param model_id: Unique document model name. Required.
:type model_id: str
:param body: Analyze request parameters. Is one of the following types:
AnalyzeDocumentRequest, JSON, IO[bytes] Default value is None.
AnalyzeDocumentRequest, JSON, IO[bytes] Required.
:type body: ~azure.ai.documentintelligence.models.AnalyzeDocumentRequest or JSON or
IO[bytes]
:keyword pages: 1-based page numbers to analyze. Ex. "1-3,5,7-9". Default value is None.
Expand Down Expand Up @@ -585,15 +585,13 @@ def begin_analyze_document(
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
_params = kwargs.pop("params", {}) or {}

content_type: Optional[str] = kwargs.pop(
"content_type", _headers.pop("content-type", "application/octet-stream")
)
content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("content-type", None))
cls: ClsType[_models.AnalyzeResult] = kwargs.pop("cls", None)
polling: Union[bool, PollingMethod] = kwargs.pop("polling", True)
lro_delay = kwargs.pop("polling_interval", self._config.polling_interval)
cont_token: Optional[str] = kwargs.pop("continuation_token", None)
if cont_token is None:
if isinstance(body, io.BytesIO):
if isinstance(body, (bytes, io.BytesIO, io.BufferedReader)):
content_type = "application/octet-stream"
raw_result = self._analyze_document_initial(
model_id=model_id,
Expand Down Expand Up @@ -683,10 +681,8 @@ def begin_classify_document(
:raises ~azure.core.exceptions.HttpResponseError:
"""
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
content_type: Optional[str] = kwargs.pop(
"content_type", _headers.pop("content-type", "application/octet-stream")
)
if isinstance(body, io.BytesIO):
content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("content-type", None))
if isinstance(body, (bytes, io.BytesIO, io.BufferedReader)):
content_type = "application/octet-stream"
return super().begin_classify_document( # type: ignore[arg-type, misc]
classifier_id=classifier_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class DocumentIntelligenceClient(DIClientGenerated):
:type credential: ~azure.core.credentials.AzureKeyCredential or
~azure.core.credentials.TokenCredential
:keyword api_version: The API version to use for this operation. Default value is
"2024-07-31-preview". Note that overriding this default value may result in unsupported
"2024-11-30". Note that overriding this default value may result in unsupported
behavior.
:paramtype api_version: str
:keyword int polling_interval: Default waiting time between two polls for LRO operations if no
Expand Down Expand Up @@ -58,7 +58,7 @@ class DocumentIntelligenceAdministrationClient(DIAClientGenerated):
:type credential: ~azure.core.credentials.AzureKeyCredential or
~azure.core.credentials.TokenCredential
:keyword api_version: The API version to use for this operation. Default value is
"2024-07-31-preview". Note that overriding this default value may result in unsupported
"2024-11-30". Note that overriding this default value may result in unsupported
behavior.
:paramtype api_version: str
:keyword int polling_interval: Default waiting time between two polls for LRO operations if no
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,13 @@ async def begin_analyze_document( # type: ignore[override]
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
_params = kwargs.pop("params", {}) or {}

content_type: Optional[str] = kwargs.pop(
"content_type", _headers.pop("content-type", "application/octet-stream")
)
content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("content-type", None))
cls: ClsType[_models.AnalyzeResult] = kwargs.pop("cls", None)
polling: Union[bool, AsyncPollingMethod] = kwargs.pop("polling", True)
lro_delay = kwargs.pop("polling_interval", self._config.polling_interval)
cont_token: Optional[str] = kwargs.pop("continuation_token", None)
if cont_token is None:
if isinstance(body, io.BytesIO):
if isinstance(body, (bytes, io.BytesIO, io.BufferedReader)):
content_type = "application/octet-stream"
raw_result = await self._analyze_document_initial(
model_id=model_id,
Expand Down Expand Up @@ -665,10 +663,8 @@ async def begin_classify_document( # type: ignore[override]
:raises ~azure.core.exceptions.HttpResponseError:
"""
_headers = case_insensitive_dict(kwargs.pop("headers", {}) or {})
content_type: Optional[str] = kwargs.pop(
"content_type", _headers.pop("content-type", "application/octet-stream")
)
if isinstance(body, io.BytesIO):
content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("content-type", None))
if isinstance(body, (bytes, io.BytesIO, io.BufferedReader)):
content_type = "application/octet-stream"
return await super().begin_classify_document( # type: ignore[arg-type, misc]
classifier_id=classifier_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DocumentIntelligenceClient(DIClientGenerated):
:type credential: ~azure.core.credentials.AzureKeyCredential or
~azure.core.credentials_async.AsyncTokenCredential
:keyword api_version: The API version to use for this operation. Default value is
"2024-07-31-preview". Note that overriding this default value may result in unsupported
"2024-11-30". Note that overriding this default value may result in unsupported
behavior.
:paramtype api_version: str
"""
Expand Down Expand Up @@ -59,7 +59,7 @@ class DocumentIntelligenceAdministrationClient(DIAClientGenerated):
:type credential: ~azure.core.credentials.AzureKeyCredential or
~azure.core.credentials_async.AsyncTokenCredential
:keyword api_version: The API version to use for this operation. Default value is
"2024-07-31-preview". Note that overriding this default value may result in unsupported
"2024-11-30". Note that overriding this default value may result in unsupported
behavior.
:paramtype api_version: str
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,9 @@
Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize
"""
from typing import List, Optional
from ._models import (
AnalyzeDocumentRequest as GeneratedAnalyzeDocumentRequest,
ClassifyDocumentRequest as GeneratedClassifyDocumentRequest,
)
from .._model_base import rest_field
from typing import List


class AnalyzeDocumentRequest(GeneratedAnalyzeDocumentRequest):
"""Document analysis parameters.
:ivar url_source: Document URL to analyze. Either url_source or bytes_source must be specified.
:vartype url_source: str
:ivar bytes_source: Document bytes to analyze. Either url_source or bytes_source must be specified.
:vartype bytes_source: bytes
"""

bytes_source: Optional[bytes] = rest_field(name="base64Source", format="base64")
"""Document bytes to analyze. Either url_source or bytes_source must be specified."""


class ClassifyDocumentRequest(GeneratedClassifyDocumentRequest):
"""Document classification parameters.
:ivar url_source: Document URL to classify. Either url_source or bytes_source must be
specified.
:vartype url_source: str
:ivar bytes_source: Document bytes to classify. Either url_source or bytes_source must be specified.
:vartype bytes_source: bytes
"""

bytes_source: Optional[bytes] = rest_field(name="base64Source", format="base64")
"""Document bytes to classify. Either url_source or bytes_source must be specified."""


__all__: List[str] = [
"AnalyzeDocumentRequest",
"ClassifyDocumentRequest",
] # Add all objects you want publicly available to users at this package level
__all__: List[str] = [] # Add all objects you want publicly available to users at this package level


def patch_sdk():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ async def analyze_batch_docs():

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]
result_container_sas_url = os.environ["RESULT_SAS_URL"]
batch_training_data_container_sas_url = os.environ["TRAINING_DATA_SAS_URL"]
result_container_sas_url = os.environ["RESULT_CONTAINER_SAS_URL"]
batch_training_data_container_sas_url = os.environ["TRAINING_DATA_CONTAINER_SAS_URL"]

document_intelligence_client = DocumentIntelligenceClient(endpoint=endpoint, credential=AzureKeyCredential(key))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@


def format_price(price_dict):
    """Render a price mapping as a single display string.

    :param price_dict: Mapping whose values make up the price, or None when
        no price is available.
    :return: "N/A" when ``price_dict`` is None; otherwise every value of the
        mapping formatted and concatenated in iteration order, with no separator.
    """
    if price_dict is not None:
        # Format each value on its own, then glue the pieces together.
        return "".join(f"{part}" for part in price_dict.values())
    return "N/A"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ async def sample_send_request():
# The `send_request` method sends custom HTTP requests through the client's existing pipeline.
# Now let's use the `send_request` method to make a request that fetches the resource details.
# The request URL should be absolute and should include the API version used for the request.
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-07-31-preview")
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-11-30")
response = await client.send_request(request)
response.raise_for_status()
response_body = response.json()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def analyze_batch_docs():

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]
result_container_sas_url = os.environ["RESULT_SAS_URL"]
batch_training_data_container_sas_url = os.environ["TRAINING_DATA_SAS_URL"]
result_container_sas_url = os.environ["RESULT_CONTAINER_SAS_URL"]
batch_training_data_container_sas_url = os.environ["TRAINING_DATA_CONTAINER_SAS_URL"]

document_intelligence_client = DocumentIntelligenceClient(endpoint=endpoint, credential=AzureKeyCredential(key))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def analyze_receipts():
from azure.ai.documentintelligence.models import AnalyzeResult

def _format_price(price_dict):
    """Render a price mapping as a single display string.

    :param price_dict: Mapping whose values make up the price, or None when
        no price is available.
    :return: "N/A" when ``price_dict`` is None; otherwise every value of the
        mapping formatted and concatenated in iteration order, with no separator.
    """
    if price_dict is not None:
        # Format each value on its own, then glue the pieces together.
        return "".join(f"{part}" for part in price_dict.values())
    return "N/A"

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def sample_send_request():
# The `send_request` method sends custom HTTP requests through the client's existing pipeline.
# Now let's use the `send_request` method to make a request that fetches the resource details.
# The request URL should be absolute and should include the API version used for the request.
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-07-31-preview")
request = HttpRequest(method="GET", url=f"{endpoint}/documentintelligence/info?api-version=2024-11-30")
response = client.send_request(request)
response.raise_for_status()
response_body = response.json()
Expand Down
8 changes: 8 additions & 0 deletions sdk/documentintelligence/test-resources.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('batchTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('batchTrainingResultSasProperties')).serviceSasToken)]"
},
"TRAINING_DATA_CONTAINER_SAS_URL": {
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('batchTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('batchTrainingSasProperties')).serviceSasToken)]"
},
"RESULT_CONTAINER_SAS_URL": {
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('batchTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('batchTrainingResultSasProperties')).serviceSasToken)]"
},
"DOCUMENTINTELLIGENCE_BATCH_TRAINING_ASYNC_RESULT_DATA_CONTAINER_SAS_URL": {
"type": "string",
"value": "[concat(reference(parameters('blobResourceId'), '2019-06-01').primaryEndpoints.blob, parameters('batchTrainingDataContainer'), '?', listServiceSas(parameters('blobResourceId'), '2019-06-01', parameters('batchTrainingAsyncResultSasProperties')).serviceSasToken)]"
Expand Down

0 comments on commit 13e01ff

Please sign in to comment.