Skip to content

Commit da27f07

Browse files
Update DocumentSource to enforce parameter count and refine polygon handling; remove page-only format examples from samples and tests
1 parent a77e3e5 commit da27f07

6 files changed

Lines changed: 17 additions & 66 deletions

File tree

sdk/contentunderstanding/azure-ai-contentunderstanding/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "java",
44
"TagPrefix": "java/contentunderstanding/azure-ai-contentunderstanding",
5-
"Tag": "java/contentunderstanding/azure-ai-contentunderstanding_670ad2966f"
5+
"Tag": "java/contentunderstanding/azure-ai-contentunderstanding_940a862f7e"
66
}

sdk/contentunderstanding/azure-ai-contentunderstanding/src/main/java/com/azure/ai/contentunderstanding/models/DocumentSource.java

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,18 @@
1111
import java.util.Objects;
1212

1313
/**
14-
* Represents a parsed document grounding source in the format {@code D(page,x1,y1,...,xN,yN)} or {@code D(page)}.
14+
* Represents a parsed document grounding source in the format {@code D(page,x1,y1,x2,y2,x3,y3,x4,y4)}.
1515
*
16-
* <p>The page number is 1-based. The polygon defines a region with three or more points
17-
* in the document's coordinate space. When only a page number is provided (no coordinates),
18-
* {@link #getPolygon()} and {@link #getBoundingBox()} return {@code null}.</p>
16+
* <p>The page number is 1-based. The polygon is a quadrilateral defined by four points
17+
* with coordinates in the document's coordinate space.</p>
1918
*
2019
* @see ContentSource
2120
*/
2221
@Immutable
2322
public final class DocumentSource extends ContentSource {
2423
private static final ClientLogger LOGGER = new ClientLogger(DocumentSource.class);
2524
private static final String PREFIX = "D(";
25+
private static final int EXPECTED_PARAM_COUNT = 9;
2626

2727
private final int pageNumber;
2828
private final List<PointF> polygon;
@@ -36,6 +36,11 @@ private DocumentSource(String source) {
3636
}
3737
String inner = source.substring(PREFIX.length(), source.length() - 1);
3838
String[] parts = inner.split(",");
39+
if (parts.length != EXPECTED_PARAM_COUNT) {
40+
throw LOGGER
41+
.logExceptionAsError(new IllegalArgumentException("Document source expected " + EXPECTED_PARAM_COUNT
42+
+ " parameters (page + 8 coordinates), got " + parts.length + ": '" + source + "'."));
43+
}
3944
try {
4045
this.pageNumber = Integer.parseInt(parts[0].trim());
4146
} catch (NumberFormatException e) {
@@ -46,23 +51,10 @@ private DocumentSource(String source) {
4651
throw LOGGER.logExceptionAsError(
4752
new IllegalArgumentException("Page number must be >= 1, got " + this.pageNumber + "."));
4853
}
49-
if (parts.length == 1) {
50-
// Page-only: D(page)
51-
this.polygon = null;
52-
this.boundingBox = null;
53-
return;
54-
}
55-
int coordCount = parts.length - 1;
56-
if (coordCount < 6 || coordCount % 2 != 0) {
57-
throw LOGGER.logExceptionAsError(new IllegalArgumentException(
58-
"Document source expected page-only (1 param) or page + at least 3 coordinate pairs (7+ params), got "
59-
+ parts.length + ": '" + source + "'."));
60-
}
61-
int pointCount = coordCount / 2;
62-
List<PointF> points = new ArrayList<>(pointCount);
54+
List<PointF> points = new ArrayList<>(4);
6355
float minX = Float.MAX_VALUE, minY = Float.MAX_VALUE;
6456
float maxX = -Float.MAX_VALUE, maxY = -Float.MAX_VALUE;
65-
for (int i = 0; i < pointCount; i++) {
57+
for (int i = 0; i < 4; i++) {
6658
int xIndex = 1 + (i * 2);
6759
int yIndex = 2 + (i * 2);
6860
float x, y;
@@ -98,20 +90,19 @@ public int getPageNumber() {
9890
}
9991

10092
/**
101-
* Gets the polygon coordinates defining the region, or {@code null} when only a page number is available.
93+
* Gets the polygon coordinates as four points defining a quadrilateral region.
10294
*
103-
* @return An unmodifiable list of {@link PointF} values, or {@code null} for page-only sources.
95+
* @return An unmodifiable list of four {@link PointF} values.
10496
*/
10597
public List<PointF> getPolygon() {
10698
return polygon;
10799
}
108100

109101
/**
110-
* Gets the axis-aligned bounding rectangle computed from the polygon coordinates,
111-
* or {@code null} when only a page number is available.
102+
* Gets the axis-aligned bounding rectangle computed from the polygon coordinates.
112103
* Useful for drawing highlight rectangles over extracted fields.
113104
*
114-
* @return The bounding box, or {@code null} for page-only sources.
105+
* @return The bounding box.
115106
*/
116107
public RectangleF getBoundingBox() {
117108
return boundingBox;
@@ -120,7 +111,7 @@ public RectangleF getBoundingBox() {
120111
/**
121112
* Parses a single document source segment.
122113
*
123-
* @param source The source string in the format {@code D(page)} or {@code D(page,x1,y1,...,xN,yN)}.
114+
* @param source The source string in the format {@code D(page,x1,y1,x2,y2,x3,y3,x4,y4)}.
124115
* @return A new {@link DocumentSource}.
125116
* @throws NullPointerException if {@code source} is null.
126117
* @throws IllegalArgumentException if the source string is not in the expected format.

sdk/contentunderstanding/azure-ai-contentunderstanding/src/samples/java/com/azure/ai/contentunderstanding/samples/Sample_Advanced_ContentSource.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,6 @@ private static void contentSourceParseRoundTrip(DocumentContent documentContent)
163163
}
164164
// AudioVisualSource would be handled here once the service returns AV sources.
165165
}
166-
167-
// --- Page-only format: D(page) ---
168-
// Both parseAll() and parse() support the page-only format with no coordinates.
169-
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
170-
DocumentSource pageOnlyDoc = pageOnly.get(0);
171-
System.out.println("Page-only: page=" + pageOnlyDoc.getPageNumber()
172-
+ ", polygon=" + pageOnlyDoc.getPolygon()
173-
+ ", boundingBox=" + pageOnlyDoc.getBoundingBox());
174166
}
175167
// END: com.azure.ai.contentunderstanding.advanced.contentsource.parse
176168

sdk/contentunderstanding/azure-ai-contentunderstanding/src/samples/java/com/azure/ai/contentunderstanding/samples/Sample_Advanced_ContentSourceAsync.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -170,14 +170,6 @@ private static void contentSourceParseRoundTrip(DocumentContent documentContent)
170170
}
171171
// AudioVisualSource would be handled here once the service returns AV sources.
172172
}
173-
174-
// --- Page-only format: D(page) ---
175-
// Both parseAll() and parse() support the page-only format with no coordinates.
176-
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
177-
DocumentSource pageOnlyDoc = pageOnly.get(0);
178-
System.out.println("Page-only: page=" + pageOnlyDoc.getPageNumber()
179-
+ ", polygon=" + pageOnlyDoc.getPolygon()
180-
+ ", boundingBox=" + pageOnlyDoc.getBoundingBox());
181173
}
182174
// END: com.azure.ai.contentunderstanding.advanced.contentsource.parse.async
183175

sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample_Advanced_ContentSourceAsyncTest.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static org.junit.jupiter.api.Assertions.assertFalse;
2121
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
2222
import static org.junit.jupiter.api.Assertions.assertNotNull;
23-
import static org.junit.jupiter.api.Assertions.assertNull;
2423
import static org.junit.jupiter.api.Assertions.assertTrue;
2524

2625
import java.util.Arrays;
@@ -173,16 +172,5 @@ public void testContentSourceParseRoundTripAsync() {
173172
System.out
174173
.println(" parseAll -> page " + ds.getPageNumber() + ", polygon points: " + ds.getPolygon().size());
175174
}
176-
177-
// --- Page-only format: D(page) via DocumentSource.parse() ---
178-
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
179-
assertEquals(1, pageOnly.size(), "Page-only should parse to 1 source");
180-
DocumentSource pageOnlyDoc = pageOnly.get(0);
181-
assertEquals(1, pageOnlyDoc.getPageNumber(), "Page-only page number should be 1");
182-
assertNull(pageOnlyDoc.getPolygon(), "Page-only polygon should be null");
183-
assertNull(pageOnlyDoc.getBoundingBox(), "Page-only boundingBox should be null");
184-
assertEquals("D(1)", pageOnlyDoc.getRawValue(), "Page-only round-trip should match");
185-
System.out.println("Page-only: D(1) -> page=" + pageOnlyDoc.getPageNumber() + ", polygon="
186-
+ pageOnlyDoc.getPolygon() + ", boundingBox=" + pageOnlyDoc.getBoundingBox());
187175
}
188176
}

sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample_Advanced_ContentSourceTest.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import static org.junit.jupiter.api.Assertions.assertFalse;
2020
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
2121
import static org.junit.jupiter.api.Assertions.assertNotNull;
22-
import static org.junit.jupiter.api.Assertions.assertNull;
2322
import static org.junit.jupiter.api.Assertions.assertTrue;
2423

2524
import java.util.Arrays;
@@ -158,16 +157,5 @@ public void testContentSourceParseRoundTrip() {
158157
System.out
159158
.println(" parseAll -> page " + ds.getPageNumber() + ", polygon points: " + ds.getPolygon().size());
160159
}
161-
162-
// --- Page-only format: D(page) via DocumentSource.parse() ---
163-
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
164-
assertEquals(1, pageOnly.size(), "Page-only should parse to 1 source");
165-
DocumentSource pageOnlyDoc = pageOnly.get(0);
166-
assertEquals(1, pageOnlyDoc.getPageNumber(), "Page-only page number should be 1");
167-
assertNull(pageOnlyDoc.getPolygon(), "Page-only polygon should be null");
168-
assertNull(pageOnlyDoc.getBoundingBox(), "Page-only boundingBox should be null");
169-
assertEquals("D(1)", pageOnlyDoc.getRawValue(), "Page-only round-trip should match");
170-
System.out.println("Page-only: D(1) -> page=" + pageOnlyDoc.getPageNumber() + ", polygon="
171-
+ pageOnlyDoc.getPolygon() + ", boundingBox=" + pageOnlyDoc.getBoundingBox());
172160
}
173161
}

0 commit comments

Comments
 (0)