Skip to content

Commit 1fef6f4

Browse files
author
Changjian Wang
committed
Refactor Sample_Advanced_ContentSource and Sample_Advanced_ContentSourceAsync to enhance multi-segment parsing examples and update documentation comments
1 parent 370ec1a commit 1fef6f4

4 files changed

Lines changed: 84 additions & 84 deletions

File tree

sdk/contentunderstanding/azure-ai-contentunderstanding/src/samples/java/com/azure/ai/contentunderstanding/samples/Sample_Advanced_ContentSource.java

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ public static void main(String[] args) {
7171
documentContentSourceFromAnalysis(documentContent);
7272

7373
// =====================================================================
74-
// Part 2: ContentSource.parseAll() round-trip and multi-segment parsing
74+
// Part 2: DocumentSource.parse() and ContentSource.parseAll() round-trip
7575
// =====================================================================
7676
contentSourceParseRoundTrip(documentContent);
7777
}
@@ -120,12 +120,29 @@ private static void documentContentSourceFromAnalysis(DocumentContent documentCo
120120
/**
121121
* Demonstrates the two public parse methods and {@link ContentSource#toRawString(List)}:
122122
* <ul>
123-
* <li>{@link ContentSource#parseAll(String)} — base-class method, returns {@code List<ContentSource>}</li>
124123
* <li>{@link DocumentSource#parse(String)} — typed method, returns {@code List<DocumentSource>}</li>
124+
* <li>{@link ContentSource#parseAll(String)} — base-class method, returns {@code List<ContentSource>}</li>
125125
* </ul>
126126
*/
127127
// BEGIN: com.azure.ai.contentunderstanding.advanced.contentsource.parse
128128
private static void contentSourceParseRoundTrip(DocumentContent documentContent) {
129+
// --- DocumentSource.parse() — typed method ---
130+
// DocumentSource.parse() is the typed convenience method. It returns List<DocumentSource>
131+
// directly — no casting needed. Use this when you know the source string contains only D() segments.
132+
ContentField multiSourceField = documentContent.getFields().values().stream()
133+
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
134+
.findFirst()
135+
.orElseThrow(() -> new IllegalStateException("No field with multiple sources found"));
136+
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
137+
System.out.println("Multi-segment wire format: " + multiWireFormat);
138+
139+
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
140+
for (DocumentSource ds : docSources) {
141+
RectangleF bbox = ds.getBoundingBox();
142+
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n",
143+
ds.getPageNumber(), bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
144+
}
145+
129146
// --- toRawString + ContentSource.parseAll() round-trip ---
130147
// ContentSource.parseAll() is the base-class method that handles both D() and AV() formats.
131148
// It returns List<ContentSource>, so you cast each element to the appropriate subclass.
@@ -147,23 +164,6 @@ private static void contentSourceParseRoundTrip(DocumentContent documentContent)
147164
// AudioVisualSource would be handled here once the service returns AV sources.
148165
}
149166

150-
// --- DocumentSource.parse() — typed method ---
151-
// DocumentSource.parse() is the typed convenience method. It returns List<DocumentSource>
152-
// directly — no casting needed. Use this when you know the source string contains only D() segments.
153-
ContentField multiSourceField = documentContent.getFields().values().stream()
154-
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
155-
.findFirst()
156-
.orElseThrow(() -> new IllegalStateException("No field with multiple sources found"));
157-
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
158-
System.out.println("Multi-segment wire format: " + multiWireFormat);
159-
160-
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
161-
for (DocumentSource ds : docSources) {
162-
RectangleF bbox = ds.getBoundingBox();
163-
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n",
164-
ds.getPageNumber(), bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
165-
}
166-
167167
// --- Page-only format: D(page) ---
168168
// Both parseAll() and parse() support the page-only format with no coordinates.
169169
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");

sdk/contentunderstanding/azure-ai-contentunderstanding/src/samples/java/com/azure/ai/contentunderstanding/samples/Sample_Advanced_ContentSourceAsync.java

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ public static void main(String[] args) throws InterruptedException {
7979
// Part 1: Document ContentSource from analysis
8080
documentContentSourceFromAnalysis(documentContent);
8181

82-
// Part 2: ContentSource.parseAll() round-trip and multi-segment parsing
82+
// Part 2: DocumentSource.parse() and ContentSource.parseAll() round-trip
8383
contentSourceParseRoundTrip(documentContent);
8484
})
8585
.doFinally(signal -> latch.countDown())
@@ -127,12 +127,29 @@ private static void documentContentSourceFromAnalysis(DocumentContent documentCo
127127
/**
128128
* Demonstrates the two public parse methods and {@link ContentSource#toRawString(List)}:
129129
* <ul>
130-
* <li>{@link ContentSource#parseAll(String)} — base-class method, returns {@code List<ContentSource>}</li>
131130
* <li>{@link DocumentSource#parse(String)} — typed method, returns {@code List<DocumentSource>}</li>
131+
* <li>{@link ContentSource#parseAll(String)} — base-class method, returns {@code List<ContentSource>}</li>
132132
* </ul>
133133
*/
134134
// BEGIN: com.azure.ai.contentunderstanding.advanced.contentsource.parse.async
135135
private static void contentSourceParseRoundTrip(DocumentContent documentContent) {
136+
// --- DocumentSource.parse() — typed method ---
137+
// DocumentSource.parse() is the typed convenience method. It returns List<DocumentSource>
138+
// directly — no casting needed. Use this when you know the source string contains only D() segments.
139+
ContentField multiSourceField = documentContent.getFields().values().stream()
140+
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
141+
.findFirst()
142+
.orElseThrow(() -> new IllegalStateException("No field with multiple sources found"));
143+
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
144+
System.out.println("Multi-segment wire format: " + multiWireFormat);
145+
146+
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
147+
for (DocumentSource ds : docSources) {
148+
RectangleF bbox = ds.getBoundingBox();
149+
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n",
150+
ds.getPageNumber(), bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
151+
}
152+
136153
// --- toRawString + ContentSource.parseAll() round-trip ---
137154
// ContentSource.parseAll() is the base-class method that handles both D() and AV() formats.
138155
// It returns List<ContentSource>, so you cast each element to the appropriate subclass.
@@ -154,23 +171,6 @@ private static void contentSourceParseRoundTrip(DocumentContent documentContent)
154171
// AudioVisualSource would be handled here once the service returns AV sources.
155172
}
156173

157-
// --- DocumentSource.parse() — typed method ---
158-
// DocumentSource.parse() is the typed convenience method. It returns List<DocumentSource>
159-
// directly — no casting needed. Use this when you know the source string contains only D() segments.
160-
ContentField multiSourceField = documentContent.getFields().values().stream()
161-
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
162-
.findFirst()
163-
.orElseThrow(() -> new IllegalStateException("No field with multiple sources found"));
164-
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
165-
System.out.println("Multi-segment wire format: " + multiWireFormat);
166-
167-
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
168-
for (DocumentSource ds : docSources) {
169-
RectangleF bbox = ds.getBoundingBox();
170-
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n",
171-
ds.getPageNumber(), bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
172-
}
173-
174174
// --- Page-only format: D(page) ---
175175
// Both parseAll() and parse() support the page-only format with no coordinates.
176176
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");

sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample_Advanced_ContentSourceAsyncTest.java

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,29 @@ public void testContentSourceParseRoundTripAsync() {
123123

124124
DocumentContent documentContent = (DocumentContent) result.getContents().get(0);
125125

126+
// --- DocumentSource.parse() — typed method for multi-segment ---
127+
ContentField multiSourceField = documentContent.getFields()
128+
.values()
129+
.stream()
130+
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
131+
.findFirst()
132+
.orElseThrow(() -> new AssertionError("No field with multiple sources found"));
133+
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
134+
System.out.println("Multi-segment wire format: " + multiWireFormat);
135+
136+
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
137+
assertEquals(multiSourceField.getSources().size(), docSources.size(),
138+
"DocumentSource.parse() count should match original source count");
139+
for (DocumentSource ds : docSources) {
140+
assertTrue(ds.getPageNumber() >= 1, "Page number should be >= 1");
141+
RectangleF bbox = ds.getBoundingBox();
142+
assertNotNull(bbox, "BoundingBox should not be null");
143+
assertTrue(bbox.getWidth() > 0, "BoundingBox width should be > 0");
144+
assertTrue(bbox.getHeight() > 0, "BoundingBox height should be > 0");
145+
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n", ds.getPageNumber(),
146+
bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
147+
}
148+
126149
// --- ContentSource.parseAll() round-trip ---
127150
ContentField fieldWithSource = documentContent.getFields()
128151
.values()
@@ -151,29 +174,6 @@ public void testContentSourceParseRoundTripAsync() {
151174
.println(" parseAll -> page " + ds.getPageNumber() + ", polygon points: " + ds.getPolygon().size());
152175
}
153176

154-
// --- DocumentSource.parse() — typed method for multi-segment ---
155-
ContentField multiSourceField = documentContent.getFields()
156-
.values()
157-
.stream()
158-
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
159-
.findFirst()
160-
.orElseThrow(() -> new AssertionError("No field with multiple sources found"));
161-
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
162-
System.out.println("Multi-segment wire format: " + multiWireFormat);
163-
164-
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
165-
assertEquals(multiSourceField.getSources().size(), docSources.size(),
166-
"DocumentSource.parse() count should match original source count");
167-
for (DocumentSource ds : docSources) {
168-
assertTrue(ds.getPageNumber() >= 1, "Page number should be >= 1");
169-
RectangleF bbox = ds.getBoundingBox();
170-
assertNotNull(bbox, "BoundingBox should not be null");
171-
assertTrue(bbox.getWidth() > 0, "BoundingBox width should be > 0");
172-
assertTrue(bbox.getHeight() > 0, "BoundingBox height should be > 0");
173-
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n", ds.getPageNumber(),
174-
bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
175-
}
176-
177177
// --- Page-only format: D(page) via DocumentSource.parse() ---
178178
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
179179
assertEquals(1, pageOnly.size(), "Page-only should parse to 1 source");

sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample_Advanced_ContentSourceTest.java

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,29 @@ public void testContentSourceParseRoundTrip() {
108108
AnalysisResult result = operation.getFinalResult();
109109
DocumentContent documentContent = (DocumentContent) result.getContents().get(0);
110110

111+
// --- DocumentSource.parse() — typed method for multi-segment ---
112+
ContentField multiSourceField = documentContent.getFields()
113+
.values()
114+
.stream()
115+
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
116+
.findFirst()
117+
.orElseThrow(() -> new AssertionError("No field with multiple sources found"));
118+
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
119+
System.out.println("Multi-segment wire format: " + multiWireFormat);
120+
121+
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
122+
assertEquals(multiSourceField.getSources().size(), docSources.size(),
123+
"DocumentSource.parse() count should match original source count");
124+
for (DocumentSource ds : docSources) {
125+
assertTrue(ds.getPageNumber() >= 1, "Page number should be >= 1");
126+
RectangleF bbox = ds.getBoundingBox();
127+
assertNotNull(bbox, "BoundingBox should not be null");
128+
assertTrue(bbox.getWidth() > 0, "BoundingBox width should be > 0");
129+
assertTrue(bbox.getHeight() > 0, "BoundingBox height should be > 0");
130+
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n", ds.getPageNumber(),
131+
bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
132+
}
133+
111134
// --- ContentSource.parseAll() round-trip ---
112135
ContentField fieldWithSource = documentContent.getFields()
113136
.values()
@@ -136,29 +159,6 @@ public void testContentSourceParseRoundTrip() {
136159
.println(" parseAll -> page " + ds.getPageNumber() + ", polygon points: " + ds.getPolygon().size());
137160
}
138161

139-
// --- DocumentSource.parse() — typed method for multi-segment ---
140-
ContentField multiSourceField = documentContent.getFields()
141-
.values()
142-
.stream()
143-
.filter(f -> f.getSources() != null && f.getSources().size() > 1)
144-
.findFirst()
145-
.orElseThrow(() -> new AssertionError("No field with multiple sources found"));
146-
String multiWireFormat = ContentSource.toRawString(multiSourceField.getSources());
147-
System.out.println("Multi-segment wire format: " + multiWireFormat);
148-
149-
List<DocumentSource> docSources = DocumentSource.parse(multiWireFormat);
150-
assertEquals(multiSourceField.getSources().size(), docSources.size(),
151-
"DocumentSource.parse() count should match original source count");
152-
for (DocumentSource ds : docSources) {
153-
assertTrue(ds.getPageNumber() >= 1, "Page number should be >= 1");
154-
RectangleF bbox = ds.getBoundingBox();
155-
assertNotNull(bbox, "BoundingBox should not be null");
156-
assertTrue(bbox.getWidth() > 0, "BoundingBox width should be > 0");
157-
assertTrue(bbox.getHeight() > 0, "BoundingBox height should be > 0");
158-
System.out.printf(" parse -> page %d, bbox: x=%.4f, y=%.4f, w=%.4f, h=%.4f%n", ds.getPageNumber(),
159-
bbox.getX(), bbox.getY(), bbox.getWidth(), bbox.getHeight());
160-
}
161-
162162
// --- Page-only format: D(page) via DocumentSource.parse() ---
163163
List<DocumentSource> pageOnly = DocumentSource.parse("D(1)");
164164
assertEquals(1, pageOnly.size(), "Page-only should parse to 1 source");

0 commit comments

Comments
 (0)