Skip to content

Commit 5b9d2bf

Browse files
authored
[GH-2652] Add RS_AsCOG SQL function for Cloud Optimized GeoTiff output (#2669)
1 parent a648f0c commit 5b9d2bf

8 files changed

Lines changed: 360 additions & 16 deletions

File tree

common/src/main/java/org/apache/sedona/common/raster/RasterOutputs.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,62 @@ public static byte[] asCloudOptimizedGeoTiff(GridCoverage2D raster, CogOptions o
108108
}
109109
}
110110

111+
// ---------- asCOG overloads (used by RS_AsCOG via InferredExpression) ----------
112+
113+
public static byte[] asCOG(GridCoverage2D raster) {
114+
return asCloudOptimizedGeoTiff(raster, CogOptions.defaults());
115+
}
116+
117+
public static byte[] asCOG(GridCoverage2D raster, String compression) {
118+
return asCloudOptimizedGeoTiff(raster, CogOptions.builder().compression(compression).build());
119+
}
120+
121+
public static byte[] asCOG(GridCoverage2D raster, String compression, int tileSize) {
122+
return asCloudOptimizedGeoTiff(
123+
raster, CogOptions.builder().compression(compression).tileSize(tileSize).build());
124+
}
125+
126+
public static byte[] asCOG(
127+
GridCoverage2D raster, String compression, int tileSize, double quality) {
128+
return asCloudOptimizedGeoTiff(
129+
raster,
130+
CogOptions.builder()
131+
.compression(compression)
132+
.tileSize(tileSize)
133+
.compressionQuality(quality)
134+
.build());
135+
}
136+
137+
public static byte[] asCOG(
138+
GridCoverage2D raster, String compression, int tileSize, double quality, String resampling) {
139+
return asCloudOptimizedGeoTiff(
140+
raster,
141+
CogOptions.builder()
142+
.compression(compression)
143+
.tileSize(tileSize)
144+
.compressionQuality(quality)
145+
.resampling(resampling)
146+
.build());
147+
}
148+
149+
public static byte[] asCOG(
150+
GridCoverage2D raster,
151+
String compression,
152+
int tileSize,
153+
double quality,
154+
String resampling,
155+
int overviewCount) {
156+
return asCloudOptimizedGeoTiff(
157+
raster,
158+
CogOptions.builder()
159+
.compression(compression)
160+
.tileSize(tileSize)
161+
.compressionQuality(quality)
162+
.resampling(resampling)
163+
.overviewCount(overviewCount)
164+
.build());
165+
}
166+
111167
/**
112168
* Creates a GeoTiff file with the provided raster. Primarily used for testing.
113169
*

common/src/main/java/org/apache/sedona/common/raster/cog/CogOptions.java

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import java.util.Arrays;
2222
import java.util.List;
23-
import java.util.Locale;
2423

2524
/**
2625
* Options for Cloud Optimized GeoTIFF (COG) generation.
@@ -206,10 +205,19 @@ public CogOptions build() {
206205
if (compression == null || compression.isEmpty()) {
207206
throw new IllegalArgumentException("compression must not be null or empty");
208207
}
209-
if (!VALID_COMPRESSION.contains(compression)) {
208+
// Preserve the original input for error reporting
209+
String originalCompression = compression;
210+
// Case-insensitive matching: find the canonical value from the valid list
211+
String normalizedCompression = matchIgnoreCase(VALID_COMPRESSION, originalCompression);
212+
if (normalizedCompression == null) {
210213
throw new IllegalArgumentException(
211-
"compression must be one of " + VALID_COMPRESSION + ", got: '" + compression + "'");
214+
"compression must be one of "
215+
+ VALID_COMPRESSION
216+
+ ", got: '"
217+
+ originalCompression
218+
+ "'");
212219
}
220+
this.compression = normalizedCompression;
213221
if (compressionQuality < 0 || compressionQuality > 1.0) {
214222
throw new IllegalArgumentException(
215223
"compressionQuality must be between 0.0 and 1.0, got: " + compressionQuality);
@@ -225,27 +233,40 @@ public CogOptions build() {
225233
"overviewCount must be -1 (auto), 0 (none), or positive, got: " + overviewCount);
226234
}
227235

228-
// Normalize resampling to title-case for matching
229-
String normalized = normalizeResampling(resampling);
230-
if (!VALID_RESAMPLING.contains(normalized)) {
231-
throw new IllegalArgumentException(
232-
"resampling must be one of " + VALID_RESAMPLING + ", got: '" + resampling + "'");
236+
// Case-insensitive matching for resampling; treat null/empty as default (Nearest)
237+
if (resampling == null || resampling.isEmpty()) {
238+
this.resampling = "Nearest";
239+
} else {
240+
String originalResampling = resampling;
241+
String normalizedResampling = matchIgnoreCase(VALID_RESAMPLING, originalResampling);
242+
if (normalizedResampling == null) {
243+
throw new IllegalArgumentException(
244+
"resampling must be one of "
245+
+ VALID_RESAMPLING
246+
+ ", got: '"
247+
+ originalResampling
248+
+ "'");
249+
}
250+
this.resampling = normalizedResampling;
233251
}
234-
this.resampling = normalized;
235252

236253
return new CogOptions(this);
237254
}
238255

239256
/**
240-
* Normalize the resampling string to title-case (first letter uppercase, rest lowercase) so
241-
* callers can pass "nearest", "BILINEAR", etc.
257+
* Find the canonical value from a list that matches the input case-insensitively. Returns null
258+
* if input is null or unmatched. This allows callers to pass "lzw", "PACKBITS", "bilinear", etc.
242259
*/
243-
private static String normalizeResampling(String value) {
244-
if (value == null || value.isEmpty()) {
245-
return "Nearest";
260+
private static String matchIgnoreCase(List<String> validValues, String input) {
261+
if (input == null) {
262+
return null;
263+
}
264+
for (String valid : validValues) {
265+
if (valid.equalsIgnoreCase(input)) {
266+
return valid;
267+
}
246268
}
247-
String lower = value.toLowerCase(Locale.ROOT);
248-
return Character.toUpperCase(lower.charAt(0)) + lower.substring(1);
269+
return null;
249270
}
250271
}
251272
}

common/src/test/java/org/apache/sedona/common/raster/RasterOutputTest.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,4 +254,103 @@ public void testAsImageVariousBandDataType() throws IOException, FactoryExceptio
254254
}
255255
}
256256
}
257+
258+
// ---- RS_AsCOG / asCOG tests ----
259+
260+
@Test
261+
public void testAsCOGDefaults() throws IOException {
262+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
263+
byte[] cogBytes = RasterOutputs.asCOG(raster);
264+
assertNotNull(cogBytes);
265+
assertTrue(cogBytes.length >= 2);
266+
// Verify the TIFF byte-order marker (output starts with II or MM)
267+
assertTrue(
268+
(cogBytes[0] == 'I' && cogBytes[1] == 'I') || (cogBytes[0] == 'M' && cogBytes[1] == 'M'));
269+
}
270+
271+
@Test
272+
public void testAsCOGRoundTrip() throws IOException {
273+
GridCoverage2D original = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
274+
byte[] cogBytes = RasterOutputs.asCOG(original, "LZW", 256);
275+
// Read COG bytes back as a raster via GeoTiff reader
276+
GridCoverage2D roundTripped = RasterConstructors.fromGeoTiff(cogBytes);
277+
assertNotNull(roundTripped);
278+
// Verify envelope is preserved
279+
assertEquals(original.getEnvelope2D().toString(), roundTripped.getEnvelope2D().toString());
280+
// Verify dimensions are preserved
281+
assertEquals(
282+
original.getRenderedImage().getWidth(), roundTripped.getRenderedImage().getWidth());
283+
assertEquals(
284+
original.getRenderedImage().getHeight(), roundTripped.getRenderedImage().getHeight());
285+
// Verify number of bands is preserved
286+
assertEquals(original.getNumSampleDimensions(), roundTripped.getNumSampleDimensions());
287+
}
288+
289+
@Test
290+
public void testAsCOGWithCompression() throws IOException {
291+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
292+
byte[] cogLZW = RasterOutputs.asCOG(raster, "LZW");
293+
byte[] cogDeflate = RasterOutputs.asCOG(raster, "Deflate");
294+
assertNotNull(cogLZW);
295+
assertNotNull(cogDeflate);
296+
assertTrue(cogLZW.length > 0);
297+
assertTrue(cogDeflate.length > 0);
298+
// Different compressions should produce different sizes
299+
assertNotEquals(cogLZW.length, cogDeflate.length);
300+
}
301+
302+
@Test
303+
public void testAsCOGWithCompressionAndTileSize() throws IOException {
304+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
305+
byte[] cog256 = RasterOutputs.asCOG(raster, "Deflate", 256);
306+
byte[] cog512 = RasterOutputs.asCOG(raster, "Deflate", 512);
307+
assertNotNull(cog256);
308+
assertNotNull(cog512);
309+
assertTrue(cog256.length > 0);
310+
assertTrue(cog512.length > 0);
311+
}
312+
313+
@Test
314+
public void testAsCOGWithCompressionTileSizeAndQuality() throws IOException {
315+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
316+
byte[] cogHighQ = RasterOutputs.asCOG(raster, "Deflate", 256, 1.0);
317+
byte[] cogLowQ = RasterOutputs.asCOG(raster, "Deflate", 256, 0.1);
318+
assertNotNull(cogHighQ);
319+
assertNotNull(cogLowQ);
320+
assertTrue(cogHighQ.length > 0);
321+
assertTrue(cogLowQ.length > 0);
322+
}
323+
324+
@Test
325+
public void testAsCOGWithResampling() throws IOException {
326+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
327+
byte[] cog = RasterOutputs.asCOG(raster, "Deflate", 256, 0.2, "Bilinear");
328+
assertNotNull(cog);
329+
assertTrue(cog.length > 0);
330+
}
331+
332+
@Test
333+
public void testAsCOGAllArgs() throws IOException {
334+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
335+
byte[] cog = RasterOutputs.asCOG(raster, "LZW", 256, 0.5, "Nearest", 2);
336+
assertNotNull(cog);
337+
assertTrue(cog.length > 0);
338+
}
339+
340+
@Test
341+
public void testAsCOGCaseInsensitive() throws IOException {
342+
GridCoverage2D raster = rasterFromGeoTiff(resourceFolder + "raster/test1.tiff");
343+
// compression and resampling should be case-insensitive
344+
byte[] cog = RasterOutputs.asCOG(raster, "lzw", 256, 0.5, "bilinear", 2);
345+
assertNotNull(cog);
346+
assertTrue(cog.length > 0);
347+
// uppercase
348+
byte[] cog2 = RasterOutputs.asCOG(raster, "DEFLATE", 256, 0.5, "NEAREST", 2);
349+
assertNotNull(cog2);
350+
assertTrue(cog2.length > 0);
351+
// lowercase input: packbits
352+
byte[] cog3 = RasterOutputs.asCOG(raster, "packbits");
353+
assertNotNull(cog3);
354+
assertTrue(cog3.length > 0);
355+
}
257356
}

docs/api/sql/Raster-writer.md

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,71 @@ root
116116
|-- geotiff: binary (nullable = true)
117117
```
118118

119+
#### RS_AsCOG
120+
121+
Introduction: Returns a binary DataFrame from a Raster DataFrame. Each raster object in the resulting DataFrame is a [Cloud Optimized GeoTIFF](https://www.cogeo.org/) (COG) image in binary format. COG is a GeoTIFF that is internally organized to enable efficient range-read access over HTTP, making it ideal for cloud-hosted raster data.
122+
123+
Possible values for `compression`: `Deflate` (default), `LZW`, `JPEG`, `PackBits`. Case-insensitive.
124+
125+
`tileSize` must be a power of 2 (e.g., 128, 256, 512). Default value: `256`
126+
127+
Possible values for `quality`: any decimal number between 0 and 1. 0 means maximum compression and 1 means minimum compression. Default value: `0.2`
128+
129+
Possible values for `resampling`: `Nearest` (default), `Bilinear`, `Bicubic`. Case-insensitive. This controls the resampling algorithm used to build overview levels.
130+
131+
`overviewCount` controls the number of overview levels. Use `-1` for automatic (default), `0` for no overviews, or any positive integer for a specific count.
132+
133+
Format:
134+
135+
`RS_AsCOG(raster: Raster)`
136+
137+
`RS_AsCOG(raster: Raster, compression: String)`
138+
139+
`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer)`
140+
141+
`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality: Double)`
142+
143+
`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality: Double, resampling: String)`
144+
145+
`RS_AsCOG(raster: Raster, compression: String, tileSize: Integer, quality: Double, resampling: String, overviewCount: Integer)`
146+
147+
Since: `v1.9.0`
148+
149+
SQL Example
150+
151+
```sql
152+
SELECT RS_AsCOG(raster) FROM my_raster_table
153+
```
154+
155+
SQL Example
156+
157+
```sql
158+
SELECT RS_AsCOG(raster, 'LZW') FROM my_raster_table
159+
```
160+
161+
SQL Example
162+
163+
```sql
164+
SELECT RS_AsCOG(raster, 'LZW', 512, 0.75, 'Bilinear', 3) FROM my_raster_table
165+
```
166+
167+
Output:
168+
169+
```html
170+
+--------------------+
171+
| cog|
172+
+--------------------+
173+
|[4D 4D 00 2A 00 0...|
174+
+--------------------+
175+
```
176+
177+
Output schema:
178+
179+
```sql
180+
root
181+
|-- cog: binary (nullable = true)
182+
```
183+
119184
#### RS_AsPNG
120185

121186
Introduction: Returns a PNG byte array that can be written to raster files as PNGs using the [sedona function](#write-a-binary-dataframe-to-raster-files). This function only accepts rasters whose pixel data type is an unsigned integer. PNG can accept 1 or 3 bands of data from the raster; refer to [RS_Band](Raster-operators.md#rs_band) for more details.

docs/tutorial/raster.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,14 @@ Use [RS_AsGeoTiff](../api/sql/Raster-writer.md#rs_asgeotiff) to get the binary D
527527
SELECT RS_AsGeoTiff(raster)
528528
```
529529

530+
### As Cloud Optimized GeoTiff
531+
532+
Use [RS_AsCOG](../api/sql/Raster-writer.md#rs_ascog) to get the binary Dataframe of the raster in [Cloud Optimized GeoTiff](https://www.cogeo.org/) (COG) format. COG is ideal for cloud-hosted raster data because it supports efficient range-read access over HTTP.
533+
534+
```sql
535+
SELECT RS_AsCOG(raster)
536+
```
537+
530538
### As PNG
531539

532540
Use [RS_AsPNG](../api/sql/Raster-writer.md#rs_aspng) to get the binary Dataframe of the raster in PNG format.

spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ object Catalog extends AbstractCatalog with Logging {
314314
function[RS_Intersects](),
315315
function[RS_Interpolate](),
316316
function[RS_AsGeoTiff](),
317+
function[RS_AsCOG](),
317318
function[RS_AsRaster](),
318319
function[RS_AsArcGrid](),
319320
function[RS_AsBase64](),

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/RasterOutputs.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,16 @@ private[apache] case class RS_AsImage(inputExpressions: Seq[Expression])
7878
copy(inputExpressions = newChildren)
7979
}
8080
}
81+
82+
private[apache] case class RS_AsCOG(inputExpressions: Seq[Expression])
83+
extends InferredExpression(
84+
inferrableFunction6(RasterOutputs.asCOG),
85+
inferrableFunction5(RasterOutputs.asCOG),
86+
inferrableFunction4(RasterOutputs.asCOG),
87+
inferrableFunction3(RasterOutputs.asCOG),
88+
inferrableFunction2(RasterOutputs.asCOG),
89+
inferrableFunction1(RasterOutputs.asCOG)) {
90+
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
91+
copy(inputExpressions = newChildren)
92+
}
93+
}

0 commit comments

Comments
 (0)