Skip to content

Commit 189cfd0

Browse files
authored
[GH-2672] Add a new raster data source reader that can automatically tile GeoTiffs and bypass the Spark record limit (#2673)
1 parent caafcf8 commit 189cfd0

17 files changed

Lines changed: 1784 additions & 187 deletions

File tree

common/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@
129129
<groupId>org.datasyslab</groupId>
130130
<artifactId>proj4sedona</artifactId>
131131
</dependency>
132+
<dependency>
133+
<groupId>org.apache.hadoop</groupId>
134+
<artifactId>hadoop-client</artifactId>
135+
<scope>provided</scope>
136+
</dependency>
132137
</dependencies>
133138
<build>
134139
<sourceDirectory>src/main/java</sourceDirectory>

common/src/main/java/org/apache/sedona/common/raster/RasterConstructors.java

Lines changed: 27 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,23 @@
1919
package org.apache.sedona.common.raster;
2020

2121
import java.awt.*;
22-
import java.awt.image.Raster;
23-
import java.awt.image.RenderedImage;
2422
import java.awt.image.WritableRaster;
2523
import java.io.IOException;
2624
import java.util.Arrays;
2725
import java.util.List;
2826
import java.util.Map;
27+
import javax.imageio.stream.ImageInputStream;
2928
import javax.media.jai.RasterFactory;
3029
import org.apache.sedona.common.FunctionsGeoTools;
3130
import org.apache.sedona.common.raster.inputstream.ByteArrayImageInputStream;
3231
import org.apache.sedona.common.raster.netcdf.NetCdfReader;
33-
import org.apache.sedona.common.utils.ImageUtils;
3432
import org.apache.sedona.common.utils.RasterUtils;
3533
import org.geotools.api.feature.simple.SimpleFeature;
3634
import org.geotools.api.feature.simple.SimpleFeatureType;
37-
import org.geotools.api.metadata.spatial.PixelOrientation;
3835
import org.geotools.api.referencing.FactoryException;
3936
import org.geotools.api.referencing.crs.CoordinateReferenceSystem;
4037
import org.geotools.api.referencing.datum.PixelInCell;
4138
import org.geotools.api.referencing.operation.MathTransform;
42-
import org.geotools.coverage.GridSampleDimension;
4339
import org.geotools.coverage.grid.GridCoverage2D;
4440
import org.geotools.coverage.grid.GridEnvelope2D;
4541
import org.geotools.coverage.grid.GridGeometry2D;
@@ -73,6 +69,21 @@ public static GridCoverage2D fromGeoTiff(byte[] bytes) throws IOException {
7369
return geoTiffReader.read(null);
7470
}
7571

72+
/**
73+
* Creates a GridCoverage2D from a GeoTIFF via an ImageInputStream. This avoids materializing the
74+
* entire file as a byte[], which is critical for files larger than 2 GB.
75+
*
76+
* @param inputStream an ImageInputStream positioned at the start of the GeoTIFF data
77+
* @return a GridCoverage2D with a lazily-decoded RenderedImage
78+
* @throws IOException if the GeoTIFF cannot be read
79+
*/
80+
public static GridCoverage2D fromGeoTiff(ImageInputStream inputStream) throws IOException {
81+
GeoTiffReader geoTiffReader =
82+
new GeoTiffReader(
83+
inputStream, new Hints(Hints.FORCE_LONGITUDE_FIRST_AXIS_ORDER, Boolean.TRUE));
84+
return geoTiffReader.read(null);
85+
}
86+
7687
public static GridCoverage2D fromNetCDF(
7788
byte[] bytes, String variableName, String lonDimensionName, String latDimensionName)
7889
throws IOException, FactoryException {
@@ -560,32 +571,9 @@ public static GridCoverage2D makeNonEmptyRaster(
560571
return RasterUtils.create(raster, gridGeometry, null);
561572
}
562573

563-
public static class Tile {
564-
private final int tileX;
565-
private final int tileY;
566-
private final GridCoverage2D coverage;
567-
568-
public Tile(int tileX, int tileY, GridCoverage2D coverage) {
569-
this.tileX = tileX;
570-
this.tileY = tileY;
571-
this.coverage = coverage;
572-
}
573-
574-
public int getTileX() {
575-
return tileX;
576-
}
577-
578-
public int getTileY() {
579-
return tileY;
580-
}
581-
582-
public GridCoverage2D getCoverage() {
583-
return coverage;
584-
}
585-
}
586-
587574
/**
588-
* Generate tiles from a grid coverage
575+
* Generate tiles from a grid coverage. Returns a lazy iterator that generates tiles one at a
576+
* time, reading only the necessary pixel data for each tile from the source image.
589577
*
590578
* @param gridCoverage2D the grid coverage
591579
* @param bandIndices the indices of the bands to select (1-based), can be null or empty to
@@ -595,9 +583,9 @@ public GridCoverage2D getCoverage() {
595583
* @param padWithNoData whether to pad the tiles with no data value
596584
* @param padNoDataValue the no data value for padded tiles, only used when padWithNoData is true.
597585
* If the value is NaN, the no data value of the original band will be used.
598-
* @return the tiles
586+
* @return a lazy iterator of tiles
599587
*/
600-
public static Tile[] generateTiles(
588+
public static TileGenerator.TileIterator generateTiles(
601589
GridCoverage2D gridCoverage2D,
602590
int[] bandIndices,
603591
int tileWidth,
@@ -620,102 +608,10 @@ public static Tile[] generateTiles(
620608
}
621609
}
622610
}
623-
return doGenerateTiles(
611+
return TileGenerator.generateInDbTiles(
624612
gridCoverage2D, bandIndices, tileWidth, tileHeight, padWithNoData, padNoDataValue);
625613
}
626614

627-
/**
628-
* Generate tiles from an in-db grid coverage. The generated tiles are also in-db grid coverages.
629-
* Pixel data will be copied into the tiles.
630-
*
631-
* @param gridCoverage2D the in-db grid coverage
632-
* @param bandIndices the indices of the bands to select (1-based)
633-
* @param tileWidth the width of the tiles
634-
* @param tileHeight the height of the tiles
635-
* @param padWithNoData whether to pad the tiles with no data value
636-
* @param padNoDataValue the no data value for padded tiles, only used when padWithNoData is true.
637-
* If the value is NaN, the no data value of the original band will be used.
638-
* @return the tiles
639-
*/
640-
private static Tile[] doGenerateTiles(
641-
GridCoverage2D gridCoverage2D,
642-
int[] bandIndices,
643-
int tileWidth,
644-
int tileHeight,
645-
boolean padWithNoData,
646-
double padNoDataValue) {
647-
AffineTransform2D affine =
648-
RasterUtils.getAffineTransform(gridCoverage2D, PixelOrientation.CENTER);
649-
RenderedImage image = gridCoverage2D.getRenderedImage();
650-
double[] noDataValues = new double[bandIndices.length];
651-
for (int i = 0; i < bandIndices.length; i++) {
652-
noDataValues[i] =
653-
RasterUtils.getNoDataValue(gridCoverage2D.getSampleDimension(bandIndices[i] - 1));
654-
}
655-
int width = image.getWidth();
656-
int height = image.getHeight();
657-
int numTileX = (int) Math.ceil((double) width / tileWidth);
658-
int numTileY = (int) Math.ceil((double) height / tileHeight);
659-
Tile[] tiles = new Tile[numTileX * numTileY];
660-
for (int tileY = 0; tileY < numTileY; tileY++) {
661-
for (int tileX = 0; tileX < numTileX; tileX++) {
662-
int x0 = tileX * tileWidth;
663-
int y0 = tileY * tileHeight;
664-
665-
// Rect to copy from the original image
666-
int rectWidth = Math.min(tileWidth, width - x0);
667-
int rectHeight = Math.min(tileHeight, height - y0);
668-
669-
// If we don't pad with no data, the tiles on the boundary may have a different size
670-
int currentTileWidth = padWithNoData ? tileWidth : rectWidth;
671-
int currentTileHeight = padWithNoData ? tileHeight : rectHeight;
672-
boolean needPadding = padWithNoData && (rectWidth < tileWidth || rectHeight < tileHeight);
673-
674-
// Create a new affine transformation for this tile
675-
AffineTransform2D tileAffine = RasterUtils.translateAffineTransform(affine, x0, y0);
676-
GridGeometry2D gridGeometry2D =
677-
new GridGeometry2D(
678-
new GridEnvelope2D(0, 0, currentTileWidth, currentTileHeight),
679-
PixelInCell.CELL_CENTER,
680-
tileAffine,
681-
gridCoverage2D.getCoordinateReferenceSystem(),
682-
null);
683-
684-
// Prepare a new image for this tile, and copy the data from the original image
685-
WritableRaster raster =
686-
RasterFactory.createBandedRaster(
687-
image.getSampleModel().getDataType(),
688-
currentTileWidth,
689-
currentTileHeight,
690-
bandIndices.length,
691-
null);
692-
GridSampleDimension[] sampleDimensions = new GridSampleDimension[bandIndices.length];
693-
Raster sourceRaster = image.getData(new Rectangle(x0, y0, rectWidth, rectHeight));
694-
for (int k = 0; k < bandIndices.length; k++) {
695-
int bandIndex = bandIndices[k] - 1;
696-
697-
// Copy sample dimensions from source bands, and pad with no data value if necessary
698-
GridSampleDimension sampleDimension = gridCoverage2D.getSampleDimension(bandIndex);
699-
double noDataValue = noDataValues[k];
700-
if (needPadding && !Double.isNaN(padNoDataValue)) {
701-
sampleDimension =
702-
RasterUtils.createSampleDimensionWithNoDataValue(sampleDimension, padNoDataValue);
703-
noDataValue = padNoDataValue;
704-
}
705-
sampleDimensions[k] = sampleDimension;
706-
707-
// Copy data from original image to tile image
708-
ImageUtils.copyRasterWithPadding(sourceRaster, bandIndex, raster, k, noDataValue);
709-
}
710-
711-
GridCoverage2D tile = RasterUtils.create(raster, gridGeometry2D, sampleDimensions);
712-
tiles[tileY * numTileX + tileX] = new Tile(tileX, tileY, tile);
713-
}
714-
}
715-
716-
return tiles;
717-
}
718-
719615
public static GridCoverage2D[] rsTile(
720616
GridCoverage2D gridCoverage2D,
721617
int[] bandIndices,
@@ -729,12 +625,14 @@ public static GridCoverage2D[] rsTile(
729625
if (padNoDataValue == null) {
730626
padNoDataValue = Double.NaN;
731627
}
732-
Tile[] tiles =
628+
TileGenerator.TileIterator tileIterator =
733629
generateTiles(
734630
gridCoverage2D, bandIndices, tileWidth, tileHeight, padWithNoData, padNoDataValue);
735-
GridCoverage2D[] result = new GridCoverage2D[tiles.length];
736-
for (int i = 0; i < tiles.length; i++) {
737-
result[i] = tiles[i].getCoverage();
631+
GridCoverage2D[] result = new GridCoverage2D[tileIterator.getNumTiles()];
632+
int i = 0;
633+
while (tileIterator.hasNext()) {
634+
TileGenerator.Tile tile = tileIterator.next();
635+
result[i++] = tile.getCoverage();
738636
}
739637
return result;
740638
}

0 commit comments

Comments
 (0)