Skip to content

Commit c80e96d

Browse files
Updating geospatial datatype to return ogc wkb (#1090)
## Description Replace standard JTS `WKBWriter` with custom `JTSOGCWKBWriter` to provide OGC-compliant WKB encoding with full support for 3D and 4D geometry coordinates (Z, M, and ZM dimensions). **Changes:** - Updated `WKTConverter.toWKB()` to use `JTSOGCWKBWriter` for OGC-compliant WKB generation - Updated `WKTConverter.toWKT()` to preserve dimensions when converting WKB back to WKT ## Testing Manually tested the flow with a real Databricks workspace and unit tests covering the following: - 2D and 3D geometries: POINT/LINESTRING/POLYGON with Z and M coordinates - 4D geometries: POINT/LINESTRING/POLYGON with both Z and M (ZM) coordinates - Empty geometries and edge cases (negative coordinates, large decimal values) - WKB → WKT → WKB round-trip tests to verify binary-level consistency *Update*: Added a comprehensive test suite of 140+ test cases in `wkb_test_cases.json` --------- Signed-off-by: Sreekanth Vadigi <sreekanth.vadigi@databricks.com> Co-authored-by: Samikshya Chand <148681192+samikshya-db@users.noreply.github.com>
1 parent 857d993 commit c80e96d

5 files changed

Lines changed: 1421 additions & 20 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
- Call statements will return result sets in response
1717

1818
### Updated
19+
- Geospatial `getWKB()` now returns OGC-compliant WKB values.
1920
- Minimized OAuth requests by reducing calls in feature flags and telemetry.
2021

2122
### Fixed
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
package com.databricks.jdbc.api.impl.converters;
2+
3+
import java.nio.ByteBuffer;
4+
import java.nio.ByteOrder;
5+
import java.util.EnumSet;
6+
import org.locationtech.jts.geom.Coordinate;
7+
import org.locationtech.jts.geom.Geometry;
8+
import org.locationtech.jts.geom.GeometryCollection;
9+
import org.locationtech.jts.geom.LineString;
10+
import org.locationtech.jts.geom.LinearRing;
11+
import org.locationtech.jts.geom.MultiLineString;
12+
import org.locationtech.jts.geom.MultiPoint;
13+
import org.locationtech.jts.geom.MultiPolygon;
14+
import org.locationtech.jts.geom.Point;
15+
import org.locationtech.jts.geom.Polygon;
16+
import org.locationtech.jts.io.Ordinate;
17+
18+
// Implements a WKB writer for JTS geometries that is OGC compliant.
19+
public class JTSOGCWKBWriter {
20+
// Constructs a write using the enum set for the output coordinates and the provided byte order
21+
// (endianness).
22+
public JTSOGCWKBWriter(EnumSet<Ordinate> outputOrdinates, ByteOrder byteOrder) {
23+
this.outputOrdinates = outputOrdinates;
24+
this.byteOrder = byteOrder;
25+
this.outputDimension = 2;
26+
if (hasZ()) ++this.outputDimension;
27+
if (hasM()) ++this.outputDimension;
28+
}
29+
30+
// Returns the WKB description of the input JTS geometry.
31+
public byte[] write(Geometry geom) {
32+
byte[] wkb = new byte[numBytes(geom)];
33+
ByteBuffer buffer = ByteBuffer.wrap(wkb).order(byteOrder);
34+
write(buffer, /* pos= */ 0, geom);
35+
return wkb;
36+
}
37+
38+
// Helper method to compute the number of bytes for the WKB description of the input geometry.
39+
// It performs dispatching to the concrete classes implementing the `Geometry` abstract class.
40+
private int numBytes(Geometry geom) {
41+
if (geom instanceof Point) {
42+
return numBytes((Point) geom);
43+
} else if (geom instanceof LineString) {
44+
return numBytes((LineString) geom);
45+
} else if (geom instanceof Polygon) {
46+
return numBytes((Polygon) geom);
47+
} else if (geom instanceof GeometryCollection) {
48+
// This accounts for multipoints, multilinestring, and multipolygons as well.
49+
return numBytes((GeometryCollection) geom);
50+
} else {
51+
throw new IllegalArgumentException("numBytes: Unknown or unsupported type");
52+
}
53+
}
54+
55+
// Number of bytes for a byte.
56+
private static final int SIZE_OF_BYTE = Byte.BYTES;
57+
// Number of bytes for integers.
58+
private static final int SIZE_OF_INT = Integer.BYTES;
59+
// Number of bytes for doubles.
60+
private static final int SIZE_OF_DOUBLE = Double.BYTES;
61+
62+
// Returns the number of bytes required for the WKB description of the input point.
63+
private int numBytes(Point point) {
64+
// We need:
65+
// - 1 byte for the endianness.
66+
// - 4 bytes for encoding the type.
67+
// - 8 bytes per coordinate.
68+
return (SIZE_OF_BYTE + SIZE_OF_INT) + outputDimension * SIZE_OF_DOUBLE;
69+
}
70+
71+
// Returns the number of bytes required for the WKB description of the input linestring.
72+
private int numBytes(LineString lineString) {
73+
// We need:
74+
// - 1 byte for the endianness.
75+
// - 4 bytes for encoding the type.
76+
// - 4 bytes for the number of points in the linestring.
77+
// - 8 bytes per coordinate, per point.
78+
return (SIZE_OF_BYTE + 2 * SIZE_OF_INT)
79+
+ lineString.getNumPoints() * outputDimension * SIZE_OF_DOUBLE;
80+
}
81+
82+
// Returns the number of bytes required for the WKB description of the input polygon.
83+
private int numBytes(Polygon polygon) {
84+
// We need:
85+
// - 1 byte for the endianness.
86+
// - 4 bytes for encoding the type.
87+
// - 4 bytes for the number of rings in the polygon.
88+
// - 4 bytes for the number of points per ring.
89+
// - 8 bytes per coordinate, per point.
90+
return (SIZE_OF_BYTE + 2 * SIZE_OF_INT)
91+
+ polygon.getNumPoints() * outputDimension * SIZE_OF_DOUBLE
92+
+ numRings(polygon) * SIZE_OF_INT;
93+
}
94+
95+
// Returns the number of bytes required for the WKB description of the input collection (in JTS
96+
// this accounts for multipoints, multilinestrings, and multipolygons).
97+
private int numBytes(GeometryCollection collection) {
98+
// We need:
99+
// - 1 byte for the endianness.
100+
// - 4 bytes for encoding the type.
101+
// - 4 bytes for the number of geometries in the collection.
102+
// - The number of bytes to represent each geometry in the collection.
103+
int numBytes = (SIZE_OF_BYTE + 2 * SIZE_OF_INT);
104+
for (int i = 0; i < collection.getNumGeometries(); ++i) {
105+
numBytes += numBytes(collection.getGeometryN(i));
106+
}
107+
return numBytes;
108+
}
109+
110+
// Returns the number of rings of the input polygon.
111+
private int numRings(Polygon polygon) {
112+
return polygon.isEmpty() ? 0 : (polygon.getNumInteriorRing() + 1);
113+
}
114+
115+
// OGC type values for the various geometry types. The values below are for 2D geometries.
116+
private static final int POINT_TYPE = 1;
117+
private static final int LINESTRING_TYPE = 2;
118+
private static final int POLYGON_TYPE = 3;
119+
private static final int MULTIPOINT_TYPE = 4;
120+
private static final int MULTILINESTRING_TYPE = 5;
121+
private static final int MULTIPOLYGON_TYPE = 6;
122+
private static final int GEOMETRYCOLLECTION_TYPE = 7;
123+
124+
// Writes the WKB description of the input geometry starting at position `pos` in the provided
125+
// buffer. Returns the position in the buffer after the written bytes.
126+
public int write(ByteBuffer buffer, int pos, Geometry geom) {
127+
if (geom instanceof Point) {
128+
return write(buffer, pos, (Point) geom);
129+
} else if (geom instanceof LineString) {
130+
return write(buffer, pos, (LineString) geom);
131+
} else if (geom instanceof Polygon) {
132+
return write(buffer, pos, (Polygon) geom);
133+
} else if (geom instanceof GeometryCollection) {
134+
// This accounts for multipoints, multilinestring, and multipolygons as well.
135+
return write(buffer, pos, (GeometryCollection) geom);
136+
} else {
137+
throw new IllegalArgumentException("write: Unknown or unsupported type");
138+
}
139+
}
140+
141+
// Writes the WKB description of the input point starting at position `pos` in the provided
142+
// buffer. Returns the position in the buffer after the written bytes.
143+
private int write(ByteBuffer buffer, int pos, Point point) {
144+
// Write the endianness.
145+
buffer.put(pos, (byte) (isLittleEndian() ? 1 : 0));
146+
pos += SIZE_OF_BYTE;
147+
// Write the type.
148+
buffer.putInt(pos, fromBaseType(POINT_TYPE));
149+
// Write the point coordinates and return the position after the written bytes.
150+
return writeCoords(buffer, pos + SIZE_OF_INT, point.getCoordinate());
151+
}
152+
153+
// Writes the WKB description of the input linestring starting at position `pos` in the provided
154+
// buffer. Returns the position in the buffer after the written bytes.
155+
private int write(ByteBuffer buffer, int pos, LineString lineString) {
156+
// Write the endianness.
157+
buffer.put(pos, (byte) (isLittleEndian() ? 1 : 0));
158+
pos += SIZE_OF_BYTE;
159+
// Write the type.
160+
buffer.putInt(pos, fromBaseType(LINESTRING_TYPE));
161+
pos += SIZE_OF_INT;
162+
// Write the number of points.
163+
buffer.putInt(pos, lineString.getNumPoints());
164+
pos += SIZE_OF_INT;
165+
// Write the point coordinates.
166+
for (int i = 0; i < lineString.getNumPoints(); ++i) {
167+
pos = writeCoords(buffer, pos, lineString.getCoordinateN(i));
168+
}
169+
// Return the position after the written bytes.
170+
return pos;
171+
}
172+
173+
// Writes the WKB description of the input polygon starting at position `pos` in the provided
174+
// buffer. Returns the position in the buffer after the written bytes.
175+
private int write(ByteBuffer buffer, int pos, Polygon polygon) {
176+
// Write the endianness.
177+
buffer.put(pos, (byte) (isLittleEndian() ? 1 : 0));
178+
pos += SIZE_OF_BYTE;
179+
// Write the type.
180+
buffer.putInt(pos, fromBaseType(POLYGON_TYPE));
181+
pos += SIZE_OF_INT;
182+
int numRings = numRings(polygon);
183+
// Write the number of rings.
184+
buffer.putInt(pos, numRings);
185+
pos += SIZE_OF_INT;
186+
if (numRings > 0) {
187+
LinearRing outer = polygon.getExteriorRing();
188+
// Write number of points in the outer ring.
189+
buffer.putInt(pos, outer.getNumPoints());
190+
pos += SIZE_OF_INT;
191+
// Write the point coordinates in the outer ring.
192+
for (int i = 0; i < outer.getNumPoints(); ++i) {
193+
pos = writeCoords(buffer, pos, outer.getCoordinateN(i));
194+
}
195+
}
196+
for (int i = 0; i < polygon.getNumInteriorRing(); ++i) {
197+
LinearRing inner = polygon.getInteriorRingN(i);
198+
// Write number of points in the i-th inner ring.
199+
buffer.putInt(pos, inner.getNumPoints());
200+
pos += SIZE_OF_INT;
201+
// Write the point coordinates in the i-th inner ring.
202+
for (int j = 0; j < inner.getNumPoints(); ++j) {
203+
pos = writeCoords(buffer, pos, inner.getCoordinateN(j));
204+
}
205+
}
206+
return pos;
207+
}
208+
209+
// Writes the WKB description of the input collection (includes multipoints, multilinestrings,
210+
// and multipolygons) starting at position `pos` in the provided buffer. Returns the position in
211+
// the buffer after the written bytes.
212+
private int write(ByteBuffer buffer, int pos, GeometryCollection collection) {
213+
// Write the endianness.
214+
buffer.put(pos, (byte) (isLittleEndian() ? 1 : 0));
215+
pos += SIZE_OF_BYTE;
216+
// Write the type.
217+
if (collection instanceof MultiPoint) {
218+
buffer.putInt(pos, fromBaseType(MULTIPOINT_TYPE));
219+
} else if (collection instanceof MultiLineString) {
220+
buffer.putInt(pos, fromBaseType(MULTILINESTRING_TYPE));
221+
} else if (collection instanceof MultiPolygon) {
222+
buffer.putInt(pos, fromBaseType(MULTIPOLYGON_TYPE));
223+
} else {
224+
buffer.putInt(pos, fromBaseType(GEOMETRYCOLLECTION_TYPE));
225+
}
226+
pos += SIZE_OF_INT;
227+
// Write the number of geometries.
228+
buffer.putInt(pos, collection.getNumGeometries());
229+
pos += SIZE_OF_INT;
230+
for (int i = 0; i < collection.getNumGeometries(); ++i) {
231+
// Get the WKB of the i-th geometry.
232+
pos = write(buffer, pos, collection.getGeometryN(i));
233+
}
234+
return pos;
235+
}
236+
237+
// Write the coordinates to the provided byte buffer. Returns the next position in the buffer,
238+
// after the written bytes.
239+
private int writeCoords(ByteBuffer buffer, int pos, Coordinate coords) {
240+
if (coords == null) {
241+
// We have an empty point. We need to output `Coordinate.NULL_ORDINATE` for all point
242+
// coordinates in this case.
243+
buffer.putDouble(pos, Coordinate.NULL_ORDINATE);
244+
pos += SIZE_OF_DOUBLE;
245+
buffer.putDouble(pos, Coordinate.NULL_ORDINATE);
246+
if (hasZ()) {
247+
pos += SIZE_OF_DOUBLE;
248+
buffer.putDouble(pos, Coordinate.NULL_ORDINATE);
249+
}
250+
if (hasM()) {
251+
pos += SIZE_OF_DOUBLE;
252+
buffer.putDouble(pos, Coordinate.NULL_ORDINATE);
253+
}
254+
return pos + SIZE_OF_DOUBLE;
255+
}
256+
buffer.putDouble(pos, coords.getX());
257+
pos += SIZE_OF_DOUBLE;
258+
buffer.putDouble(pos, coords.getY());
259+
if (hasZ()) {
260+
pos += SIZE_OF_DOUBLE;
261+
buffer.putDouble(pos, coords.getZ());
262+
}
263+
if (hasM()) {
264+
pos += SIZE_OF_DOUBLE;
265+
buffer.putDouble(pos, coords.getM());
266+
}
267+
return pos + SIZE_OF_DOUBLE;
268+
}
269+
270+
// Returns `true` iff the byte order specified at construction is little endian.
271+
private boolean isLittleEndian() {
272+
return byteOrder == ByteOrder.LITTLE_ENDIAN;
273+
}
274+
275+
// Returns `true` iff the ordinates specified at construction include the Z ordinate.
276+
private boolean hasZ() {
277+
return outputOrdinates.contains(Ordinate.Z);
278+
}
279+
280+
// Returns `true` iff the ordinates specified at construction include the M ordinate.
281+
private boolean hasM() {
282+
return outputOrdinates.contains(Ordinate.M);
283+
}
284+
285+
// Offsets (with respect to the 2D OGC type value) for the OGC type valuefor geometries with Z
286+
// and/or M coordinates.
287+
private static final int Z_TYPE_OFFSET = 1000;
288+
private static final int M_TYPE_OFFSET = 2000;
289+
private static final int ZM_TYPE_OFFSET = 3000;
290+
291+
// Given the base OGC type value for a geometry type, returns the OGC type value that takes the
292+
// dimension of the geometry into account.
293+
private int fromBaseType(int baseType) {
294+
boolean hasZ = hasZ();
295+
boolean hasM = hasM();
296+
if (hasZ && hasM) return baseType + ZM_TYPE_OFFSET;
297+
if (hasZ) return baseType + Z_TYPE_OFFSET;
298+
if (hasM) return baseType + M_TYPE_OFFSET;
299+
return baseType;
300+
}
301+
302+
// The ordinates we want to output when we write the WKB description of a geometry using this
303+
// writer.
304+
private EnumSet<Ordinate> outputOrdinates;
305+
// The byte order (endianness) of the WKB description of a geometry written using this writer.
306+
private ByteOrder byteOrder;
307+
// The number of dimensions we write in the WKB description of a geometry written using this
308+
// writer. This is a derivative quantity computed from `outputOrdinates` and is stored for
309+
// efficiency.
310+
private int outputDimension;
311+
}

0 commit comments

Comments
 (0)