Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-498] Add ST_BestSRID #1246

Merged
merged 11 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions common/src/main/java/org/apache/sedona/common/Functions.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.commons.lang3.tuple.Pair;
import org.apache.sedona.common.geometryObjects.Circle;
import org.apache.sedona.common.sphere.Spheroid;
import org.apache.sedona.common.subDivide.GeometrySubDivider;
import org.apache.sedona.common.utils.GeomUtils;
import org.apache.sedona.common.utils.GeometryGeoHashEncoder;
Expand All @@ -26,18 +27,7 @@
import org.apache.sedona.common.utils.S2Utils;
import org.locationtech.jts.algorithm.MinimumBoundingCircle;
import org.locationtech.jts.algorithm.hull.ConcaveHull;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryCollection;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.LineString;
import org.locationtech.jts.geom.LinearRing;
import org.locationtech.jts.geom.MultiLineString;
import org.locationtech.jts.geom.MultiPoint;
import org.locationtech.jts.geom.MultiPolygon;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.geom.PrecisionModel;
import org.locationtech.jts.geom.*;
import org.locationtech.jts.geom.impl.CoordinateArraySequence;
import org.locationtech.jts.geom.util.GeometryFixer;
import org.locationtech.jts.io.gml2.GMLWriter;
Expand Down Expand Up @@ -196,6 +186,38 @@ else if (singleParam[0].equalsIgnoreCase(listBufferParameters[5])) {
return bufferParameters;
}

public static int bestSRID(Geometry geometry) {
Envelope envelope = geometry.getEnvelopeInternal();
if (envelope.isNull()) return Spheroid.EPSG_WORLD_MERCATOR; // Fallback EPSG

// Calculate the center of the envelope
double centerX = (envelope.getMinX() + envelope.getMaxX()) / 2.0; // centroid.getX();
double centerY = (envelope.getMinY() + envelope.getMaxY()) / 2.0; // centroid.getY();

// Calculate angular width and height
double xwidth = Spheroid.angularWidth(envelope);
double ywidth = Spheroid.angularHeight(envelope);

// Prioritize polar regions for Lambert Azimuthal Equal Area projection
if (centerY >= 70.0 && ywidth < 45.0) return Spheroid.EPSG_NORTH_LAMBERT;
if (centerY <= -70.0 && ywidth < 45.0) return Spheroid.EPSG_SOUTH_LAMBERT;

// Check for UTM zones
if (xwidth < 6.0) {
int zone;
if (centerX == -180.0 || centerX == 180.0) {
zone = 59; // UTM zone 60
} else {
zone = (int)Math.floor((centerX + 180.0) / 6.0);
zone = Math.min(zone, 59);
}
return (centerY < 0.0) ? Spheroid.EPSG_SOUTH_UTM_START + zone : Spheroid.EPSG_NORTH_UTM_START + zone;
}

// Default fallback to Mercator projection
return Spheroid.EPSG_WORLD_MERCATOR;
}

public static Geometry envelope(Geometry geometry) {
return geometry.getEnvelope();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,24 @@
import net.sf.geographiclib.PolygonArea;
import net.sf.geographiclib.PolygonResult;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;

import static java.lang.Math.abs;

public class Spheroid
{
// Standard EPSG Codes
public static final int EPSG_WORLD_MERCATOR = 3395;
public static final int EPSG_NORTH_UTM_START = 32601;
public static final int EPSG_NORTH_UTM_END = 32660;
public static final int EPSG_NORTH_LAMBERT = 3574;
public static final int EPSG_NORTH_STEREO = 3995;
public static final int EPSG_SOUTH_UTM_START = 32701;
public static final int EPSG_SOUTH_UTM_END = 32760;
public static final int EPSG_SOUTH_LAMBERT = 3409;
public static final int EPSG_SOUTH_STEREO = 3031;

/**
* Calculate the distance between two points on the earth using the Spheroid formula.
* This algorithm does not use the radius of the earth, but instead uses the WGS84 ellipsoid.
Expand Down Expand Up @@ -118,4 +130,34 @@ else if (geomType.equals("MultiPolygon") || geomType.equals("GeometryCollection"
return 0.0;
}
}

public static double angularWidth(Envelope envelope) {
double lon1 = envelope.getMinX();
double lon2 = envelope.getMaxX();
double lat = (envelope.getMinY() + envelope.getMaxY()) / 2; // Mid-latitude for width calculation

// Compute geodesic distance
GeodesicData g = Geodesic.WGS84.Inverse(lat, lon1, lat, lon2);
double distance = g.s12; // Distance in meters

// Convert distance to angular width in degrees
double angularWidth = Math.toDegrees(distance / (Geodesic.WGS84.EquatorialRadius() * Math.PI / 180));

return angularWidth;
}

public static double angularHeight(Envelope envelope) {
double lat1 = envelope.getMinY();
double lat2 = envelope.getMaxY();
double lon = (envelope.getMinX() + envelope.getMaxX()) / 2; // Mid-longitude for height calculation

// Compute geodesic distance
GeodesicData g = Geodesic.WGS84.Inverse(lat1, lon, lat2, lon);
double distance = g.s12; // Distance in meters

// Convert distance to angular height in degrees
double angularHeight = Math.toDegrees(distance / (Geodesic.WGS84.EquatorialRadius() * Math.PI / 180));

return angularHeight;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,75 @@ public void testBuffer() {
assertEquals(expected, actual);
}

@Test
public void testBestSRID() {
int[][] testCases_special = {
{0, -70, 3409}, // EPSG:3409 (Antarctic Polar Stereographic)
{0, 70, 3574}, // EPSG:3575 (North Pole LAEA Alaska)
// Special cases
{-180, 60, 32660}, {180, 60, 32660}, {-180, -60, 32760}, {180, -60, 32760},
};

// Number of UTM zones
int numZones = (177 - (-177)) / 6 + 1;
int numZonesSouth = (177 - (-177)) / 6 + 1;

int[][] testCases_UTMNorth = new int[numZones][3];
int[][] testCases_UTMSouth = new int[numZonesSouth][3];

int indexNorth = 0;
int northernLat = 60; // Latitude for Northern Hemisphere UTM zones
for (int lon = -177, epsg = 32601; lon <= 177; lon += 6, epsg++) {
testCases_UTMNorth[indexNorth][0] = lon; // Longitude
testCases_UTMNorth[indexNorth][1] = northernLat; // Latitude
testCases_UTMNorth[indexNorth][2] = epsg; // EPSG code
indexNorth++;
}

int indexSouth = 0;
int southernLat = -60; // Latitude for Southern Hemisphere UTM zones
for (int lon = -177, epsg = 32701; lon <= 177; lon += 6, epsg++) {
testCases_UTMSouth[indexSouth][0] = lon; // Longitude
testCases_UTMSouth[indexSouth][1] = southernLat; // Latitude
testCases_UTMSouth[indexSouth][2] = epsg; // EPSG code
indexSouth++;
}

for (int[] testCase : testCases_special) {
Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1]));
int actualEPSG = Functions.bestSRID(geom);
int expectedEPSG = testCase[2];
assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG);
}

for (int[] testCase : testCases_UTMNorth) {
Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1]));
int actualEPSG = Functions.bestSRID(geom);
int expectedEPSG = testCase[2];
assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG);
}

for (int[] testCase : testCases_UTMSouth) {
Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1]));
int actualEPSG = Functions.bestSRID(geom);
int expectedEPSG = testCase[2];
assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG);
}

// Large geometry that does not fit into UTM or polar categories
Geometry geom = GEOMETRY_FACTORY.createPolygon(new Coordinate[] {
new Coordinate(-160, -40),
new Coordinate(-160, 40),
new Coordinate(160, 40),
new Coordinate(160, -40),
new Coordinate(-160, -40)
});
int expectedEPSG = 3395; // EPSG code for World Mercator
int actualEPSG = Functions.bestSRID(geom);

assertEquals("Expected World Mercator projection for wide range geometry", expectedEPSG, actualEPSG);
}

@Test
public void nRingsUnsupported() {
LineString lineString = GEOMETRY_FACTORY.createLineString(coordArray3d(0, 1, 1, 1, 2, 1, 1, 2, 2));
Expand Down
28 changes: 28 additions & 0 deletions docs/api/flink/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,34 @@ Output:
3.141592653589793
```

## ST_BestSRID

Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection.

- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used.
- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen.
- In cases where none of the above conditions are met, the function defaults to the Mercator projection.
- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL.

!!!Warning
`ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements.

Format: `ST_BestSRID(geom: Geometry)`

Since: `v1.6.0`

Spark SQL Example:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Please call it SQL Example in all flink, spark, snowflake docs.
  2. Please say that this ST_BestSRID functions takes a WGS84 geometry as input and the geometry should follow lon/lat order.


```sql
SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))'))
```

Output:

```
32618
```

## ST_Boundary

Introduction: Returns the closure of the combinatorial boundary of this Geometry.
Expand Down
28 changes: 28 additions & 0 deletions docs/api/snowflake/vector-data/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,34 @@ SELECT ST_Azimuth(ST_POINT(0.0, 25.0), ST_POINT(0.0, 0.0))

Output: `3.141592653589793`

## ST_BestSRID

Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection.

- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used.
- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen.
- In cases where none of the above conditions are met, the function defaults to the Mercator projection.
- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL.

!!!Warning
`ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements.

Format: `ST_BestSRID(geom: Geometry)`

Since: `v1.6.0`

Spark SQL Example:

```sql
SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))'))
```

Output:

```
32618
```

## ST_Boundary

Introduction: Returns the closure of the combinatorial boundary of this Geometry.
Expand Down
28 changes: 28 additions & 0 deletions docs/api/sql/Function.md
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,34 @@ Output:
3.141592653589793
```

## ST_BestSRID

Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection.

- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used.
- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen.
- In cases where none of the above conditions are met, the function defaults to the Mercator projection.
- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL.

!!!Warning
`ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements.

Format: `ST_BestSRID(geom: Geometry)`

Since: `v1.6.0`

Spark SQL Example:

```sql
SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))'))
```

Output:

```
32618
```

## ST_Boundary

Introduction: Returns the closure of the combinatorial boundary of this Geometry.
Expand Down
1 change: 1 addition & 0 deletions flink/src/main/java/org/apache/sedona/flink/Catalog.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public static UserDefinedFunction[] getFuncs() {
new Functions.ST_Azimuth(),
new Functions.ST_Boundary(),
new Functions.ST_Buffer(),
new Functions.ST_BestSRID(),
new Functions.ST_ClosestPoint(),
new Functions.ST_Centroid(),
new Functions.ST_Collect(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.j
}
}

public static class ST_BestSRID extends ScalarFunction {
@DataTypeHint("Integer")
public int eval(@DataTypeHint(value = "RAW", bridgedTo = Geometry.class) Object o) {
Geometry geom = (Geometry) o;
return org.apache.sedona.common.Functions.bestSRID(geom);
}
}

public static class ST_ClosestPoint extends ScalarFunction {
@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g1,
Expand Down
24 changes: 20 additions & 4 deletions flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,14 @@
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.planner.expressions.In;
import org.apache.sedona.flink.expressions.Functions;
import org.apache.sedona.flink.expressions.FunctionsGeoTools;
import org.geotools.referencing.CRS;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.LineString;
import org.locationtech.jts.geom.Point;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.geom.*;
import org.locationtech.jts.io.ParseException;
import org.locationtech.jts.operation.buffer.BufferParameters;
import org.opengis.referencing.FactoryException;
Expand Down Expand Up @@ -99,6 +97,24 @@ public void testBuffer() {
assertEquals(expected, actual);
}

@Test
public void testBestSRID() {
Table table1 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POINT (160 40)') AS geom");
table1 = table1.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom")));
int result = (int) first(table1).getField(0);
assertEquals(32657, result);

Table table2 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('LINESTRING(-91.185 30.4505, -91.187 30.452, -91.189 30.4535)') AS geom");
table2 = table2.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom")));
result = (int) first(table2).getField(0);
assertEquals(32615, result);

Table table3 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POLYGON((-120 30, -80 30, -80 50, -120 50, -120 30))') AS geom");
table3 = table3.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom")));
result = (int) first(table3).getField(0);
assertEquals(3395, result);
}

@Test
public void testClosestPoint() {
Table table = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POINT (160 40)') AS g1, ST_GeomFromWKT('POINT (10 10)') as g2");
Expand Down
Loading