diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java b/common/src/main/java/org/apache/sedona/common/Functions.java index 08293bb04a..4faba7d4a6 100644 --- a/common/src/main/java/org/apache/sedona/common/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/Functions.java @@ -18,6 +18,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.sedona.common.geometryObjects.Circle; +import org.apache.sedona.common.sphere.Spheroid; import org.apache.sedona.common.subDivide.GeometrySubDivider; import org.apache.sedona.common.utils.GeomUtils; import org.apache.sedona.common.utils.GeometryGeoHashEncoder; @@ -26,18 +27,7 @@ import org.apache.sedona.common.utils.S2Utils; import org.locationtech.jts.algorithm.MinimumBoundingCircle; import org.locationtech.jts.algorithm.hull.ConcaveHull; -import org.locationtech.jts.geom.Coordinate; -import org.locationtech.jts.geom.Geometry; -import org.locationtech.jts.geom.GeometryCollection; -import org.locationtech.jts.geom.GeometryFactory; -import org.locationtech.jts.geom.LineString; -import org.locationtech.jts.geom.LinearRing; -import org.locationtech.jts.geom.MultiLineString; -import org.locationtech.jts.geom.MultiPoint; -import org.locationtech.jts.geom.MultiPolygon; -import org.locationtech.jts.geom.Point; -import org.locationtech.jts.geom.Polygon; -import org.locationtech.jts.geom.PrecisionModel; +import org.locationtech.jts.geom.*; import org.locationtech.jts.geom.impl.CoordinateArraySequence; import org.locationtech.jts.geom.util.GeometryFixer; import org.locationtech.jts.io.gml2.GMLWriter; @@ -196,6 +186,38 @@ else if (singleParam[0].equalsIgnoreCase(listBufferParameters[5])) { return bufferParameters; } + public static int bestSRID(Geometry geometry) { + Envelope envelope = geometry.getEnvelopeInternal(); + if (envelope.isNull()) return Spheroid.EPSG_WORLD_MERCATOR; // Fallback EPSG + + // Calculate the center of the envelope + double centerX = (envelope.getMinX() + envelope.getMaxX()) / 2.0; // centroid.getX(); + double centerY = (envelope.getMinY() + envelope.getMaxY()) / 2.0; // centroid.getY(); + + // Calculate angular width and height + double xwidth = Spheroid.angularWidth(envelope); + double ywidth = Spheroid.angularHeight(envelope); + + // Prioritize polar regions for Lambert Azimuthal Equal Area projection + if (centerY >= 70.0 && ywidth < 45.0) return Spheroid.EPSG_NORTH_LAMBERT; + if (centerY <= -70.0 && ywidth < 45.0) return Spheroid.EPSG_SOUTH_LAMBERT; + + // Check for UTM zones + if (xwidth < 6.0) { + int zone; + if (centerX == -180.0 || centerX == 180.0) { + zone = 59; // UTM zone 60 + } else { + zone = (int)Math.floor((centerX + 180.0) / 6.0); + zone = Math.min(zone, 59); + } + return (centerY < 0.0) ? Spheroid.EPSG_SOUTH_UTM_START + zone : Spheroid.EPSG_NORTH_UTM_START + zone; + } + + // Default fallback to Mercator projection + return Spheroid.EPSG_WORLD_MERCATOR; + } + public static Geometry envelope(Geometry geometry) { return geometry.getEnvelope(); } diff --git a/common/src/main/java/org/apache/sedona/common/sphere/Spheroid.java b/common/src/main/java/org/apache/sedona/common/sphere/Spheroid.java index ef64df55da..7b06f00d73 100644 --- a/common/src/main/java/org/apache/sedona/common/sphere/Spheroid.java +++ b/common/src/main/java/org/apache/sedona/common/sphere/Spheroid.java @@ -23,12 +23,24 @@ import net.sf.geographiclib.PolygonArea; import net.sf.geographiclib.PolygonResult; import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.Geometry; import static java.lang.Math.abs; public class Spheroid { + // Standard EPSG Codes + public static final int EPSG_WORLD_MERCATOR = 3395; + public static final int EPSG_NORTH_UTM_START = 32601; + public static final int EPSG_NORTH_UTM_END = 32660; + public static final int EPSG_NORTH_LAMBERT = 3574; + public static final int EPSG_NORTH_STEREO = 3995; + public static final int EPSG_SOUTH_UTM_START = 32701; + public static final int EPSG_SOUTH_UTM_END = 32760; + public static final int EPSG_SOUTH_LAMBERT = 3409; + public static final int EPSG_SOUTH_STEREO = 3031; + /** * Calculate the distance between two points on the earth using the Spheroid formula. * This algorithm does not use the radius of the earth, but instead uses the WGS84 ellipsoid. @@ -118,4 +130,34 @@ else if (geomType.equals("MultiPolygon") || geomType.equals("GeometryCollection" return 0.0; } } + + public static double angularWidth(Envelope envelope) { + double lon1 = envelope.getMinX(); + double lon2 = envelope.getMaxX(); + double lat = (envelope.getMinY() + envelope.getMaxY()) / 2; // Mid-latitude for width calculation + + // Compute geodesic distance + GeodesicData g = Geodesic.WGS84.Inverse(lat, lon1, lat, lon2); + double distance = g.s12; // Distance in meters + + // Convert distance to angular width in degrees + double angularWidth = Math.toDegrees(distance / (Geodesic.WGS84.EquatorialRadius() * Math.PI / 180)); + + return angularWidth; + } + + public static double angularHeight(Envelope envelope) { + double lat1 = envelope.getMinY(); + double lat2 = envelope.getMaxY(); + double lon = (envelope.getMinX() + envelope.getMaxX()) / 2; // Mid-longitude for height calculation + + // Compute geodesic distance + GeodesicData g = Geodesic.WGS84.Inverse(lat1, lon, lat2, lon); + double distance = g.s12; // Distance in meters + + // Convert distance to angular height in degrees + double angularHeight = Math.toDegrees(distance / (Geodesic.WGS84.EquatorialRadius() * Math.PI / 180)); + + return angularHeight; + } } diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java index 64ce4aad33..0e75192d29 100644 --- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java +++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java @@ -1171,6 +1171,75 @@ public void testBuffer() { assertEquals(expected, actual); } + @Test + public void testBestSRID() { + int[][] testCases_special = { + {0, -70, 3409}, // EPSG:3409 (Antarctic Polar Stereographic) + {0, 70, 3574}, // EPSG:3575 (North Pole LAEA Alaska) + // Special cases + {-180, 60, 32660}, {180, 60, 32660}, {-180, -60, 32760}, {180, -60, 32760}, + }; + + // Number of UTM zones + int numZones = (177 - (-177)) / 6 + 1; + int numZonesSouth = (177 - (-177)) / 6 + 1; + + int[][] testCases_UTMNorth = new int[numZones][3]; + int[][] testCases_UTMSouth = new int[numZonesSouth][3]; + + int indexNorth = 0; + int northernLat = 60; // Latitude for Northern Hemisphere UTM zones + for (int lon = -177, epsg = 32601; lon <= 177; lon += 6, epsg++) { + testCases_UTMNorth[indexNorth][0] = lon; // Longitude + testCases_UTMNorth[indexNorth][1] = northernLat; // Latitude + testCases_UTMNorth[indexNorth][2] = epsg; // EPSG code + indexNorth++; + } + + int indexSouth = 0; + int southernLat = -60; // Latitude for Southern Hemisphere UTM zones + for (int lon = -177, epsg = 32701; lon <= 177; lon += 6, epsg++) { + testCases_UTMSouth[indexSouth][0] = lon; // Longitude + testCases_UTMSouth[indexSouth][1] = southernLat; // Latitude + testCases_UTMSouth[indexSouth][2] = epsg; // EPSG code + indexSouth++; + } + + for (int[] testCase : testCases_special) { + Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1])); + int actualEPSG = Functions.bestSRID(geom); + int expectedEPSG = testCase[2]; + assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG); + } + + for (int[] testCase : testCases_UTMNorth) { + Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1])); + int actualEPSG = Functions.bestSRID(geom); + int expectedEPSG = testCase[2]; + assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG); + } + + for (int[] testCase : testCases_UTMSouth) { + Geometry geom = GEOMETRY_FACTORY.createPoint(new Coordinate(testCase[0], testCase[1])); + int actualEPSG = Functions.bestSRID(geom); + int expectedEPSG = testCase[2]; + assertEquals("Failed at coordinates (" + testCase[0] + ", " + testCase[1] + ")", expectedEPSG, actualEPSG); + } + + // Large geometry that does not fit into UTM or polar categories + Geometry geom = GEOMETRY_FACTORY.createPolygon(new Coordinate[] { + new Coordinate(-160, -40), + new Coordinate(-160, 40), + new Coordinate(160, 40), + new Coordinate(160, -40), + new Coordinate(-160, -40) + }); + int expectedEPSG = 3395; // EPSG code for World Mercator + int actualEPSG = Functions.bestSRID(geom); + + assertEquals("Expected World Mercator projection for wide range geometry", expectedEPSG, actualEPSG); + } + @Test public void nRingsUnsupported() { LineString lineString = GEOMETRY_FACTORY.createLineString(coordArray3d(0, 1, 1, 1, 2, 1, 1, 2, 2)); diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 73916552a0..940f3a722c 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -476,6 +476,34 @@ Output: 3.141592653589793 ``` +## ST_BestSRID + +Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection. The function takes a WGS84 geometry and must be in ==lon/lat== order. + +- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used. +- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen. +- In cases where none of the above conditions are met, the function defaults to the Mercator projection. +- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL. + +!!!Warning + `ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements. + +Format: `ST_BestSRID(geom: Geometry)` + +Since: `v1.6.0` + +SQL Example: + +```sql +SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))')) +``` + +Output: + +``` +32618 +``` + ## ST_Boundary Introduction: Returns the closure of the combinatorial boundary of this Geometry. diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index 5f1a53ebec..acbc52f8be 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -320,6 +320,34 @@ SELECT ST_Azimuth(ST_POINT(0.0, 25.0), ST_POINT(0.0, 0.0)) Output: `3.141592653589793` +## ST_BestSRID + +Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection. The function takes a WGS84 geometry and must be in ==lon/lat== order. + +- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used. +- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen. +- In cases where none of the above conditions are met, the function defaults to the Mercator projection. +- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL. + +!!!Warning + `ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements. + +Format: `ST_BestSRID(geom: Geometry)` + +Since: `v1.6.0` + +SQL Example: + +```sql +SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))')) +``` + +Output: + +``` +32618 +``` + ## ST_Boundary Introduction: Returns the closure of the combinatorial boundary of this Geometry. diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index e055dc97b2..a3d6f080be 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -474,6 +474,34 @@ Output: 3.141592653589793 ``` +## ST_BestSRID + +Introduction: Returns the estimated most appropriate Spatial Reference Identifier (SRID) for a given geometry, based on its spatial extent and location. It evaluates the geometry's bounding envelope and selects an SRID that optimally represents the geometry on the Earth's surface. The function prioritizes Universal Transverse Mercator (UTM), Lambert Azimuthal Equal Area (LAEA), or falls back to the Mercator projection. The function takes a WGS84 geometry and must be in ==lon/lat== order. + +- For geometries in the Arctic or Antarctic regions, the Lambert Azimuthal Equal Area projection is used. +- For geometries that fit within a single UTM zone and do not cross the International Date Line (IDL), a corresponding UTM SRID is chosen. +- In cases where none of the above conditions are met, the function defaults to the Mercator projection. +- For Geometries that cross the IDL, `ST_BestSRID` defaults the SRID to Mercator. Currently, `ST_BestSRID` does not handle geometries crossing the IDL. + +!!!Warning + `ST_BestSRID` is designed to estimate a suitable SRID from a set of approximately 125 EPSG codes and works best for geometries that fit within the UTM zones. It should not be solely relied upon to determine the most accurate SRID, especially for specialized or high-precision spatial requirements. + +Format: `ST_BestSRID(geom: Geometry)` + +Since: `v1.6.0` + +SQL Example: + +```sql +SELECT ST_BestSRID(ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))')) +``` + +Output: + +``` +32618 +``` + ## ST_Boundary Introduction: Returns the closure of the combinatorial boundary of this Geometry. diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 8cb644bb8c..0a2b4390f8 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -46,6 +46,7 @@ public static UserDefinedFunction[] getFuncs() { new Functions.ST_Azimuth(), new Functions.ST_Boundary(), new Functions.ST_Buffer(), + new Functions.ST_BestSRID(), new Functions.ST_ClosestPoint(), new Functions.ST_Centroid(), new Functions.ST_Collect(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java index 597dc6bb15..dc707a825d 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java @@ -82,6 +82,14 @@ public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.j } } + public static class ST_BestSRID extends ScalarFunction { + @DataTypeHint("Integer") + public int eval(@DataTypeHint(value = "RAW", bridgedTo = Geometry.class) Object o) { + Geometry geom = (Geometry) o; + return org.apache.sedona.common.Functions.bestSRID(geom); + } + } + public static class ST_ClosestPoint extends ScalarFunction { @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g1, diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java index 5486bd6229..3b342840a4 100644 --- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java @@ -16,16 +16,14 @@ import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.tuple.Pair; import org.apache.flink.table.api.Table; +import org.apache.flink.table.planner.expressions.In; import org.apache.sedona.flink.expressions.Functions; import org.apache.sedona.flink.expressions.FunctionsGeoTools; import org.geotools.referencing.CRS; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import org.locationtech.jts.geom.Geometry; -import org.locationtech.jts.geom.LineString; -import org.locationtech.jts.geom.Point; -import org.locationtech.jts.geom.Polygon; +import org.locationtech.jts.geom.*; import org.locationtech.jts.io.ParseException; import org.locationtech.jts.operation.buffer.BufferParameters; import org.opengis.referencing.FactoryException; @@ -99,6 +97,24 @@ public void testBuffer() { assertEquals(expected, actual); } + @Test + public void testBestSRID() { + Table table1 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POINT (160 40)') AS geom"); + table1 = table1.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom"))); + int result = (int) first(table1).getField(0); + assertEquals(32657, result); + + Table table2 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('LINESTRING(-91.185 30.4505, -91.187 30.452, -91.189 30.4535)') AS geom"); + table2 = table2.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom"))); + result = (int) first(table2).getField(0); + assertEquals(32615, result); + + Table table3 = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POLYGON((-120 30, -80 30, -80 50, -120 50, -120 30))') AS geom"); + table3 = table3.select(call(Functions.ST_BestSRID.class.getSimpleName(), $("geom"))); + result = (int) first(table3).getField(0); + assertEquals(3395, result); + } + @Test public void testClosestPoint() { Table table = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('POINT (160 40)') AS g1, ST_GeomFromWKT('POINT (10 10)') as g2"); diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py index aad15ea879..3465b9c162 100644 --- a/python/sedona/sql/st_functions.py +++ b/python/sedona/sql/st_functions.py @@ -37,6 +37,7 @@ "ST_AsKML", "ST_AsText", "ST_Azimuth", + "ST_BestSRID", "ST_Boundary", "ST_Buffer", "ST_BuildArea", @@ -304,6 +305,18 @@ def ST_Azimuth(point_a: ColumnOrName, point_b: ColumnOrName) -> Column: return _call_st_function("ST_Azimuth", (point_a, point_b)) +@validate_argument_types +def ST_BestSRID(geometry: ColumnOrName) -> Column: + """Estimates the best SRID (EPSG code) of the geometry. + + :param geometry: Geometry column to calculate the boundary for. + :type geometry: ColumnOrName + :return: SRID as an Integer + :rtype: Column + """ + return _call_st_function("ST_BestSRID", geometry) + + @validate_argument_types def ST_Boundary(geometry: ColumnOrName) -> Column: """Calculate the closure of the combinatorial boundary of a geometry column. diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index bd3ebb57ba..be8278c727 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -71,6 +71,7 @@ (stf.ST_AsKML, ("point",), "point_geom", "", "\n 0.0,1.0\n\n"), (stf.ST_AsText, ("point",), "point_geom", "", "POINT (0 1)"), (stf.ST_Azimuth, ("a", "b"), "two_points", "geom * 180.0 / pi()", 90.0), + (stf.ST_BestSRID, ("geom",), "triangle_geom", "", 3395), (stf.ST_Boundary, ("geom",), "triangle_geom", "", "LINESTRING (0 0, 1 0, 1 1, 0 0)"), (stf.ST_Buffer, ("point", 1.0), "point_geom", "ST_ReducePrecision(geom, 2)", "POLYGON ((0.98 0.8, 0.92 0.62, 0.83 0.44, 0.71 0.29, 0.56 0.17, 0.38 0.08, 0.2 0.02, 0 0, -0.2 0.02, -0.38 0.08, -0.56 0.17, -0.71 0.29, -0.83 0.44, -0.92 0.62, -0.98 0.8, -1 1, -0.98 1.2, -0.92 1.38, -0.83 1.56, -0.71 1.71, -0.56 1.83, -0.38 1.92, -0.2 1.98, 0 2, 0.2 1.98, 0.38 1.92, 0.56 1.83, 0.71 1.71, 0.83 1.56, 0.92 1.38, 0.98 1.2, 1 1, 0.98 0.8))"), (stf.ST_BuildArea, ("geom",), "multiline_geom", "ST_Normalize(geom)", "POLYGON ((0 0, 1 1, 1 0, 0 0))"), @@ -229,6 +230,7 @@ (stf.ST_AsText, (None,)), (stf.ST_Azimuth, (None, "")), (stf.ST_Azimuth, ("", None)), + (stf.ST_BestSRID, (None,)), (stf.ST_Boundary, (None,)), (stf.ST_Buffer, (None, 1.0)), (stf.ST_Buffer, ("", None)), diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py index cb141e4546..c7e9af4ffb 100644 --- a/python/tests/sql/test_function.py +++ b/python/tests/sql/test_function.py @@ -101,6 +101,23 @@ def test_st_buffer(self): actual = function_df.take(1)[0][0].wkt assert actual == "POLYGON ((-107.02 42.06, -107.02 42.07, -107.02 42.09, -107.02 42.11, -107.02 42.32, -107.02 42.33, -107.01 42.42, -107.01 42.43, -106.77 44.33, -106.16 46.15, -105.22 47.82, -103.98 49.27, -102.48 50.47, -100.78 51.36, -98.94 51.9, -97.04 52.09, -97.03 52.09, -97.01 52.09, -96.95 52.09, -96.9 52.09, -96.81 52.09, -96.7 52.09, -96.68 52.09, -96.65 52.09, -96.55 52.09, -96.54 52.09, -96.49 52.09, -96.48 52.09, -94.58 51.89, -92.74 51.33, -91.04 50.43, -89.55 49.23, -88.32 47.76, -87.39 46.08, -86.79 44.25, -86.56 42.35, -86.56 42.18, -86.56 42.17, -86.56 42.1, -86.55 41.99, -86.56 41.9, -86.56 41.78, -86.56 41.77, -86.56 41.75, -86.56 41.73, -86.56 41.7, -86.75 39.76, -87.33 37.89, -88.25 36.17, -89.49 34.66, -91 33.43, -92.72 32.51, -94.59 31.94, -96.53 31.74, -96.55 31.74, -96.56 31.74, -96.57 31.74, -96.58 31.74, -96.6 31.74, -96.69 31.74, -96.72 31.74, -96.75 31.74, -96.94 31.74, -97.02 31.74, -97.04 31.74, -97.06 31.74, -98.99 31.94, -100.85 32.5, -102.56 33.42, -104.07 34.65, -105.31 36.14, -106.23 37.85, -106.81 39.71, -107.02 41.64, -107.02 41.75, -107.02 41.94, -107.02 41.96, -107.02 41.99, -107.02 42.01, -107.02 42.02, -107.02 42.03, -107.02 42.06))" + def test_st_bestsrid(self): + polygon_from_wkt = self.spark.read.format("csv"). \ + option("delimiter", "\t"). \ + option("header", "false"). \ + load(mixed_wkt_geometry_input_location) + + polygon_from_wkt.createOrReplaceTempView("polgontable") + polygon_from_wkt.show() + + polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable") + polygon_df.createOrReplaceTempView("polygondf") + polygon_df.show() + function_df = self.spark.sql("select ST_BestSRID(polygondf.countyshape) from polygondf") + function_df.show() + actual = function_df.take(1)[0][0] + assert actual == 3395 + def test_st_envelope(self): polygon_from_wkt = self.spark.read.format("csv"). \ option("delimiter", "\t"). \ diff --git a/python/tests/streaming/spark/test_constructor_functions.py b/python/tests/streaming/spark/test_constructor_functions.py index e197cd2cdb..9e066063a5 100644 --- a/python/tests/streaming/spark/test_constructor_functions.py +++ b/python/tests/streaming/spark/test_constructor_functions.py @@ -297,7 +297,11 @@ "ST_GeomFromText('POINT(21.427834 52.042576573)')", "ST_GeomFromText('POINT(45.342524 56.342354355)')"]) .with_expected_result(0.0) - .with_transform("ST_LENGTH")) + .with_transform("ST_LENGTH")), + (SuiteContainer.empty() + .with_function_name("ST_BestSRID") + .with_arguments(["ST_GeomFromText('POINT (-177 60)')"]) + .with_expected_result(32601)) ] diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java index 37e71dff9e..25b298cf4f 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java @@ -191,6 +191,16 @@ public void test_ST_Buffer() { "POLYGON ((1 1, 0.9807852804032304 0.8049096779838718, 0.9238795325112867 0.6173165676349102, 0.8314696123025452 0.4444297669803978, 0.7071067811865476 0.2928932188134525, 0.5555702330196023 0.1685303876974548, 0.3826834323650898 0.0761204674887133, 0.1950903220161283 0.0192147195967696, 0.0000000000000001 0, -0.1950903220161282 0.0192147195967696, -0.3826834323650897 0.0761204674887133, -0.555570233019602 0.1685303876974547, -0.7071067811865475 0.2928932188134524, -0.8314696123025453 0.4444297669803978, -0.9238795325112867 0.6173165676349102, -0.9807852804032304 0.8049096779838714, -1 0.9999999999999999, -0.9807852804032304 1.1950903220161284, -0.9238795325112868 1.3826834323650896, -0.8314696123025455 1.555570233019602, -0.7071067811865477 1.7071067811865475, -0.5555702330196022 1.8314696123025453, -0.3826834323650903 1.9238795325112865, -0.1950903220161287 1.9807852804032304, -0.0000000000000002 2, 0.1950903220161283 1.9807852804032304, 0.38268343236509 1.9238795325112865, 0.5555702330196018 1.8314696123025453, 0.7071067811865474 1.7071067811865477, 0.8314696123025452 1.5555702330196022, 0.9238795325112865 1.3826834323650905, 0.9807852804032303 1.1950903220161286, 1 1))" ); } + + @Test + public void test_ST_BestSRID() { + registerUDF("ST_BestSRID", byte[].class); + verifySqlSingleRes( + "select sedona.ST_BestSRID(sedona.ST_GeomFromText('POINT (-180 60)'))", + 32660 + ); + } + @Test public void test_ST_BuildArea() { registerUDF("ST_BuildArea", byte[].class); diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java index a671d76693..7bb16fc222 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java @@ -169,6 +169,16 @@ public void test_ST_Azimuth() { 240.0133139011053 * Math.PI / 180 ); } + + @Test + public void test_ST_BestSRID() { + registerUDFV2("ST_BestSRID", String.class); + verifySqlSingleRes( + "select sedona.ST_BestSRID(ST_GeometryFromWKT('POINT (-180 60)'))", + 32660 + ); + } + @Test public void test_ST_Boundary() { registerUDFV2("ST_Boundary", String.class); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index d9763d9fc2..6bd07a035f 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -168,6 +168,13 @@ public static byte[] ST_Buffer(byte[] geometry, double radius) { ); } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}) + public static int ST_BestSRID(byte[] geometry) { + return Functions.bestSRID( + GeometrySerde.deserialize(geometry) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}) public static byte[] ST_BuildArea(byte[] geometry) { return GeometrySerde.serialize( diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java index a94a7df722..ab0abdc0dc 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java @@ -171,6 +171,13 @@ public static String ST_BoundingDiagonal(String geometry) { ); } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}, argTypes = {"Geometry"}) + public static int ST_BestSRID(String geometry) { + return Functions.bestSRID( + GeometrySerde.deserGeoJson(geometry) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry", "radius"}, argTypes = {"Geometry", "double"}, returnTypes = "Geometry") public static String ST_Buffer(String geometry, double radius) { return GeometrySerde.serGeoJson( diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 00a04b0786..60d43b52de 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -69,6 +69,7 @@ object Catalog { function[ST_NPoints](), function[ST_NDims](), function[ST_Buffer](), + function[ST_BestSRID](), function[ST_Envelope](), function[ST_Length](), function[ST_Area](), diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index 3ebf42b910..9d933c18af 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -160,6 +160,13 @@ case class ST_Buffer(inputExpressions: Seq[Expression]) } } +case class ST_BestSRID(inputExpressions: Seq[Expression]) + extends InferredExpression(Functions.bestSRID _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} /** * Return the bounding rectangle for a Geometry diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala index 957c2483ee..6744ad1dfb 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala @@ -69,6 +69,9 @@ object st_functions extends DataFrameAPI { def ST_Buffer(geometry: Column, buffer: Column, parameters: Column): Column = wrapExpression[ST_Buffer](geometry, buffer, parameters) def ST_Buffer(geometry: String, buffer: Double, parameters: String): Column = wrapExpression[ST_Buffer](geometry, buffer, parameters) + def ST_BestSRID(geometry: Column): Column = wrapExpression[ST_BestSRID](geometry) + def ST_BestSRID(geometry: String): Column = wrapExpression[ST_BestSRID](geometry) + def ST_BuildArea(geometry: Column): Column = wrapExpression[ST_BuildArea](geometry) def ST_BuildArea(geometry: String): Column = wrapExpression[ST_BuildArea](geometry) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 0b163a1c34..56685f65be 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -216,6 +216,32 @@ class dataFrameAPITestScala extends TestBaseScala { assertEquals(expected, actual) } + it("Passed ST_BestSRID") { + val pointDf = sparkSession.sql("SELECT ST_Point(-177, -60) AS geom") + val df = pointDf.select(ST_BestSRID("geom").as("geom")) + var actual = df.take(1)(0).get(0).asInstanceOf[Int] + var expected = 32701 + assertEquals(expected, actual) + + val linestringDf = sparkSession.sql("SELECT ST_GeomFromWKT('LINESTRING(-91.185 30.4505, -91.187 30.452, -91.189 30.4535)') AS geom") + val dfLine = linestringDf.select(ST_BestSRID("geom").as("geom")) + actual = dfLine.take(1)(0).get(0).asInstanceOf[Int] + expected = 32615 + assertEquals(expected, actual) + + val polygonDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON((-120 30, -80 30, -80 50, -120 50, -120 30))') AS geom") + val dfPolygon = polygonDf.select(ST_BestSRID("geom").as("geom")) + actual = dfPolygon.take(1)(0).get(0).asInstanceOf[Int] + expected = 3395 + assertEquals(expected, actual) + + val polygonDf2 = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON((-73.9980 40.7265, -73.9970 40.7265, -73.9970 40.7255, -73.9980 40.7255, -73.9980 40.7265))') AS geom") + val dfPolygon2 = polygonDf2.select(ST_BestSRID("geom").as("geom")) + actual = dfPolygon2.take(1)(0).get(0).asInstanceOf[Int] + expected = 32618 + assertEquals(expected, actual) + } + it("Passed ST_Envelope") { val polygonDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 0))') AS geom") val df = polygonDf.select(ST_Envelope("geom")) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala index e8eb96de8b..f31d698193 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala @@ -73,6 +73,15 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample assert(functionDf.count() > 0); } + it("Passed ST_BestSRID") { + val polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) + polygonWktDf.createOrReplaceTempView("polygontable") + val polygonDf = sparkSession.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable") + polygonDf.createOrReplaceTempView("polygondf") + val functionDf = sparkSession.sql("select ST_BestSRID(polygondf.countyshape) from polygondf") + assert(functionDf.count() > 0); + } + it("Passed ST_Envelope") { var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) polygonWktDf.createOrReplaceTempView("polygontable")