Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-471] Support adding pandas df to SedonaKepler if geometry column is not present #1233

Merged
merged 35 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
21258e8
Add ST_DWithin
iGN5117 Jan 1, 2024
e887506
Add documentation for ST_DWithin
iGN5117 Jan 1, 2024
f1b3f84
Remove unwanted code
iGN5117 Jan 1, 2024
341017d
removed null check test for ST_DWithin
iGN5117 Jan 1, 2024
4f6f09d
Fix EOF lint error
iGN5117 Jan 1, 2024
a41339d
Add explanation for ST_DWithin
iGN5117 Jan 2, 2024
043223d
Remove CRS checking logic in ST_DWithin
iGN5117 Jan 2, 2024
0079af5
Add optimized join support for ST_DWithin
iGN5117 Jan 14, 2024
747dca8
Merge branch 'develop_Nilesh_1.5.1' of https://github.com/iGN5117/sed…
iGN5117 Jan 14, 2024
1c61199
Remove test change to resourceFolder
iGN5117 Jan 14, 2024
f9ee1ec
remove unnecessary cast to double
iGN5117 Jan 14, 2024
8a0fc7d
Add broadcast join test
iGN5117 Jan 14, 2024
158325f
Add example of ST_DWithin in Optimizer.md
iGN5117 Jan 14, 2024
cda4802
Add useSpheroid version to ST_DWithin | Add optimized join support
iGN5117 Jan 19, 2024
1fe3c98
Merge branch 'sedona-master' into develop_Nilesh_1.5.1
iGN5117 Jan 19, 2024
91f3ca7
remove accidental resourceFolder change
iGN5117 Jan 19, 2024
3bf9c99
Fix mistake in making useSpheroid optional in ST_DWithin
iGN5117 Jan 19, 2024
ecac80d
Fix incorrect test data in test_dataframe_api.py
iGN5117 Jan 19, 2024
57f3f94
fix failing test in test_predicate.py
iGN5117 Jan 19, 2024
ae9f131
Address PR changes | Move ST_DWithin to DistanceJoin
iGN5117 Jan 20, 2024
8488cad
fix failing test
iGN5117 Jan 20, 2024
c1e85b9
Remove randomness from sphere test case generation
iGN5117 Jan 25, 2024
22422d7
Merge branch 'sedona-master' into develop_Nilesh_1.5.1
iGN5117 Jan 25, 2024
e486724
Refactor documentation of ST_DWithin
iGN5117 Jan 25, 2024
5a31de2
revert resourceFolder path
iGN5117 Jan 25, 2024
b569d89
Handle complex boolean expressions in ST_DWithin
iGN5117 Jan 27, 2024
a5022b2
add a blanket try catch for ST_DWithin to handle complex boolean expr…
iGN5117 Jan 28, 2024
7e6cd49
add collect to the python test
iGN5117 Jan 28, 2024
d0bfbaa
replace head() with count()
iGN5117 Jan 28, 2024
51b3d11
Merge branch 'sedona-master' into develop_Nilesh_1.5.1
iGN5117 Jan 28, 2024
5352fb9
Add null check for geometry column while adding a df to keplergl
iGN5117 Jan 28, 2024
172ae05
Revert "Add null check for geometry column while adding a df to keple…
iGN5117 Jan 28, 2024
81079a3
Add null check for geometry column while adding df to keplergl
iGN5117 Jan 28, 2024
19a92ba
support adding pandas df to sedonaKepler and SedonaPyDeck if geometry…
iGN5117 Feb 7, 2024
9b104ac
Merge branch 'sedona-master' into develop_Nilesh_1.5.1_KeplerBug
iGN5117 Feb 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/sedona/maps/SedonaKepler.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ def add_df(cls, kepler_map, df, name="unnamed"):
:param name: [Optional] Name to assign to the dataframe, default name assigned is 'unnamed'
:return: Does not return anything, adds df directly to the given map object
"""
geo_df = SedonaMapUtils.__convert_to_gdf__(df)
geo_df = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
kepler_map.add_data(geo_df, name=name)
12 changes: 9 additions & 3 deletions python/sedona/maps/SedonaMapUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@
class SedonaMapUtils:

@classmethod
def __convert_to_gdf__(cls, df, rename=True, geometry_col=None):
def __convert_to_gdf_or_pdf__(cls, df, rename=True, geometry_col=None):
"""
Converts a SedonaDataFrame to a GeoPandasDataFrame and also renames geometry column to a standard name of
'geometry' :param df: SedonaDataFrame to convert :param geometry_col: [Optional] :return:
'geometry'
However, if no geometry column is found even after traversing schema, returns a Pandas Dataframe
:param df: SedonaDataFrame to convert
:param geometry_col: [Optional]
:return: GeoPandas Dataframe or Pandas Dataframe
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df)
pandas_df = df.toPandas()
if geometry_col is None: # No geometry column found even after searching schema, return Pandas Dataframe
return pandas_df
geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
if geometry_col != "geometry" and rename is True:
geo_df.rename_geometry("geometry", inplace=True)
Expand All @@ -44,7 +50,7 @@ def __convert_to_geojson__(cls, df):
:param df: SedonaDataFrame to convert
:return: GeoJSON object
"""
gdf = SedonaMapUtils.__convert_to_gdf__(df)
gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df)
gjson_str = gdf.to_json()
gjson = json.loads(gjson_str)
return gjson
Expand Down
2 changes: 1 addition & 1 deletion python/sedona/maps/SedonaPyDeck.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def _prepare_df_(cls, df, add_coords=False, geometry_col=None):
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df=df)
gdf = SedonaMapUtils.__convert_to_gdf__(df, rename=False, geometry_col=geometry_col)
gdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False, geometry_col=geometry_col)
if add_coords is True:
SedonaPyDeck._create_coord_column_(gdf=gdf, geometry_col=geometry_col)
return gdf
Expand Down
20 changes: 19 additions & 1 deletion python/tests/maps/test_sedonakepler_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from tests import mixed_wkt_geometry_input_location
from tests import csv_point_input_location
import geopandas as gpd
from pyspark.sql.functions import explode, hex


class TestVisualization(TestBase):
Expand Down Expand Up @@ -70,6 +71,22 @@ def test_df_addition(self):
assert sedona_kepler_empty_map._repr_html_() == kepler_map._repr_html_()
assert sedona_kepler_empty_map.config == kepler_map.config

def test_pandas_df_addition(self):
    """
    Check that SedonaKepler.create_map accepts a dataframe whose only column is a
    hex string (no geometry column) and still returns a map object.
    """
    wkt_rows = (
        self.spark.read.format("csv")
        .option("delimiter", "\t")
        .option("header", "false")
        .load(mixed_wkt_geometry_input_location)
    )
    wkt_rows.createOrReplaceTempView("polygontable")

    # Derive H3 cell ids from the WKT geometries; the geometry itself is not kept.
    cell_ids = self.spark.sql(
        "select ST_H3CellIDs(ST_GeomFromWKT(polygontable._c0), 3, false) as h3_cellID from polygontable")

    # One row per cell id, then the hex representation as the sole output column.
    exploded_cells = cell_ids.select(explode(cell_ids.h3_cellID).alias("h3"))
    hex_cells = exploded_cells.select(hex(exploded_cells.h3).alias("hex_h3"))

    kepler_map = SedonaKepler.create_map(df=hex_cells, name="h3")

    # Only verifies that map creation succeeds.
    assert kepler_map is not None

def test_adding_multiple_datasets(self):
config = {'version': 'v1',
'config': {'visState': {'filters': [],
Expand Down Expand Up @@ -180,7 +197,8 @@ def test_adding_multiple_datasets(self):
load(csv_point_input_location)

point_csv_df.createOrReplaceTempView("pointtable")
point_df = self.spark.sql("select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
point_df = self.spark.sql(
"select ST_Point(cast(pointtable._c0 as Decimal(24,20)), cast(pointtable._c1 as Decimal(24,20))) as arealandmark from pointtable")
polygon_wkt_df.createOrReplaceTempView("polygontable")
polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable")

Expand Down
Loading