Skip to content

Commit

Permalink
[SEDONA-163] Better handle of unsupported types in shapefile reader (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jiayuasu authored Sep 7, 2022
1 parent abd142d commit c4e37ba
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 43 deletions.
9 changes: 9 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,18 @@ BSD 2-Clause License
--------------------------------------
zeppelin/index.js (modified based on volume-leaflet: https://github.com/volumeint/helium-volume-leaflet)


No-copyright data used in unit tests
--------------------------------------

UrbIS-Adm 3D from datastore.brussels (Creative Commons CC-0 licence, No copyright)
---------------
core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.dbf
core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.prj
core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shp
core/src/test/resources/shapefiles/unsupported/UrbAdm3D_142166_Bu_Ground.shx


TIGER/Line from United States Census Bureau
---------------
core/src/test/resources/arealm-small.csv
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,45 @@
import org.locationtech.jts.geom.GeometryFactory;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

public enum ShapeType
implements Serializable
{

UNDEFINED(0),
POINT(1),
POLYLINE(3),
POLYGON(5),
MULTIPOINT(8);
// The following IDs are defined in the Shapefile specification
NULL(0, false),
POINT(1, true),
POLYLINE(3, true),
POLYGON(5, true),
MULTIPOINT(8, true),
POINTZ(11, false),
POLYLINEZ(13, false),
POLYGONZ(15, false),
MULTIPOINTZ(18, false),
POINTM(21, false),
POLYLINEM(23, false),
POLYGONM(25, false),
MULTIPOINTM(28, false),
MULTIPATCH(31, false),
// UNDEFINED is the fallback for IDs not listed above; a normal shapefile should NOT have this type
UNDEFINED(-1, false);

private final int id;
private final boolean supported;
// A lookup map for getting a Type from its id
private static final Map<Integer, ShapeType> lookup = new HashMap<Integer, ShapeType>();

static {
for (ShapeType s : ShapeType.values()) {
lookup.put(s.id, s);
}
}

/**
 * Creates a shape type constant.
 *
 * @param id numeric shape type code stored in this constant
 * @param supported flag stored in this constant and reported by {@code isSupported()}
 */
ShapeType(int id)
ShapeType(int id, boolean supported)
{
this.id = id;
this.supported = supported;
}

/**
Expand All @@ -48,24 +71,8 @@ public enum ShapeType
*/
public static ShapeType getType(int id)
{
ShapeType type;
switch (id) {
case 1:
type = POINT;
break;
case 3:
type = POLYLINE;
break;
case 5:
type = POLYGON;
break;
case 8:
type = MULTIPOINT;
break;
default:
type = UNDEFINED;
}
return type;
// lookup is keyed by each constant's id; an id with no matching constant yields null here
ShapeType type = lookup.get(id);
// fall back to UNDEFINED so callers never receive null
return type == null ? UNDEFINED : type;
}

/**
Expand Down Expand Up @@ -99,4 +106,13 @@ public int getId()
{
return id;
}

/**
 * Reports the {@code supported} flag this constant was created with.
 *
 * @return {@code true} if the shape type is supported by Sedona, {@code false} otherwise
 */
public boolean isSupported()
{
    return this.supported;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ public class TypeUnknownException
*/
public TypeUnknownException(int typeID)
{
    // Report the raw id next to the resolved name: every id without a ShapeType
    // constant resolves to UNDEFINED, so the name alone cannot tell the caller
    // which value was actually read from the file.
    super("Unknown shape type " + ShapeType.getType(typeID).name() + " (id " + typeID + ")");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,13 @@ public boolean nextKeyValue()
boolean hasNextShp = shapeFileReader.nextKeyValue();
if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }

int curShapeType = shapeFileReader.getCurrentValue().getTypeID();
while (hasNextShp && ShapeType.getType(curShapeType) == ShapeType.UNDEFINED) {
ShapeType curShapeType = shapeFileReader.getCurrentValue().getType();
while (hasNextShp && !curShapeType.isSupported()) {
logger.warn("[SEDONA] Shapefile type " + curShapeType.name() + " is not supported. Skipped this record." +
" Please use QGIS or GeoPandas to convert it to a type listed in ShapeType.java");
if (hasDbf) { hasNextDbf = dbfFileReader.nextKeyValue(); }
hasNextShp = shapeFileReader.nextKeyValue();
curShapeType = shapeFileReader.getCurrentValue().getTypeID();
curShapeType = shapeFileReader.getCurrentValue().getType();
}
// check if records match in .shp and .dbf
if (hasDbf) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.sedona.core.formatMapper.shapefileParser.shapes;

import org.apache.hadoop.io.BytesWritable;
import org.apache.sedona.core.formatMapper.shapefileParser.parseUtils.shp.ShapeType;

import java.io.Serializable;

Expand Down Expand Up @@ -59,4 +60,9 @@ public int getTypeID()
{
return typeID;
}

/**
 * Resolves this record's numeric type id to its {@link ShapeType} constant.
 *
 * @return the result of {@code ShapeType.getType(typeID)}
 */
public ShapeType getType()
{
    final ShapeType resolved = ShapeType.getType(typeID);
    return resolved;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,23 +104,14 @@ public static void tearDown()
*
* @throws IOException
*/
@Ignore
public void testShapefileEndWithUndefinedType()
@Test
public void testShapefileEndWithUnsupportedType()
throws IOException
{
// load shape with geotool.shapefile
String inputLocation = getShapeFilePath("undefined");
FeatureCollection<SimpleFeatureType, SimpleFeature> collection = loadFeatures(inputLocation);
// load shapes with our tool
// Read data that is in PolygonZ format
String inputLocation = getShapeFilePath("unsupported");
SpatialRDD shapeRDD = ShapefileReader.readToGeometryRDD(sc, inputLocation);
FeatureIterator<SimpleFeature> features = collection.features();
int nullNum = 0;
while (features.hasNext()) {
SimpleFeature feature = features.next();
Geometry g = (Geometry) feature.getDefaultGeometry();
if (g == null) { nullNum++; }
}
assertEquals(shapeRDD.getRawSpatialRDD().count(), collection.size() - nullNum);
// POLYGONZ is flagged as unsupported in ShapeType, so no record from this
// PolygonZ fixture is expected to reach the resulting RDD
assertEquals(0, shapeRDD.getRawSpatialRDD().count());
}

/**
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PROJCS["Belge_Lambert_1972",GEOGCS["GCS_Belge_1972",DATUM["D_Belge_1972",SPHEROID["International_1924",6378388.0,297.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",150000.01256],PARAMETER["False_Northing",5400088.4378],PARAMETER["Central_Meridian",4.367486666666666],PARAMETER["Standard_Parallel_1",49.8333339],PARAMETER["Standard_Parallel_2",51.16666723333333],PARAMETER["Latitude_Of_Origin",90.0],UNIT["Meter",1.0]]
Binary file not shown.
Binary file not shown.

0 comments on commit c4e37ba

Please sign in to comment.