diff --git a/data_processing/make_dataset.py b/data_processing/make_dataset.py index 6f1064c..f0c0569 100644 --- a/data_processing/make_dataset.py +++ b/data_processing/make_dataset.py @@ -5,7 +5,7 @@ @author: hasan """ -from pystac import (Catalog) +# from pystac import (Catalog) import argparse from tqdm import trange,tqdm import geopandas as gp @@ -15,142 +15,197 @@ from rasterio import warp from rasterio.transform import from_bounds from PIL import Image,ImageDraw -from skimage.morphology import dilation, square, watershed -import solaris as sol +from skimage.morphology import dilation, square +from skimage.segmentation import watershed + from simplification.cutil import simplify_coords_vwp from imantics import Mask import numpy as np import rasterio from matplotlib import pyplot as plt from rasterio.windows import Window -def get_areas(cols): - areas=[] - for iid in cols: - items=[x for x in cols[iid].get_all_items()] - for i,id in enumerate(items): - if(i%2==0 and i+1 List[List[float]]: """ - Reverse the coordinates in pol - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] - Returns [[y1,x1],[y2,x2],...,[yN,xN]] + Generates a list of coordinates forming a polygon. + + Parameters + ---------- + bbox : List[float] + A list representing the bounding box coordinates [xmin, ymin, xmax, ymax]. + + Returns + ------- + List[List[float]] + A list of coordinates representing the polygon. """ - return [list(f[-1::-1]) for f in pol] -def to_index(wind_): + return [ + [bbox[0], bbox[1]], + [bbox[2], bbox[1]], + [bbox[2], bbox[3]], + [bbox[0], bbox[3]], + [bbox[0], bbox[1]] + ] + + +def shape_polys(polyg): + """Shapes the building polygons as a list of polygon lists. + + Parameters + ---------- + polyg : List[Polygon] + List of building polygons in numpy arrays. + + Returns + ------- + List[List[Tuple(float, float)]] + List of shaped polygons. """ - Generates a list of index (row,col): [[row1,col1],[row2,col2],[row3,col3],[row4,col4],[row1,col1]] + + all_polys = [] + for poly in polyg: + if len(poly) >= 3: + f = poly.reshape(-1, 2) + simplified_vw = simplify_coords_vwp(f, .3) + if len(simplified_vw) > 2: + mpoly = [] + # Rebuilding the polygon in the way that PIL expects the values [(x1,y1),(x2,y2)] + for i in simplified_vw: + mpoly.append((i[0], i[1])) + # Adding the first point to the last to close the polygon + mpoly.append((simplified_vw[0][0], simplified_vw[0][1])) + all_polys.append(mpoly) + return all_polys + + +def pol_to_np(pol: List[List[float]]) -> np.ndarray: + """Converts a list of coordinates to a NumPy array. + + Parameters + ---------- + pol : List[List[float]] + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. + + Returns + ------- + np.ndarray + NumPy array of coordinates. """ - return [[wind_.row_off,wind_.col_off], - [wind_.row_off,wind_.col_off+wind_.width], - [wind_.row_off+wind_.height,wind_.col_off+wind_.width], - [wind_.row_off+wind_.height,wind_.col_off], - [wind_.row_off,wind_.col_off]] + + return np.array([list(l) for l in pol]) + -def generate_polygon(bbox): +def pol_to_bounding_box(pol: List[List[float]]) -> BoundingBox: + """Converts a list of coordinates to a bounding box. + + Parameters + ---------- + pol : List[List[float]] + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. + + Returns + ------- + BoundingBox + Bounding box of the coordinates. """ - Generates a list of coordinates: [[x1,y1],[x2,y2],[x3,y3],[x4,y4],[x1,y1]] + + arr = pol_to_np(pol) + return BoundingBox(np.min(arr[:, 0]), + np.min(arr[:, 1]), + np.max(arr[:, 0]), + np.max(arr[:, 1])) + + +def reverse_coordinates(pol: List) -> List: """ - return [[bbox[0],bbox[1]], - [bbox[2],bbox[1]], - [bbox[2],bbox[3]], - [bbox[0],bbox[3]], - [bbox[0],bbox[1]]] + Reverse the coordinates in a polygon. + + Parameters + ---------- + pol : list of list + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. -def pol_to_np(pol): + Returns + ------- + list of list + Reversed coordinates: [[y1, x1], [y2, x2], ..., [yN, xN]]. """ - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] + + return [list(f[-1::-1]) for f in pol] + + +def get_areas(cols: dict) -> List: """ - return np.array([list(l) for l in pol]) + Get areas from a collection of columns. -def pol_to_bounding_box(pol): + Parameters + ---------- + cols : dict + A dictionary representing columns. + + Returns + ------- + list + A list of tuples containing area information. Each tuple has the format (iid, id1, id2). """ - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] + + areas = [] + for iid in cols: + items = [x for x in cols[iid].get_all_items()] + for i, id in enumerate(items): + if i % 2 == 0 and i + 1 < len(items): + areas.append((iid, items[i].id, items[i + 1].id)) + return areas + + +def to_index(wind_: Window) -> List[List[int]]: + """ + Generates a list of index coordinates (row, col) for a given Window. + + Parameters + ---------- + wind_ : Window + The rasterio Window object specifying the region of interest. + + Returns + ------- + List[List[int]] + A list of index coordinates representing the corners of the Window. + """ + + return [ + [wind_.row_off, wind_.col_off], + [wind_.row_off, wind_.col_off + wind_.width], + [wind_.row_off + wind_.height, wind_.col_off + wind_.width], + [wind_.row_off + wind_.height, wind_.col_off], + [wind_.row_off, wind_.col_off] + ] + + +def create_separation(labels: np.ndarray) -> np.ndarray: + """Creates a mask for building spacing of close buildings. + + Parameters + ---------- + labels : np.ndarray + Numpy array where each building's pixels are encoded with a certain value. + + Returns + ------- + np.ndarray + Mask for building spacing. """ - arr = pol_to_np(pol) - return BoundingBox(np.min(arr[:,0]), - np.min(arr[:,1]), - np.max(arr[:,0]), - np.max(arr[:,1])) - -def get_winNmask(rst,x,y,width,height,polys): - '''This function crops a chip for a specified width and height and generates the mask of buildings with borders''' - '''it requires solaris framework''' - ''' input : rst = is a raster which is the tif image to be cropped open using rasterio.open(tif url)''' - ''' x : the x pixel coordinate of the top left corner of the window desired - y: the y pixel coordinate of the top left corner of the window desired ( window =image chip) - width : the width of the window to be chipped ,ex =512 - height : the height of the window to be chipped - polys: the polygons of the whole raster image, in opencities ai the data was stored cleanly in a pystac and i was able - to insert the polygons of the buildings in geo-pandas dataframe , the polys parameter was called by geo-pandas-df-polys.geometry - so this is very specific and i didnt test other formats''' - ''' output : aoo : np.array of RGBA channels of the image chipped(cropped) - fbc_mask : an np array of shape [width,height,2] it contains the mask of buildings and borders''' - #specify the width and height and top left corner coordinates of the window - win = Window(x,y,width,height) - #get the bounding box of the image chip in relative coordinates(this doesnt mean the pixel coordinates - #but rather transforms the pixel coordinates into rst.transform system used ) - bbox = windows.bounds(win,rst.transform) - #generate of list of coordinates for this bounding box [top left- bottom left -bottom right -top right - top left ](to for a loop) - pol = generate_polygon(bbox) - # put the coordinates of the window bounding box into a numpy array - # this means that u put the x pixel in [:,0] and y in[:,1] - #so we have a numpy array of shape [5,2] - pol_np = np.array(pol) - #transform all the relative coordinates of the bounding box of window to long/lat coordinates - coords_transformed = warp.transform(rst.crs,{'init': 'epsg:4326'},pol_np[:,0],pol_np[:,1]) - - ct=coords_transformed - #get the extrimities in long/lat (left-bottom-right-top) - #example for left u need the minimum of the x in channel 0 while for the bottom u need the minimum of the y coordinates (speaking long/lat wise) - l,b,r,t=min(ct[0]),min(ct[1]),max(ct[0]),max(ct[1]) - #make a list of tuples of the corners coordinates in long/lat of the window bounding box(just to match a certain shape) - coords_transformed = [[r,c] for r,c in zip(coords_transformed[0],coords_transformed[1])] - #from_bounds gives the affine transformation from long/lat to pixel(i only managed to use it usefully in the solaris function) - tfm1=from_bounds(l,b,r,t,height,width) - #polygonize the coordinates if the bounding boxes(again just to match a certain form) - coords_transformed= Polygon(coords_transformed) - # read the specified window from a raster - arr_win = rst.read(window=win) - #make a list of buildings polygons in long/lat coords from the input ''polys'' if the building polygon intersect the window - all_polys = [poly for poly in polys if poly.intersects(coords_transformed)] - # make a geo pandas dataframe and specify the system used (epsg :4326) of the building polygons in all_polys - all_polys_gdf = gp.GeoDataFrame(geometry=all_polys,crs='epsg:4326') - #generate the mask of buildings and border for certain width and height (ex [512,512]) - #df= geopandas data frame - # channels are the classes(extra class spacing didnt work with me so only ['building','boundary '] masks are generated) - fbc_mask = sol.vector.mask.df_to_px_mask(df=all_polys_gdf, - channels=['footprint', 'boundary'], - affine_obj=tfm1, shape=(width,height), - boundary_width=4, boundary_type='inner',meters=False) - #shape the window from rasterio.window to numpy array of the window array (aoo) - aoo=np.array(arr_win) - #return RGBA window image array (aoo) and mask - return aoo,fbc_mask - -def create_separation(labels): - #create a mask for building spacing of close buildings - '''takes as input a numpy array where each buildings pixels is encoded with a certain value''' - '''for example building_1 pixels are encoded as 1, building_2 as 2.....building_n as n''' - '''note that i encoded the array as np.uint8 so on 8 bits, which means that the highest pixel value is 255''' - '''so if u have more the 255 buildings in the image/mask u should consider a np.uint16 or np.uint32 for example to be in the safe side''' - #perform a dilation on the image , where square(20) is the kernel( u can change the size of the kernel - #or the shape to a rectange if u want or whatever , but dont guarantee the result tmp = dilation(labels > 0, square(20)) - #apply the watershed algorithm where the basins are the original encoded labels to the dilated above - #this generates the dams/line of separation tmp2 = watershed(tmp, labels, mask=tmp, watershed_line=True) > 0 - #XOR operation to remove external lines - #u can visualize all these if u want to understand more tmp = tmp ^ tmp2 - #dilate the separation to get a sufficient size tmp = dilation(tmp, square(5)) msk1 = np.zeros_like(labels, dtype='bool') - #this part simply removes unwanted separation pixels by checking horizontaly and vertically the area around the pixel - #again visualize to gain better understanding of this operation + for y0 in range(labels.shape[0]): for x0 in range(labels.shape[1]): if not tmp[y0, x0]: @@ -163,152 +218,28 @@ def create_separation(labels): max(0, x0 - sz):min(labels.shape[1], x0 + sz + 1)]) if len(uniq[uniq > 0]) > 1: msk1[y0, x0] = True - #return the separation/spacing masks return msk1 -def shape_polys(polyg): - #this function just shape the building polygons as a list of polygon lists - all_polys = [] - for poly in polyg: - if len(poly) >= 3: - f = poly.reshape(-1, 2) - simplified_vw = simplify_coords_vwp(f, .3) - if len(simplified_vw) > 2: - mpoly = [] - # Rebuilding the polygon in the way that PIL expects the values [(x1,y1),(x2,y2)] - for i in simplified_vw: - mpoly.append((i[0], i[1])) - # Adding the first point to the last to close the polygon - mpoly.append((simplified_vw[0][0], simplified_vw[0][1])) - all_polys.append(mpoly) - return all_polys -def make_instance_mask(all_polys,size): - #this function encodes each building polygon with a value in the mask - #used in the make_separation function above - #size is the width and height of the square maks - #zeros array of specified shape +def make_instance_mask(all_polys: List[List[Tuple[float, float]]], size: int) -> Image: + """Encodes each building polygon with a value in the mask. + + Parameters + ---------- + all_polys : List[List[Tuple[float, float]]] + List of building polygons. + size : int + Width and height of the square mask. + + Returns + ------- + Image + Instance mask. + """ + bg=np.zeros((size,size)).astype(np.uint8) bg=Image.fromarray(bg).convert('L') - #get starting encoding value (here max is 255 since i used 8 bit encoding) shift=255-len(all_polys) - #draw each polygon with a certain different encoded value for i,poly in enumerate(all_polys): ImageDraw.Draw(bg).polygon(poly,outline=shift+i,fill=shift+i) - #return the instance mask - #instance here refers to the fact that now we can differentiate each building from another in the image return bg - -def tile_area(area,iid,lid,tile_size,ipth,mpth): - '''this function is the heart of all this script''' - '''it tiles a certain area(tif image) and generates the correspond tile mask - input: - area : area id (example : acc-665946) - iid : the image id in the pystac - lid :the label id in the pystac - tile_size : size of width and height of the tile(use only square, i dont guarantee rectangles) - ipth : directory to save image tiles - mpth : directory to save the masks - ''' - #get the item u want to tile in the pystac - item=cols[area].get_item(id=iid) - #get the labels of the item u picked in the pystac - label=cols[area].get_item(id=lid) - # make a geopandas dataframe of the labels - #most of these i just copied from dave luo script - lbl_gfd=gp.read_file(label.make_asset_hrefs_absolute().assets['labels'].href) - #get the geometry of the label geo pandas dataframe - polys=lbl_gfd.geometry - #get the tif url - tif_url = item.assets['image'].href - if tif_url.startswith("./"): - tif_url = '/'.join(item.to_dict()['links'][1]['href'].split("/")[:-1])+tif_url[1:] - #open the tif image as a raster - rst = rasterio.open(tif_url) - #get the width and height of the raster - wid,hei=rst.width,rst.height - #move along the raster with a stride = tile_size - # this result in ignoring the extra left rectangles at the boundaries - #u can manage to change it and simply pad the rectangle with zeros to get a square or something else - - for i in trange(0,tile_size*int(hei/tile_size),tile_size): - for j in range(0,tile_size*int(wid/tile_size),tile_size): - # get window array and mask of buildings and borders - aoo,mask=get_winNmask(rst,j,i,tile_size,tile_size,polys) - #check the unique values in the RGBA image - - uniq=np.unique(aoo) - #if they are all zeros then the image contains nothing but black and u dont need it, go to the loop start - if(np.all(uniq==0)):continue - #check the buildins mask if any buildings exist - uniq=np.unique(mask[:,:,0]) - #if no then the mask is zeros and u dont need to perform the create_spacing operation - if(np.all(uniq==0)): - spacing=np.zeros((tile_size,tile_size)).astype(np.uint8) - else: - #XOR the buildings mask with borders - #to get the mask of buildings minus boundaries - getit=mask[:,:,0]^mask[:,:,1] - #get all polygons in the mask - polyg = Mask(getit).polygons() - #shape the polys - extracted=shape_polys(polyg) - #encode the buildinfs - labels=np.array(make_instance_mask(extracted,tile_size)) - #get the spacing mask - spacing=np.array(create_separation(labels)).astype(np.uint8)*255 - #print(np.unique(spacing)) - #encode the buildings to 64, boundaries to 128 and spacing to 255, to differentiate and for visualization - aoo=np.moveaxis(aoo,0,2) - final_mask=spacing - final_mask[(np.where(mask[:,:,0]>0))]=np.uint8(64) - final_mask[(np.where(mask[:,:,1]>0))]=np.uint8(128) - #convert the RGBA to RGB image - img=Image.fromarray(aoo).convert('RGB') - fmsk=Image.fromarray(final_mask) - #save each in the correct directory - img.save('/'.join([ipth,f'{area}_{iid}_{i}_{j}.png'])) - fmsk.save('/'.join([mpth,f'{area}_{iid}_{i}_{j}_mask.png'])) - -if __name__=='__main__': - #parse the arguments from terminal - parser=argparse.ArgumentParser() - parser.add_argument('--data',type=str,help='path to train_tier') - parser.add_argument('--out',type=str,help='output path') - parser.add_argument('--tsize',type=int,help='tile size in pixels') - parser.add_argument('--bw',type=int,help='building contours width in pixels') - args=parser.parse_args() - #path to the main catalog of the pystac - #here the path is to tier1 catalog - path='/media/hasan/DATA/OpenCitiesAI/train_tier_1/catalog.json' - #path to the images and masks directory to be saved - #if parsed please create these directories - if(args.data):path=args.data - img_path='/media/hasan/DATA/OpenCitiesAI/Dataset/images' - mask_path='/media/hasan/DATA/OpenCitiesAI/Dataset/masks' - if(args.out): - img_path='/'.join([args.out,'images']) - mask_path='/'.join([args.out,'masks']) - #specify tile size - default is 512 - tile_size=512 - if(args.tsize):tile_size=args.tsize - # this doesnt work - bw=3 - if(args.bw):bw=args.bw - #open the main catalog of the pystac - main_cat=Catalog.from_file(path) - #get all the columns in the catalog(main_children) , it is how the pystac is organized - #i just copied this - cols = {cols.id:cols for cols in main_cat.get_children()} - print(cols) - # get the list of all the areas and their relative image id and label id - a=get_areas(cols) - #tile each area in the dataset - for si in tqdm(a): - area=si[0] - iid=si[1] - lid=si[2] - print(area) - print(iid) - tile_area(area,iid,lid,512,img_path,mask_path) - \ No newline at end of file