diff --git a/data_processing/make_dataset.py b/data_processing/make_dataset.py index 6f1064c..f0c0569 100644 --- a/data_processing/make_dataset.py +++ b/data_processing/make_dataset.py @@ -5,7 +5,7 @@ @author: hasan """ -from pystac import (Catalog) +# from pystac import (Catalog) import argparse from tqdm import trange,tqdm import geopandas as gp @@ -15,142 +15,197 @@ from rasterio import warp from rasterio.transform import from_bounds from PIL import Image,ImageDraw -from skimage.morphology import dilation, square, watershed -import solaris as sol +from skimage.morphology import dilation, square +from skimage.segmentation import watershed + from simplification.cutil import simplify_coords_vwp from imantics import Mask import numpy as np import rasterio from matplotlib import pyplot as plt from rasterio.windows import Window -def get_areas(cols): - areas=[] - for iid in cols: - items=[x for x in cols[iid].get_all_items()] - for i,id in enumerate(items): - if(i%2==0 and i+1 List[List[float]]: """ - Reverse the coordinates in pol - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] - Returns [[y1,x1],[y2,x2],...,[yN,xN]] + Generates a list of coordinates forming a polygon. + + Parameters + ---------- + bbox : List[float] + A list representing the bounding box coordinates [xmin, ymin, xmax, ymax]. + + Returns + ------- + List[List[float]] + A list of coordinates representing the polygon. """ - return [list(f[-1::-1]) for f in pol] -def to_index(wind_): + return [ + [bbox[0], bbox[1]], + [bbox[2], bbox[1]], + [bbox[2], bbox[3]], + [bbox[0], bbox[3]], + [bbox[0], bbox[1]] + ] + + +def shape_polys(polyg): + """Shapes the building polygons as a list of polygon lists. + + Parameters + ---------- + polyg : List[Polygon] + List of building polygons in numpy arrays. + + Returns + ------- + List[List[Tuple(float, float)]] + List of shaped polygons. """ - Generates a list of index (row,col): [[row1,col1],[row2,col2],[row3,col3],[row4,col4],[row1,col1]] + + all_polys = [] + for poly in polyg: + if len(poly) >= 3: + f = poly.reshape(-1, 2) + simplified_vw = simplify_coords_vwp(f, .3) + if len(simplified_vw) > 2: + mpoly = [] + # Rebuilding the polygon in the way that PIL expects the values [(x1,y1),(x2,y2)] + for i in simplified_vw: + mpoly.append((i[0], i[1])) + # Adding the first point to the last to close the polygon + mpoly.append((simplified_vw[0][0], simplified_vw[0][1])) + all_polys.append(mpoly) + return all_polys + + +def pol_to_np(pol: List[List[float]]) -> np.ndarray: + """Converts a list of coordinates to a NumPy array. + + Parameters + ---------- + pol : List[List[float]] + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. + + Returns + ------- + np.ndarray + NumPy array of coordinates. """ - return [[wind_.row_off,wind_.col_off], - [wind_.row_off,wind_.col_off+wind_.width], - [wind_.row_off+wind_.height,wind_.col_off+wind_.width], - [wind_.row_off+wind_.height,wind_.col_off], - [wind_.row_off,wind_.col_off]] + + return np.array([list(l) for l in pol]) + -def generate_polygon(bbox): +def pol_to_bounding_box(pol: List[List[float]]) -> BoundingBox: + """Converts a list of coordinates to a bounding box. + + Parameters + ---------- + pol : List[List[float]] + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. + + Returns + ------- + BoundingBox + Bounding box of the coordinates. """ - Generates a list of coordinates: [[x1,y1],[x2,y2],[x3,y3],[x4,y4],[x1,y1]] + + arr = pol_to_np(pol) + return BoundingBox(np.min(arr[:, 0]), + np.min(arr[:, 1]), + np.max(arr[:, 0]), + np.max(arr[:, 1])) + + +def reverse_coordinates(pol: List) -> List: """ - return [[bbox[0],bbox[1]], - [bbox[2],bbox[1]], - [bbox[2],bbox[3]], - [bbox[0],bbox[3]], - [bbox[0],bbox[1]]] + Reverse the coordinates in a polygon. + + Parameters + ---------- + pol : list of list + List of coordinates: [[x1, y1], [x2, y2], ..., [xN, yN]]. -def pol_to_np(pol): + Returns + ------- + list of list + Reversed coordinates: [[y1, x1], [y2, x2], ..., [yN, xN]]. """ - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] + + return [list(f[-1::-1]) for f in pol] + + +def get_areas(cols: dict) -> List: """ - return np.array([list(l) for l in pol]) + Get areas from a collection of columns. -def pol_to_bounding_box(pol): + Parameters + ---------- + cols : dict + A dictionary representing columns. + + Returns + ------- + list + A list of tuples containing area information. Each tuple has the format (iid, id1, id2). """ - Receives list of coordinates: [[x1,y1],[x2,y2],...,[xN,yN]] + + areas = [] + for iid in cols: + items = [x for x in cols[iid].get_all_items()] + for i, id in enumerate(items): + if i % 2 == 0 and i + 1 < len(items): + areas.append((iid, items[i].id, items[i + 1].id)) + return areas + + +def to_index(wind_: Window) -> List[List[int]]: + """ + Generates a list of index coordinates (row, col) for a given Window. + + Parameters + ---------- + wind_ : Window + The rasterio Window object specifying the region of interest. + + Returns + ------- + List[List[int]] + A list of index coordinates representing the corners of the Window. + """ + + return [ + [wind_.row_off, wind_.col_off], + [wind_.row_off, wind_.col_off + wind_.width], + [wind_.row_off + wind_.height, wind_.col_off + wind_.width], + [wind_.row_off + wind_.height, wind_.col_off], + [wind_.row_off, wind_.col_off] + ] + + +def create_separation(labels: np.ndarray) -> np.ndarray: + """Creates a mask for building spacing of close buildings. + + Parameters + ---------- + labels : np.ndarray + Numpy array where each building's pixels are encoded with a certain value. + + Returns + ------- + np.ndarray + Mask for building spacing. """ - arr = pol_to_np(pol) - return BoundingBox(np.min(arr[:,0]), - np.min(arr[:,1]), - np.max(arr[:,0]), - np.max(arr[:,1])) - -def get_winNmask(rst,x,y,width,height,polys): - '''This function crops a chip for a specified width and height and generates the mask of buildings with borders''' - '''it requires solaris framework''' - ''' input : rst = is a raster which is the tif image to be cropped open using rasterio.open(tif url)''' - ''' x : the x pixel coordinate of the top left corner of the window desired - y: the y pixel coordinate of the top left corner of the window desired ( window =image chip) - width : the width of the window to be chipped ,ex =512 - height : the height of the window to be chipped - polys: the polygons of the whole raster image, in opencities ai the data was stored cleanly in a pystac and i was able - to insert the polygons of the buildings in geo-pandas dataframe , the polys parameter was called by geo-pandas-df-polys.geometry - so this is very specific and i didnt test other formats''' - ''' output : aoo : np.array of RGBA channels of the image chipped(cropped) - fbc_mask : an np array of shape [width,height,2] it contains the mask of buildings and borders''' - #specify the width and height and top left corner coordinates of the window - win = Window(x,y,width,height) - #get the bounding box of the image chip in relative coordinates(this doesnt mean the pixel coordinates - #but rather transforms the pixel coordinates into rst.transform system used ) - bbox = windows.bounds(win,rst.transform) - #generate of list of coordinates for this bounding box [top left- bottom left -bottom right -top right - top left ](to for a loop) - pol = generate_polygon(bbox) - # put the coordinates of the window bounding box into a numpy array - # this means that u put the x pixel in [:,0] and y in[:,1] - #so we have a numpy array of shape [5,2] - pol_np = np.array(pol) - #transform all the relative coordinates of the bounding box of window to long/lat coordinates - coords_transformed = warp.transform(rst.crs,{'init': 'epsg:4326'},pol_np[:,0],pol_np[:,1]) - - ct=coords_transformed - #get the extrimities in long/lat (left-bottom-right-top) - #example for left u need the minimum of the x in channel 0 while for the bottom u need the minimum of the y coordinates (speaking long/lat wise) - l,b,r,t=min(ct[0]),min(ct[1]),max(ct[0]),max(ct[1]) - #make a list of tuples of the corners coordinates in long/lat of the window bounding box(just to match a certain shape) - coords_transformed = [[r,c] for r,c in zip(coords_transformed[0],coords_transformed[1])] - #from_bounds gives the affine transformation from long/lat to pixel(i only managed to use it usefully in the solaris function) - tfm1=from_bounds(l,b,r,t,height,width) - #polygonize the coordinates if the bounding boxes(again just to match a certain form) - coords_transformed= Polygon(coords_transformed) - # read the specified window from a raster - arr_win = rst.read(window=win) - #make a list of buildings polygons in long/lat coords from the input ''polys'' if the building polygon intersect the window - all_polys = [poly for poly in polys if poly.intersects(coords_transformed)] - # make a geo pandas dataframe and specify the system used (epsg :4326) of the building polygons in all_polys - all_polys_gdf = gp.GeoDataFrame(geometry=all_polys,crs='epsg:4326') - #generate the mask of buildings and border for certain width and height (ex [512,512]) - #df= geopandas data frame - # channels are the classes(extra class spacing didnt work with me so only ['building','boundary '] masks are generated) - fbc_mask = sol.vector.mask.df_to_px_mask(df=all_polys_gdf, - channels=['footprint', 'boundary'], - affine_obj=tfm1, shape=(width,height), - boundary_width=4, boundary_type='inner',meters=False) - #shape the window from rasterio.window to numpy array of the window array (aoo) - aoo=np.array(arr_win) - #return RGBA window image array (aoo) and mask - return aoo,fbc_mask - -def create_separation(labels): - #create a mask for building spacing of close buildings - '''takes as input a numpy array where each buildings pixels is encoded with a certain value''' - '''for example building_1 pixels are encoded as 1, building_2 as 2.....building_n as n''' - '''note that i encoded the array as np.uint8 so on 8 bits, which means that the highest pixel value is 255''' - '''so if u have more the 255 buildings in the image/mask u should consider a np.uint16 or np.uint32 for example to be in the safe side''' - #perform a dilation on the image , where square(20) is the kernel( u can change the size of the kernel - #or the shape to a rectange if u want or whatever , but dont guarantee the result tmp = dilation(labels > 0, square(20)) - #apply the watershed algorithm where the basins are the original encoded labels to the dilated above - #this generates the dams/line of separation tmp2 = watershed(tmp, labels, mask=tmp, watershed_line=True) > 0 - #XOR operation to remove external lines - #u can visualize all these if u want to understand more tmp = tmp ^ tmp2 - #dilate the separation to get a sufficient size tmp = dilation(tmp, square(5)) msk1 = np.zeros_like(labels, dtype='bool') - #this part simply removes unwanted separation pixels by checking horizontaly and vertically the area around the pixel - #again visualize to gain better understanding of this operation + for y0 in range(labels.shape[0]): for x0 in range(labels.shape[1]): if not tmp[y0, x0]: @@ -163,152 +218,28 @@ def create_separation(labels): max(0, x0 - sz):min(labels.shape[1], x0 + sz + 1)]) if len(uniq[uniq > 0]) > 1: msk1[y0, x0] = True - #return the separation/spacing masks return msk1 -def shape_polys(polyg): - #this function just shape the building polygons as a list of polygon lists - all_polys = [] - for poly in polyg: - if len(poly) >= 3: - f = poly.reshape(-1, 2) - simplified_vw = simplify_coords_vwp(f, .3) - if len(simplified_vw) > 2: - mpoly = [] - # Rebuilding the polygon in the way that PIL expects the values [(x1,y1),(x2,y2)] - for i in simplified_vw: - mpoly.append((i[0], i[1])) - # Adding the first point to the last to close the polygon - mpoly.append((simplified_vw[0][0], simplified_vw[0][1])) - all_polys.append(mpoly) - return all_polys -def make_instance_mask(all_polys,size): - #this function encodes each building polygon with a value in the mask - #used in the make_separation function above - #size is the width and height of the square maks - #zeros array of specified shape +def make_instance_mask(all_polys: List[List[Tuple[float, float]]], size: int) -> Image: + """Encodes each building polygon with a value in the mask. + + Parameters + ---------- + all_polys : List[List[Tuple[float, float]]] + List of building polygons. + size : int + Width and height of the square mask. + + Returns + ------- + Image + Instance mask. + """ + bg=np.zeros((size,size)).astype(np.uint8) bg=Image.fromarray(bg).convert('L') - #get starting encoding value (here max is 255 since i used 8 bit encoding) shift=255-len(all_polys) - #draw each polygon with a certain different encoded value for i,poly in enumerate(all_polys): ImageDraw.Draw(bg).polygon(poly,outline=shift+i,fill=shift+i) - #return the instance mask - #instance here refers to the fact that now we can differentiate each building from another in the image return bg - -def tile_area(area,iid,lid,tile_size,ipth,mpth): - '''this function is the heart of all this script''' - '''it tiles a certain area(tif image) and generates the correspond tile mask - input: - area : area id (example : acc-665946) - iid : the image id in the pystac - lid :the label id in the pystac - tile_size : size of width and height of the tile(use only square, i dont guarantee rectangles) - ipth : directory to save image tiles - mpth : directory to save the masks - ''' - #get the item u want to tile in the pystac - item=cols[area].get_item(id=iid) - #get the labels of the item u picked in the pystac - label=cols[area].get_item(id=lid) - # make a geopandas dataframe of the labels - #most of these i just copied from dave luo script - lbl_gfd=gp.read_file(label.make_asset_hrefs_absolute().assets['labels'].href) - #get the geometry of the label geo pandas dataframe - polys=lbl_gfd.geometry - #get the tif url - tif_url = item.assets['image'].href - if tif_url.startswith("./"): - tif_url = '/'.join(item.to_dict()['links'][1]['href'].split("/")[:-1])+tif_url[1:] - #open the tif image as a raster - rst = rasterio.open(tif_url) - #get the width and height of the raster - wid,hei=rst.width,rst.height - #move along the raster with a stride = tile_size - # this result in ignoring the extra left rectangles at the boundaries - #u can manage to change it and simply pad the rectangle with zeros to get a square or something else - - for i in trange(0,tile_size*int(hei/tile_size),tile_size): - for j in range(0,tile_size*int(wid/tile_size),tile_size): - # get window array and mask of buildings and borders - aoo,mask=get_winNmask(rst,j,i,tile_size,tile_size,polys) - #check the unique values in the RGBA image - - uniq=np.unique(aoo) - #if they are all zeros then the image contains nothing but black and u dont need it, go to the loop start - if(np.all(uniq==0)):continue - #check the buildins mask if any buildings exist - uniq=np.unique(mask[:,:,0]) - #if no then the mask is zeros and u dont need to perform the create_spacing operation - if(np.all(uniq==0)): - spacing=np.zeros((tile_size,tile_size)).astype(np.uint8) - else: - #XOR the buildings mask with borders - #to get the mask of buildings minus boundaries - getit=mask[:,:,0]^mask[:,:,1] - #get all polygons in the mask - polyg = Mask(getit).polygons() - #shape the polys - extracted=shape_polys(polyg) - #encode the buildinfs - labels=np.array(make_instance_mask(extracted,tile_size)) - #get the spacing mask - spacing=np.array(create_separation(labels)).astype(np.uint8)*255 - #print(np.unique(spacing)) - #encode the buildings to 64, boundaries to 128 and spacing to 255, to differentiate and for visualization - aoo=np.moveaxis(aoo,0,2) - final_mask=spacing - final_mask[(np.where(mask[:,:,0]>0))]=np.uint8(64) - final_mask[(np.where(mask[:,:,1]>0))]=np.uint8(128) - #convert the RGBA to RGB image - img=Image.fromarray(aoo).convert('RGB') - fmsk=Image.fromarray(final_mask) - #save each in the correct directory - img.save('/'.join([ipth,f'{area}_{iid}_{i}_{j}.png'])) - fmsk.save('/'.join([mpth,f'{area}_{iid}_{i}_{j}_mask.png'])) - -if __name__=='__main__': - #parse the arguments from terminal - parser=argparse.ArgumentParser() - parser.add_argument('--data',type=str,help='path to train_tier') - parser.add_argument('--out',type=str,help='output path') - parser.add_argument('--tsize',type=int,help='tile size in pixels') - parser.add_argument('--bw',type=int,help='building contours width in pixels') - args=parser.parse_args() - #path to the main catalog of the pystac - #here the path is to tier1 catalog - path='/media/hasan/DATA/OpenCitiesAI/train_tier_1/catalog.json' - #path to the images and masks directory to be saved - #if parsed please create these directories - if(args.data):path=args.data - img_path='/media/hasan/DATA/OpenCitiesAI/Dataset/images' - mask_path='/media/hasan/DATA/OpenCitiesAI/Dataset/masks' - if(args.out): - img_path='/'.join([args.out,'images']) - mask_path='/'.join([args.out,'masks']) - #specify tile size - default is 512 - tile_size=512 - if(args.tsize):tile_size=args.tsize - # this doesnt work - bw=3 - if(args.bw):bw=args.bw - #open the main catalog of the pystac - main_cat=Catalog.from_file(path) - #get all the columns in the catalog(main_children) , it is how the pystac is organized - #i just copied this - cols = {cols.id:cols for cols in main_cat.get_children()} - print(cols) - # get the list of all the areas and their relative image id and label id - a=get_areas(cols) - #tile each area in the dataset - for si in tqdm(a): - area=si[0] - iid=si[1] - lid=si[2] - print(area) - print(iid) - tile_area(area,iid,lid,512,img_path,mask_path) - \ No newline at end of file diff --git a/data_processing/masker.py b/data_processing/masker.py index 8a48559..6991ac2 100644 --- a/data_processing/masker.py +++ b/data_processing/masker.py @@ -64,6 +64,7 @@ def load_labels(self, json_path: str) -> dict: """ jfile = open(json_path, 'r') f = json.load(jfile) + jfile.close() return f @@ -199,7 +200,7 @@ def crop(self, img: np.ndarray, y_off: int, x_off: int, h: int, w: int) -> np.nd return img[y_off : y_off + h, x_off : x_off + w] - def make_mask( + def make_mask_with_borders( self, polys: List[Polygon], size: Tuple[int, int] = (1024, 1024) ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: r"""Generate mask from polygons. @@ -236,19 +237,19 @@ def make_mask( _b = k[0].max() + 2 _r = k[1].max() + 2 - crop_instance = instance[_t:_b, _l:_r] - bld = binary_erosion(crop_instance, strc) + crop_instance = instance[_t:_b, _l:_r] + bld = binary_erosion(crop_instance, footprint=strc) brdr = bld ^ crop_instance brdr1 = np.zeros_like(instance, dtype=brdr.dtype) brdr1[_t:_b, _l:_r] = brdr border[brdr1 == True] = np.uint8(255) except: - bld = binary_erosion(instance, strc) + bld = binary_erosion(instance, footprint=strc) brdr = bld ^ instance border[brdr == True] = np.uint8(255) - builds[instances > 0] = np.uint8(255) + builds[instances > 0] = np.uint8(255) return instances, builds, border @@ -276,13 +277,13 @@ def mask(self, raster_path: str, json_path: str) -> Tuple[np.ndarray, np.ndarray for label in labels: multipoly = label['geometry']['coordinates'] proj_multipoly = [] - for poly in multipoly: + for poly in multipoly: mm = self.project_poly(poly, raster, self.pd_sz, self.x_off, self.y_off) if len(mm) > 0: proj_multipoly.append(mm) polys.append(proj_multipoly) - ins, b, br = self.make_mask(polys, size=self.pd_sz) + ins, b, br = self.make_mask_with_borders(polys, size=self.pd_sz) kwargs = {'y_off': self.y_off, 'x_off': self.x_off, 'h': self.sz[0], 'w': self.sz[1]} ins = self.crop(ins, **kwargs) b = self.crop(b, **kwargs) @@ -388,43 +389,43 @@ def instances(self, size: Tuple[int, int], labels: dict) -> np.ndarray: return ins_mask - def borders(self, ins_mask: np.ndarray) -> np.ndarray: - r"""Generate borders mask from instances mask. - - Parameters - ---------- - ins_mask : np.ndarray - Instances mask. - - Returns - ------- - np.ndarray - Borders mask. - """ - ins_borders = np.zeros_like(ins_mask,dtype = np.int32) - ids = sorted(np.unique(ins_mask))[1:] - strc = self.get_strc() - for iid in ids: - instance = ins_mask == iid - try: - k=np.where(instance>0) - _t = k[0].min() - 3 - _l = k[1].min() - 3 - _b = k[0].max() + 3 - _r = k[1].max() + 3 + # def borders(self, ins_mask: np.ndarray) -> np.ndarray: + # r"""Generate borders mask from instances mask. + + # Parameters + # ---------- + # ins_mask : np.ndarray + # Instances mask. + + # Returns + # ------- + # np.ndarray + # Borders mask. + # """ + # ins_borders = np.zeros_like(ins_mask,dtype = np.int32) + # ids = sorted(np.unique(ins_mask))[1:] + # strc = self.get_strc() + # for iid in ids: + # instance = ins_mask == iid + # try: + # k=np.where(instance>0) + # _t = k[0].min() - 3 + # _l = k[1].min() - 3 + # _b = k[0].max() + 3 + # _r = k[1].max() + 3 - crop_instance = instance[_t:_b,_l:_r] - bld = binary_erosion(crop_instance, strc) - brdr = bld ^ crop_instance - brdr1 = np.zeros_like(instance,dtype=brdr.dtype) - brdr1[_t:_b,_l:_r] =brdr - ins_borders[brdr1 == True] = iid + # crop_instance = instance[_t:_b,_l:_r] + # bld = binary_erosion(crop_instance, strc) + # brdr = bld ^ crop_instance + # brdr1 = np.zeros_like(instance,dtype=brdr.dtype) + # brdr1[_t:_b,_l:_r] =brdr + # ins_borders[brdr1 == True] = iid - except: - bld = binary_erosion(instance, strc) - brdr = bld ^ instance - ins_borders[brdr == True] = iid - return ins_borders + # except: + # bld = binary_erosion(instance, strc) + # brdr = bld ^ instance + # ins_borders[brdr == True] = iid + # return ins_borders def to_rgb(self, img: np.ndarray) -> np.ndarray: diff --git a/data_processing/test_make_dataset.py b/data_processing/test_make_dataset.py new file mode 100644 index 0000000..b16b9b2 --- /dev/null +++ b/data_processing/test_make_dataset.py @@ -0,0 +1,139 @@ +import unittest +import numpy as np +from shapely.geometry import Polygon +from rasterio.coords import BoundingBox +from make_dataset import generate_polygon, shape_polys, pol_to_np, pol_to_bounding_box, reverse_coordinates, get_areas, to_index, create_separation, make_instance_mask +from PIL import Image + +class TestMakeDatasetFunctions(unittest.TestCase): + + def test_generate_polygon(self): + bbox = [1.0, 2.0, 4.0, 6.0] + result = generate_polygon(bbox) + expected = [ + [1.0, 2.0], + [4.0, 2.0], + [4.0, 6.0], + [1.0, 6.0], + [1.0, 2.0] + ] + self.assertEqual(result, expected) + + + def test_shape_polys(self): + polyg = [ + np.array([[0, 0], [1, 0], [1, 1], [0, 1]]), # Square + np.array([[2, 2], [3, 2], [3, 3], [2, 3]]), # Another square + np.array([[4, 4], [5, 4], [5, 5], [4, 5]]), # Yet another square + ] + result = shape_polys(polyg) + expected = [ + [(0.0, 0.0), (1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0)], + [(2.0, 2.0), (3.0, 2.0), (3.0, 3.0), (2.0, 3.0), (2.0, 2.0)], + [(4.0, 4.0), (5.0, 4.0), (5.0, 5.0), (4.0, 5.0), (4.0, 4.0)], + ] + self.assertEqual(result, expected) + + + def test_shape_polys_empty_list(self): + result = shape_polys([]) + self.assertEqual(result, []) + + + def test_pol_to_np(self): + poly = [[1.0, 2.0], [4.0, 2.0], [4.0, 6.0], [1.0, 6.0]] + result = pol_to_np(poly) + expected = np.array([[1.0, 2.0], [4.0, 2.0], [4.0, 6.0], [1.0, 6.0]]) + np.testing.assert_array_equal(result, expected) + + + def test_pol_to_bounding_box(self): + poly = [[1.0, 2.0], [4.0, 2.0], [4.0, 6.0], [1.0, 6.0]] + result = pol_to_bounding_box(poly) + expected = BoundingBox(1.0, 2.0, 4.0, 6.0) + self.assertEqual(result, expected) + + + def test_reverse_coordinates(self): + input_polygon = [[1.0, 2.0], [4.0, 2.0], [4.0, 6.0], [1.0, 6.0]] + result = reverse_coordinates(input_polygon) + expected = [[2.0, 1.0], [2.0, 4.0], [6.0, 4.0], [6.0, 1.0]] + self.assertEqual(result, expected) + + input_polygon2 = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]] + result2 = reverse_coordinates(input_polygon2) + expected2 = [[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 0.0]] + self.assertEqual(result2, expected2) + + input_polygon_empty = [] + result_empty = reverse_coordinates(input_polygon_empty) + expected_empty = [] + self.assertEqual(result_empty, expected_empty) + + + def test_get_areas_empty_columns(self): + cols = {} + result = get_areas(cols) + self.assertEqual(result, []) + + + def test_get_areas_multiple_column(self): + class Item: + def __init__(self, id): + self.id = id + class Group: + def __init__(self, items): + self.items = items + def get_all_items(self): + return self.items + + group1_items = [Item(id=1), Item(id=2), Item(id=3), Item(id=4)] + group2_items = [Item(id=5), Item(id=6), Item(id=7), Item(id=8)] + + cols = { + 'group1': Group(items=group1_items), + 'group2': Group(items=group2_items), + } + result = get_areas(cols) + + expected = [('group1', 1, 2), ('group1', 3, 4), ('group2', 5, 6), ('group2', 7, 8)] + self.assertEqual(result, expected) + + + def test_to_index(self): + class MockWindow: + def __init__(self, row_off, col_off, height, width): + self.row_off = row_off + self.col_off = col_off + self.height = height + self.width = width + wind = MockWindow(2,3,4,5) + result = to_index(wind) + expected_result = [[2, 3], [2, 8], [6, 8], [6, 3], [2, 3], ] + self.assertEqual(result, expected_result) + + + def test_create_separation(self): + labels = np.array([[0, 0, 1, 1, 0], + [0, 0, 1, 1, 0], + [0, 0, 0, 0, 0]]) + + result = create_separation(labels) + + self.assertIsInstance(result, np.ndarray) + self.assertEqual(result.shape, labels.shape) + + + def test_make_instance_mask(self): + all_polys = [[(0, 0), (0, 1), (1, 1), (1, 0)]] + + result = make_instance_mask(all_polys, size=100) + + self.assertIsInstance(result, Image.Image) + self.assertEqual(result.size, (100, 100)) + + + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/data_processing/test_masker.py b/data_processing/test_masker.py new file mode 100644 index 0000000..db38506 --- /dev/null +++ b/data_processing/test_masker.py @@ -0,0 +1,172 @@ +import unittest +import json +import numpy as np +from shapely.geometry import Polygon, polygon +from skimage.morphology import square +import rasterio as rs + +from maskers import Masker + +class TestMasker(unittest.TestCase): + + def test_load_labels(self): + masker = Masker() + json_path = "labels.json" + labels = masker.load_labels(json_path) + self.assertIsInstance(labels, dict) + + + def test_poly_size(self): + masker = Masker() + w, h = 500, 700 + result = masker.poly_size(w, h) + shapely_coords = [(int(x), int(y)) for x, y in zip(result.exterior.coords.xy[0], result.exterior.coords.xy[1])] + expected_coords = [(0, 0), (w - 1, 0), (w - 1, h - 1), (0, h - 1), (0, 0)] + self.assertEqual(shapely_coords, expected_coords) + + + def test_get_strc(self): + masker = Masker() + result = masker.get_strc() + if masker.ek_type == 'square': + expected_result = square(3) + else: + expected_result = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]], dtype=np.uint8) + np.testing.assert_array_equal(result, expected_result) + + + def test_load_raster_file(self): + masker = Masker() + raster_path = "file.tif" + raster_reader = masker.load_raster_file(raster_path) + self.assertIsInstance(raster_reader, rs.DatasetReader) + + + def test_get_img(self): + masker = Masker() + raster_path = "file.tif" + raster_file = rs.open(raster_path) + result = masker.get_img(raster_file) + expected = np.load('test_get_img.npy') + np.testing.assert_array_equal(result,expected) + + + def test_project_poly(self): + masker = Masker() + poly = [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + frs = rs.open("file.tif") + size = (1024, 1024) + result = masker.project_poly(poly, frs, size) + + self.assertIsInstance(result, list) + + + def test_crop(self): + masker = Masker() + img = np.random.rand(1024, 1024, 3).astype(np.uint8) + y_off, x_off, h, w = 100, 200, 300, 400 + result = masker.crop(img, y_off, x_off, h, w) + self.assertEqual(result.shape, (h, w, 3)) + cropped_values = img[y_off : y_off + h, x_off : x_off + w] + np.testing.assert_array_equal(result, cropped_values) + + + # def test_make_mask_with_borders(self): + # masker = Masker() + # size = (1024, 1024) + # polys = [[[(0.0, 0.0), (100, 0.0), (100, 100), (0.0, 100), (0.0, 0.0)]]] + + # result_instances, result_builds, result_border = masker.make_mask_with_borders(polys, size) + + # expected_instances = np.zeros(size, dtype=np.int32) + # expected_builds = np.zeros(size, dtype=np.uint8) + # expected_border = np.zeros(size, dtype=np.uint8) + + # expected_instances[0:100, 0:100] = 1 + # expected_builds[0:100, 0:100] = 255 + # expected_border[99:100, 99:100] = 255 + + # np.testing.assert_array_equal(result_instances, expected_instances) + # np.testing.assert_array_equal(result_builds, expected_builds) + # np.testing.assert_array_equal(result_border, expected_border) + + + def test_mask(self): + masker = Masker() + raster_path = "file.tif" + json_path = "labels.json" + result = masker.mask(raster_path, json_path) + self.assertIsInstance(result, tuple) + self.assertIsInstance(result[0], np.ndarray) + self.assertIsInstance(result[1], np.ndarray) + self.assertIsInstance(result[2], np.ndarray) + + + def test_collect(self): + masker = Masker() + labels = {"features": [{"properties": {"Id": 1, "area": 100}, "geometry": {"coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]}}]} + result = masker._collect(labels) + expected_result = {"1": {"area": 100, "geometry": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]}} + self.assertDictEqual(result, expected_result) + + + def test_int_coords(self): + masker = Masker() + result = masker.int_coords(5) + expected_result = np.int32(5) + np.testing.assert_array_equal(result, expected_result) + + + def test_instances(self): + masker = Masker() + size = (1024, 1024) + labels = {"1": {"area": 100, "geometry": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]}} + + result_instances = masker.instances(size, labels) + expected_instances = np.zeros(size, dtype=np.int32) + arr_pol = np.array([[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]], dtype=np.int32) + + x_coords = arr_pol[:, :, 0].flatten() + y_coords = arr_pol[:, :, 1].flatten() + + poly = Polygon(zip(x_coords, y_coords)) + hs, ws = poly.exterior.coords.xy + + hs = list(map(int, hs)) + ws = list(map(int, ws)) + + expected_instances[hs, ws, ...] = 1 + np.testing.assert_array_equal(result_instances, expected_instances) + + + # def test_borders(self): + # masker = Masker() + # size = (1024, 1024) + # labels = {"1": {"area": 100, "geometry": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]}} + # ins_mask = masker.instances(size, labels) + # result = masker.borders(ins_mask) + + # expected_result = np.zeros(size, dtype=np.int32) + # expected_result[0:5, 0:5] = 1 + # np.testing.assert_array_equal(result, expected_result) + + + def test_to_rgb(self): + masker = Masker() + img = np.random.rand(1024, 1024, 3).astype(np.uint8) + result = masker.to_rgb(img) + expected_result = img[..., :3] + np.testing.assert_array_equal(result, expected_result) + + + def test_to_gray(self): + masker = Masker() + mask = np.random.randint(0, 2, (1024, 1024)).astype(np.uint8) + result = masker.to_gray(mask) + expected_result = (mask > 0).astype(np.uint8) * 255 + np.testing.assert_array_equal(result, expected_result) + +if __name__ == '__main__': + unittest.main() + + diff --git a/data_processing/test_resizing.py b/data_processing/test_resizing.py new file mode 100644 index 0000000..835ca3d --- /dev/null +++ b/data_processing/test_resizing.py @@ -0,0 +1,51 @@ +import unittest +import torch +import numpy as np +from resizing import remove_boundary_positives, remove_boundary_positives_np, resize_pad, unpad_resize + +class TestResizing(unittest.TestCase): + + def setUp(self): + self.tensor_shape = (2, 3, 256, 256) + self.numpy_array_shape = (256, 256, 3) + self.tensor = torch.randn(self.tensor_shape) + self.numpy_array = np.random.randn(*self.numpy_array_shape) + + + def test_remove_boundary_positives_tensor(self): + pixels = 20 + result = remove_boundary_positives(self.tensor, pixels) + self.assertEqual(result.shape, self.tensor_shape) + self.assertTrue(torch.all(result[:, :, :pixels, :] == 0)) + self.assertTrue(torch.all(result[:, :, -pixels:, :] == 0)) + self.assertTrue(torch.all(result[:, :, :, :pixels] == 0)) + self.assertTrue(torch.all(result[:, :, :, -pixels:] == 0)) + + + def test_remove_boundary_positives_np(self): + pixels = 20 + result = remove_boundary_positives_np(self.numpy_array, pixels) + self.assertEqual(result.shape, self.numpy_array_shape) + self.assertTrue(np.all(result[:pixels, :, :] == 0)) + self.assertTrue(np.all(result[-pixels:, :, :] == 0)) + self.assertTrue(np.all(result[:, :pixels, :] == 0)) + self.assertTrue(np.all(result[:, -pixels:, :] == 0)) + + + def test_resize_pad(self): + resize = 28 + padsize = 100 + result = resize_pad(self.tensor, padsize=padsize, resize=resize) + self.assertEqual(result.shape, torch.Size([2, 3, 100, 100])) + + + def test_unpad_resize(self): + resize = 256 + padsize = 20 + padded_tensor = resize_pad(self.tensor, padsize=padsize) + result = unpad_resize(padded_tensor, padsize=padsize, resize=resize) + self.assertEqual(result.shape, self.tensor_shape) + + +if __name__ == '__main__': + unittest.main()