From 16e7a63d65fbad4f8f1a2f828729444e4359871b Mon Sep 17 00:00:00 2001
From: aghand0ur
Date: Mon, 11 Dec 2023 18:22:46 +0200
Subject: [PATCH] going thru data_processing

---
 .../__pycache__/preprocessing.cpython-310.pyc |  Bin 4804 -> 0 bytes
 {augmentation => data_processing}/cropping.py |   81 --------
 data_processing/filtering.py                  |    9 -
 data_processing/post_process.py               |   75 +++-----
 .../{preprocessing.py => pre_processing.py}   |   86 +--------
 data_processing/split_data.py                 |  181 ------------------
 data_processing/split_to_folds.py             |   42 ----
 post_process.py                               |  123 ------------
 8 files changed, 31 insertions(+), 566 deletions(-)
 delete mode 100644 data_processing/__pycache__/preprocessing.cpython-310.pyc
 rename {augmentation => data_processing}/cropping.py (64%)
 delete mode 100644 data_processing/filtering.py
 rename data_processing/{preprocessing.py => pre_processing.py} (64%)
 delete mode 100644 data_processing/split_data.py
 delete mode 100644 data_processing/split_to_folds.py
 delete mode 100644 post_process.py

diff --git a/data_processing/__pycache__/preprocessing.cpython-310.pyc b/data_processing/__pycache__/preprocessing.cpython-310.pyc
deleted file mode 100644
index a19775be51537fb3a9ecf83b6a68dfcf0e5be707..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[literal 4804: binary payload of the deleted .pyc omitted]

diff --git a/augmentation/cropping.py b/data_processing/cropping.py
similarity index 64%
rename from augmentation/cropping.py
rename to data_processing/cropping.py
index 6310f74..c4eb046 100644
--- a/augmentation/cropping.py
+++ b/data_processing/cropping.py
@@ -1,84 +1,3 @@
-import numpy as np
-import random
-from skimage.measure import label as label_fn
-
-
-def random_crop(image_stack, mask, image_size):
-    '''
-    THIS FUNCTION DEFINES RANDOM IMAGE CROPPING.
-    :param image_stack: input image in size [Time Stamp, Image Dimension (Channel), Height, Width]
-    :param mask: input mask of the image, to filter out uninterested areas [Height, Width]
-    :param image_size: It determine how the data is partitioned into the NxN windows
-    :return: image_stack, mask
-    '''
-
-    H, W = image_stack.shape[2:]
-
-    # skip random crop is image smaller than crop size
-    if H - image_size // 2 <= image_size:
-        return image_stack, mask
-    if W - image_size // 2 <= image_size:
-        return image_stack, mask
-    flag = True
-    for i in range(0,100):
-        h = np.random.randint(image_size, H - image_size // 2)
-        w = np.random.randint(image_size, W - image_size // 2)
-
-        image_stack = image_stack[:, :, h - int(np.floor(image_size // 2)):int(np.ceil(h + image_size // 2)),
-                      w - int(np.floor(image_size // 2)):int(np.ceil(w + image_size // 2))]
-        mask = mask[h - int(np.floor(image_size // 2)):int(np.ceil(h + image_size // 2)),
-               w - int(np.floor(image_size // 2)):int(np.ceil(w + image_size // 2))]
-        if 1 in mask:
-            break
-    return image_stack, mask
-
-def random_crop_around_aoi(img,mask,size = 32,min_area=0):
-    h,w = img.shape[2:]
-    mask_original = mask.copy()
-    size_h,size_w = size,size
-
-    if h <= size and w <= size:
-        return img,mask
-    if h < size:
-        size_h = h
-    if w < size:
-        size_w = w
-
-    if mask.max() == 0:
-        t,b,l,r = 0,h-1,0,w-1
-    else:
-        mask = label_fn(mask,connectivity=2)
-        values = [value for value in np.unique(mask)[1:] if mask[mask==value].sum()/value >= min_area]
-
-        if len(values) == 0:
-            t,b,l,r = 0,h-1,0,w-1
-        else:
-            sval = values[random.randint(0,len(values)-1)]
-            mask[mask!=sval] = 0
-            mask = ((mask / sval) * 255.0).astype(np.uint8)
-            pos = np.nonzero(mask)
-            t, b, l, r = pos[0].min(),pos[0].max(),pos[1].min(),pos[1].max()
-
-    h_aoi,w_aoi = b-t,r-l
-    pt = random.randint(t+h_aoi//2, b-h_aoi//2),random.randint(l+w_aoi//2, r-w_aoi//2)
-
-    max_up = pt[0]
-    max_left = pt[1]
-    min_up = max(0,size_h - (h - pt[0]))
-    min_left = max(0,size_w - (w - pt[1]))
-
-    t_crop = pt[0] - min(max_up, random.randint(min_up, size_h-1))
-    l_crop = pt[1] - min(max_left, random.randint(min_left, size_w-1))
-
-    cropped_img = img[:,:,t_crop:t_crop+size_h,l_crop:l_crop+size_w]
-    cropped_mask = mask_original[t_crop:t_crop+size_h,l_crop:l_crop+size_w]
-
-    return cropped_img,cropped_mask
-
-
-
-###Crop images keep georefrenced
-
 import rasterio
 import os
 from rasterio.windows import Window
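
Note: after this hunk, cropping.py keeps only the rasterio-based georeferenced cropping (the context lines show its imports; the body sits below the hunk and is unchanged). For reference, the usual pattern behind rasterio window cropping looks like the sketch below; the function name, window placement, and output path are illustrative assumptions, not the module's actual code.

    import os
    import rasterio
    from rasterio.windows import Window

    def crop_georeferenced(src_path, out_dir, size=512):
        # Read a size x size window and write it out with an updated
        # transform so the crop stays georeferenced.
        with rasterio.open(src_path) as src:
            window = Window(0, 0, size, size)
            data = src.read(window=window)
            profile = src.profile.copy()
            profile.update(height=size, width=size,
                           transform=src.window_transform(window))
            os.makedirs(out_dir, exist_ok=True)
            out_path = os.path.join(out_dir, 'crop_0_0.tif')
            with rasterio.open(out_path, 'w', **profile) as dst:
                dst.write(data)
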
diff --git a/data_processing/filtering.py b/data_processing/filtering.py
deleted file mode 100644
index ccc32ec..0000000
--- a/data_processing/filtering.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import numpy as np
-
-def noise_filter(washed,mina):
-    values = np.unique(washed)
-    for val in values[1:]:
-        area = (washed[washed == val]>0).sum()
-        if(area<=mina):
-            washed[washed == val] = 0
-    return washed
\ No newline at end of file
diff --git a/data_processing/post_process.py b/data_processing/post_process.py
index bb029c6..e4cef0d 100644
--- a/data_processing/post_process.py
+++ b/data_processing/post_process.py
@@ -18,66 +18,51 @@ from shapely.wkt import dumps
 from shapely.ops import cascaded_union
 import geopandas as gpd
 
-def post_process(raw,thresh = 0.5,mina=40,save=None):
+
+def post_process(pred,thresh = 0.5,thresh_b = 0.6,mina=100,mina_b=50):
+    if len(pred.shape) < 2:
+        return None
+    if len(pred.shape) == 2:
+        pred = pred[...,np.newaxis]
 
-    try:
-        ch = raw.shape[2]
-    except:
-        ch=1
-    if(ch == 2):
-        rraw = ranger(raw)
-
-        rbuilds = raw[...,0]
-        rborders = raw[...,1]
-
-        nuclei = rbuilds * (1 - rborders)
-
-        builds = raw[...,0]
-
-        basins = label(nuclei>0.1,background = 0, connectivity = 2)
-        #Image.fromarray(basins>0).show()
-        #basins = noise_filter(basins, mina = 2 )
-        basins = label(basins,background = 0, connectivity = 2)
-        washed = watershed(image = -builds,
+    ch = pred.shape[2]
+    buildings = pred[...,0]
+    if ch > 1:
+        borders = pred[...,1]
+        nuclei = buildings * (1.0 - borders)
+
+        if ch == 3:
+            spacing = pred[...,2]
+            nuclei *= (1.0 - spacing)
+
+        basins = label(nuclei>thresh_b,background = 0, connectivity = 2)
+        if mina_b > 0:
+            basins = noise_filter(basins, mina = mina_b)
+            basins = label(basins,background = 0, connectivity = 2)
+
+        washed = watershed(image = -buildings,
                            markers = basins,
-                           mask = builds>thresh,
+                           mask = buildings>thresh,
                            watershed_line=False)
-        washed = label(washed,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
+    elif(ch == 1):
-        builds = raw[...,0]
-        washed = label(builds > thresh,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
-    else:
-        raise NotImplementedError(
-        )
+        washed = buildings > thresh
+
+
+    washed = label(washed,background = 0, connectivity = 2)
+    washed = noise_filter(washed, mina=mina)
+    washed = label(washed,background = 0, connectivity = 2)
 
     return washed
 
 def noise_filter(washed,mina):
     values = np.unique(washed)
-    #a =0
-    #print(values)
     for val in values[1:]:
-        #a+=1
         area = (washed[washed == val]>0).sum()
         if(area<=mina):
            washed[washed == val] = 0
-    #print(a)
     return washed
 
-def ranger(x):
-    x1 = x.copy()
-    return np.tanh((x1 - 0.5)/0.1) * (0.5)+0.5
-
 def extract_poly(mask):
     shps = shapes(mask.astype(np.int16),mask>0)
     polys =[]
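
Note: the refactor replaces the 2-channel-only watershed path with a generic one that accepts 1, 2, or 3 channels and exposes the marker threshold (thresh_b) and the two minimum-area filters (mina, mina_b) as parameters. A quick sanity check of the new entry point, assuming the channel order building/border/spacing implied by the hunk; the random array is purely illustrative input:

    import numpy as np
    from data_processing.post_process import post_process

    # Fake 3-channel prediction: building, border, spacing probabilities in [0, 1].
    pred = np.random.rand(256, 256, 3).astype(np.float32)

    # Returns a labeled instance mask (0 = background, 1..N = building instances).
    instances = post_process(pred, thresh=0.5, thresh_b=0.6, mina=100, mina_b=50)
    print(instances.shape, instances.max())
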
diff --git a/data_processing/preprocessing.py b/data_processing/pre_processing.py
similarity index 64%
rename from data_processing/preprocessing.py
rename to data_processing/pre_processing.py
index ad29996..4922cc5 100644
--- a/data_processing/preprocessing.py
+++ b/data_processing/pre_processing.py
@@ -48,91 +48,7 @@ np.random.seed(42)
 
-
-
-"""
-https://github.com/geoaigroup/challenges/blob/main/ai4foodsecurity-challenge/lstm-cnn.ipynb
-
-The data are stored as numpy arrays with dimension height x width x bands x timesteps.
-All of the reflectance values are in the range [0,1]. We also add
-two spectral indices (NDWI and LSWI) and SAR bands (VV, VH, incidence angle/IA).
-"""
-
-def add_lswi_channel(X):
-    _X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
-    # copy the values from the original array
-    for i in range(X.shape[2]):
-        _X[:,:,i,:] = X[:,:,i,:]
-    # calculate values for LSWI channel
-    for i in range(N_TIMESTEPS):
-        lswi = (X[:,:,NIR,i]-X[:,:,SWIR1,i])/(X[:,:,NIR,i]+X[:,:,SWIR1,i])
-        _X[:,:,-1,i] = lswi
-    # make sure we didn't introduce any NaNs
-    _X[np.where(np.isnan(_X))] = 0
-    return _X
-
-def add_ndwi_channel(X):
-    _X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
-    # copy the values from the original array
-    for i in range(X.shape[2]):
-        _X[:,:,i,:] = X[:,:,i,:]
-    # calculate values for NDWI channel
-    for i in range(N_TIMESTEPS):
-        ndwi = (X[:,:,GREEN,i]-X[:,:,SWIR1,i])/(X[:,:,GREEN,i]+X[:,:,SWIR1,i])
-        _X[:,:,-1,i] = ndwi
-    # make sure we didn't introduce any NaNs
-    _X[np.where(np.isnan(_X))] = 0
-    return _X
-
-def add_sar_channel(X, band, path):
-    _X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
-    # copy the values from the original array
-    for i in range(X.shape[2]):
-        _X[:,:,i,:] = X[:,:,i,:]
-    # load the corresponding SAR band
-    if band=='vv' or band=='VV':
-        sarpath = path.replace('pheno_timeseries', 'vv_timeseries')
-        #sarpath = path.replace('fixed_timeseries', 'vv_timeseries')
-    elif band=='vh' or band=='VH':
-        sarpath = path.replace('pheno_timeseries', 'vh_timeseries')
-        #sarpath = path.replace('fixed_timeseries', 'vh_timeseries')
-    elif band=='ia' or band=='IA':
-        sarpath = path.replace('pheno_timeseries', 'ia_timeseries')
-        #sarpath = path.replace('fixed_timeseries', 'ia_timeseries')
-    sar = np.load(sarpath).astype(np.float32)
-    for i in range(N_TIMESTEPS):
-        _X[:,:,-1,i] = sar[...,i]
-    # make sure we didn't introduce any NaNs
-    _X[np.where(np.isnan(_X))] = 0
-    return _X
-
-def load_data(x_path, y_path, flatten=True, convert_nans=True):
-    # Load the time series image data
-    X = np.load(x_path).astype(np.float32)
-    # Load the associated labels
-    Y = np.load(y_path).astype(np.int8)
-
-    # Convert all the NaNs to zeros
-    if convert_nans:
-        X[np.where(np.isnan(X))] = 0
-
-    X[np.where(X==0)] = 0.00000001
-    # Add band indices
-    X = add_lswi_channel(X)
-    X = add_ndwi_channel(X)
-    X = add_sar_channel(X, 'vv', x_path)
-    X = add_sar_channel(X, 'vh', x_path)
-    X = add_sar_channel(X, 'ia', x_path)
-    if flatten:
-        # Reduce the h x w x b x t dataset to h*w x b x t
-        X = np.reshape(X, (X.shape[0]*X.shape[1], X.shape[2], X.shape[3]))
-        Y = np.reshape(Y, (Y.shape[0]*Y.shape[1]))
-    assert X.shape[0] == Y.shape[0]
-    return X, Y
-
-
-
-class loading_large_tile:
+class LargeTiffLoader:
 
     def __init__(self,input_image_directory,input_mask_directory,image_suffix='.tif',mask_suffix='.tif'):
         self.image_directory=input_image_directory
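
Note: the rename from loading_large_tile to LargeTiffLoader brings the class name in line with PEP 8. Only the constructor is visible in this hunk, so the instantiation below is a sketch based on that signature alone; the directory paths are placeholders:

    from data_processing.pre_processing import LargeTiffLoader

    # Constructor signature as shown in the hunk; paths are illustrative only.
    loader = LargeTiffLoader(
        input_image_directory='/data/tiles/images',
        input_mask_directory='/data/tiles/masks',
        image_suffix='.tif',
        mask_suffix='.tif',
    )
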
diff --git a/data_processing/split_data.py b/data_processing/split_data.py
deleted file mode 100644
index 4546073..0000000
--- a/data_processing/split_data.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Sun Oct 13 12:15:42 2019
-
-@author: hasan
-"""
-import json
-import os
-import pandas as pd
-import shutil
-import random
-
-def merge_lists(a,b):
-    if(len(a)>=len(b)):
-        for item in b:
-            a.append(item)
-        return a
-    else:
-        for item in a:
-            b.append(item)
-        return b
-
-def get_dicts(dict_dir):
-    dict_dir=os.path.expanduser((os.path.join(dict_dir,'labels_dictionary')))
-    print(dict_dir)
-    with open(dict_dir,'r') as dfile:
-        labels=dfile.read()
-    label_file=json.loads(labels)
-    counter=0
-    dis_dict=dict()
-    reg_dict=dict()
-    names_ac_dis=dict()
-    names_ac_reg=dict()
-    for label in label_file:
-        img_name=label_file[label]['img_name']
-        # and len(label_file[label]['classes'])>0
-        if(img_name.split('.')[0].endswith('_pre_disaster')):
-            this_region=img_name.split('_')[0]
-            this_disaster=label_file[label]['disaster']
-            if(this_region not in reg_dict.keys()):
-                reg_dict[this_region]=1
-                names_ac_reg[this_region]=[]
-                names_ac_reg[this_region].append(img_name.split('.')[0])
-            else:
-                reg_dict[this_region]+=1
-                names_ac_reg[this_region].append(img_name.split('.')[0])
-            if(this_disaster not in dis_dict.keys()):
-                dis_dict[this_disaster]=1
-                names_ac_dis[this_disaster]=[]
-                names_ac_dis[this_disaster].append(img_name.split('.')[0])
-            else:
-                dis_dict[this_disaster]+=1
-                names_ac_dis[this_disaster].append(img_name.split('.')[0])
-            counter+=1
-    '''
-    df_reg=pd.DataFrame.from_dict(reg_dict,orient='index')
-    df_dis=pd.DataFrame.from_dict(dis_dict,orient='index')
-    df_reg.plot(kind='bar',title='Data distributions acc to regions')
-    df_dis.plot(kind='bar',title='Data distributions acc to disaster types')
-    mean1=int(df_reg.mean(axis=0)[0])
-    mean2=int(df_dis.mean(axis=0)[0])
-    print('mean1= '+str(mean1))
-    print('mean2= '+str(mean2))
-    print("number of images: "+str(counter))
-    print(names_ac_dis['tsunami'])
-    '''
-    return dis_dict,reg_dict,names_ac_dis,names_ac_reg,counter
-
-def copy_files(src,dst,file_list,extension=''):
-    print('start')
-    for files in file_list:
-        src_file_path = src + files+extension+'.png'
-        dst_file_path = dst + files+extension+'.png'
-        if os.path.exists(dst_file_path):
-            print(dst_file_path+" already exists")
-        else:
-            #print("Copying: " + dst_file_path)
-            try:
-                shutil.copyfile(src_file_path,dst_file_path)
-                print(dst_file_path)
-            except IOError:
-                a=0
-                print(src_file_path + " does not exist")
-                #input("Please, press enter to continue.")
-
-def get_split(stat,names,count,ratio=0.9,var=0.1):
-    df_reg=pd.DataFrame.from_dict(stat,orient='index')
-    #mean=int(df_reg.mean(axis=0)[0])+1
-    #print(str(mean))
-    #imprE=int(float((1-ratio)*float(mean)))
-    evalstat=dict()
-    for key in stat.keys():
-        temp=stat[key]
-        evalstat[key]={}
-        if(temp>=24):
-            evalstat[key]['count']=30
-            evalstat[key]['keep']=False
-        else:
-            evalstat[key]['count']=temp
-            evalstat[key]['keep']=True
-    '''
-    if(temp>=(mean*2)):
-        evalstat[key]['count']=int(imprE*(1+var))
-        evalstat[key]['keep']=False
-    else:
-        if(int(float(float(temp)*(1+var)))>=mean):
-            evalstat[key]['count']=int(float((1-ratio)*temp))
-            evalstat[key]['keep']=False
-        else:
-            if(int(float(float(temp)*(1+var)))>=int(mean/2)):
-                evalstat[key]['count']=int(float((1-0.5)*temp))
-                evalstat[key]['keep']=False
-            else:
-                evalstat[key]['count']=temp
-                evalstat[key]['keep']=True
-    '''
-    print('new distribution' + str(evalstat))
-    plot_dict=dict()
-    counter=0
-    for key in evalstat.keys():
-        plot_dict[key]=evalstat[key]['count']
-        counter+=evalstat[key]['count']
-    print("counter is : "+str(counter))
-    df_new=pd.DataFrame.from_dict(plot_dict,orient='index')
-    df_reg.plot(kind='bar',title='Data distributions acc to regions')
-    df_new.plot(kind='bar',title='Data distributions acc to regions in eval set')
-
-
-    evallist=[]
-    trainlist=[]
-    for key in evalstat.keys():
-        if(evalstat[key]['keep']==True):
-            templist=names[key]
-            random.shuffle(templist)
-            evallist=merge_lists(evallist,templist)
-            trainlist=merge_lists(trainlist,templist)
-        else:
-            templist=names[key]
-            random.shuffle(templist)
-            evallist=merge_lists(evallist,templist[:int(evalstat[key]['count'])])
-            trainlist=merge_lists(trainlist,templist[int(evalstat[key]['count']):])
-    random.shuffle(evallist)
-    random.shuffle(trainlist)
-
-    print("length of val set: "+str(len(evallist)))
-    print("length of train set: "+str(len(trainlist)))
-    x=input("please press any key")
-
-    return trainlist,evallist
-
-
-
-if __name__ == '__main__':
-    d_pth='/usr/local/NotSynced/xView2/xview2_data'
-    a,b,c,d,e = get_dicts(d_pth)
-    train,eval_ = get_split(b,d,e)
-    '''
-    lists=[]
-    for key in d.keys():
-        temp=d[key]
-        random.shuffle(temp)
-        lists=merge_lists(lists,temp)
-    random.shuffle(lists)
-    '''
-    main_path=os.path.expanduser(os.getcwd())
-    all_mskpth=os.path.join(main_path,'masks/')
-    src='/usr/local/NotSynced/xView2/train/images/'
-    destTrain=''.join([d_pth,'/train/data/'])
-    destEval=''.join([d_pth,'/val/data/'])
-    copy_files(src,destTrain,train)
-    copy_files(src,destEval,eval_)
-
-    destTrain=''.join([d_pth,'/train/masks/'])
-    destEval=''.join([d_pth,'/val/masks/'])
-
-    copy_files(all_mskpth,destTrain,train,extension='_mask')
-    copy_files(all_mskpth,destEval,eval_,extension='_mask')
-
diff --git a/data_processing/split_to_folds.py b/data_processing/split_to_folds.py
deleted file mode 100644
index a72c8c4..0000000
--- a/data_processing/split_to_folds.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Feb 5 11:16:29 2020
-
-@author: hasan
-"""
-from sklearn.model_selection import StratifiedKFold
-import os
-
-from tqdm import tqdm
-import pandas as pd
-
-def main():
-    path='/home/jamada/Desktop/OpenCitiesAI/Dataset/Tilestrain_tier_4/images'
-    items=[]
-    for image in os.listdir(path):
-        if('.png' in image ):
-            isplit=image.split('_')
-            region_id=f'{isplit[0]}_{isplit[1]}'
-            items.append({'id':image,'region_id':region_id,'x':isplit[2],'y':isplit[3].split('.')[0]})
-
-    df=pd.DataFrame(items,columns=['id','region_id','x','y'])
-    df['tile_id'] =df['region_id'].astype(str)+'_'+ df['x'].astype(str) + '_' + df['y'].astype(str)
-    X = df.groupby('tile_id')['region_id'].first().index.values
-    y = df.groupby('tile_id')['region_id'].first().values
-
-    skf=StratifiedKFold(n_splits=5, random_state=98, shuffle=True)
-    for i,(tfold,vfold) in enumerate(skf.split(X,y)):
-        df.loc[df['tile_id'].isin(X[vfold]),'fold']=int(i)
-
-    df.to_csv('folds4.csv')
-    folds=[int(fold) for fold in df.groupby('fold').first().index.values]
-
-
-    for fold in folds:
-        print(f'fold:\t{fold}')
-        print(df.loc[df['fold']==fold].set_index(['fold','region_id']).count(level='region_id'))
-
-
-if __name__=='__main__':
-    main()
\ No newline at end of file
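
Note: both split utilities are removed without a replacement in this patch. If equivalent fold assignment is needed later, the core of the deleted script boils down to a region-stratified StratifiedKFold. A minimal sketch, with a toy DataFrame standing in for the tile listing the deleted script built from disk:

    import pandas as pd
    from sklearn.model_selection import StratifiedKFold

    # Toy stand-in for the tile listing; ids and regions are illustrative.
    df = pd.DataFrame({
        'tile_id':   ['acc_1_0_0', 'acc_1_0_1', 'mon_2_0_0', 'mon_2_0_1'],
        'region_id': ['acc_1', 'acc_1', 'mon_2', 'mon_2'],
    })

    skf = StratifiedKFold(n_splits=2, random_state=98, shuffle=True)
    # Stratify on region so every fold sees every region.
    for i, (_, vfold) in enumerate(skf.split(df['tile_id'], df['region_id'])):
        df.loc[df.index[vfold], 'fold'] = i
    print(df)
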
diff --git a/post_process.py b/post_process.py
deleted file mode 100644
index cf0cdce..0000000
--- a/post_process.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 22 15:27:22 2020
-
-@author: jamada
-"""
-
-
-import numpy as np
-from skimage.morphology import watershed,dilation,square,erosion
-from skimage.measure import label
-from PIL import Image,ImageDraw
-from Data.utils import colorize
-from rasterio.features import shapes
-import pandas as pd
-from shapely.geometry import shape
-from shapely.wkt import dumps
-from shapely.ops import cascaded_union
-import geopandas as gpd
-def post_process(raw,thresh = 0.5,mina=40,save=None):
-
-    try:
-        ch = raw.shape[2]
-    except:
-        ch=1
-    if(ch == 2):
-        rraw = ranger(raw)
-
-        rbuilds = raw[...,0]
-        rborders = raw[...,1]
-
-        nuclei = rbuilds * (1 - rborders)
-
-        builds = raw[...,0]
-
-        basins = label(nuclei>0.1,background = 0, connectivity = 2)
-        #Image.fromarray(basins>0).show()
-        #basins = noise_filter(basins, mina = 2 )
-        basins = label(basins,background = 0, connectivity = 2)
-        washed = watershed(image = -builds,
-                           markers = basins,
-                           mask = builds>thresh,
-                           watershed_line=False)
-        washed = label(washed,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
-    elif(ch == 1):
-        builds = raw[...,0]
-        washed = label(builds > thresh,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
-    else:
-        raise NotImplementedError(
-        )
-
-    return washed
-
-def noise_filter(washed,mina):
-    values = np.unique(washed)
-    #a =0
-    #print(values)
-    for val in values[1:]:
-        #a+=1
-        area = (washed[washed == val]>0).sum()
-        if(area<=mina):
-            washed[washed == val] = 0
-    #print(a)
-    return washed
-
-def ranger(x):
-    x1 = x.copy()
-    return np.tanh((x1 - 0.5)/0.1) * (0.5)+0.5
-
-def extract_poly(mask):
-    shps = shapes(mask.astype(np.int16),mask>0)
-    polys =[]
-
-    for shp,value in shps:
-
-        p = shape(shp).buffer(0.0)
-
-        typ = p.geom_type
-        if(typ == 'Polygon' or typ == 'MultiPolygon'):
-            polys.append(p.simplify(0.01))
-        else:
-            continue
-    if(len(polys) == 0):
-        return None
-    else:
-        return cascaded_union(polys)
-    #break
-
-
-def mask_to_polys(iid,mask,mina = 4):
-    vals = sorted(np.unique(mask))
-    polys = []
-    areas = []
-    for i in vals[1:]:
-        poly = extract_poly(mask == i)
-
-        if(poly is not None):
-            if(poly.area > mina):
-                polys.append(poly)
-                areas.append(poly.area)
-    gdf = gpd.GeoDataFrame(
-        {'Id' : list(range(1,len(polys)+1)),
-         'geometry' : polys,
-         'area' : areas
-        })
-    return gdf
\ No newline at end of file
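
Note: this removes the stale root-level duplicate of post_process.py; extract_poly survives in data_processing/post_process.py (it appears as context in the hunk above). A hedged sketch of vectorizing a labeled mask with that surviving helper, assuming it keeps the signature shown; the per-label loop mirrors the deleted mask_to_polys and is illustrative, not part of the module:

    import numpy as np
    from data_processing.post_process import post_process, extract_poly

    pred = np.random.rand(256, 256, 2).astype(np.float32)  # toy prediction
    instances = post_process(pred)  # labeled instance mask, 0 = background

    # Vectorize each instance; extract_poly returns None for empty masks.
    polys = [p for val in np.unique(instances)[1:]
             if (p := extract_poly(instances == val)) is not None]
    print(len(polys), 'building polygons')
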