
Commit

going thru data_processing
aghand0ur committed Dec 11, 2023
1 parent 0ee4de4 commit 16e7a63
Showing 8 changed files with 31 additions and 566 deletions.
Binary file not shown.
81 changes: 0 additions & 81 deletions augmentation/cropping.py → data_processing/cropping.py
@@ -1,84 +1,3 @@
import numpy as np
import random
from skimage.measure import label as label_fn


def random_crop(image_stack, mask, image_size):
    '''
    Randomly crop the image stack and its mask to an image_size x image_size window.
    :param image_stack: input images of shape [Timestamps, Channels, Height, Width]
    :param mask: mask of the image, used to filter out uninteresting areas, shape [Height, Width]
    :param image_size: side length of the square crop window
    :return: cropped image_stack and mask
    '''

H, W = image_stack.shape[2:]

    # skip the random crop if the image is smaller than the crop size
if H - image_size // 2 <= image_size:
return image_stack, mask
if W - image_size // 2 <= image_size:
return image_stack, mask
    for _ in range(100):
        h = np.random.randint(image_size, H - image_size // 2)
        w = np.random.randint(image_size, W - image_size // 2)

        # crop into temporaries so each retry samples from the full-size input
        cropped_stack = image_stack[:, :, h - image_size // 2:h + image_size // 2,
                                    w - image_size // 2:w + image_size // 2]
        cropped_mask = mask[h - image_size // 2:h + image_size // 2,
                            w - image_size // 2:w + image_size // 2]
        # stop once the crop contains at least one labelled pixel
        if 1 in cropped_mask:
            break
    return cropped_stack, cropped_mask
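A minimal usage sketch (editor's illustration, not part of the file): cropping a 64x64 window from a dummy time-series stack.

stack = np.zeros((4, 3, 256, 256), dtype=np.float32)  # [Timestamps, Channels, H, W]
msk = np.zeros((256, 256), dtype=np.uint8)
msk[100:140, 100:140] = 1
cropped_stack, cropped_msk = random_crop(stack, msk, image_size=64)
# cropped_stack has shape (4, 3, 64, 64) and cropped_msk has shape (64, 64)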

def random_crop_around_aoi(img,mask,size = 32,min_area=0):
    '''
    Crop a size x size window positioned around a randomly chosen
    connected component (area of interest) of the mask.
    '''
    h,w = img.shape[2:]
    mask_original = mask.copy()
    size_h,size_w = size,size

if h <= size and w <= size:
return img,mask
if h < size:
size_h = h
if w < size:
size_w = w

    if mask.max() == 0:
        # empty mask: treat the whole image as the area of interest
        t,b,l,r = 0,h-1,0,w-1
    else:
        # label connected components and keep those with at least min_area pixels
        mask = label_fn(mask,connectivity=2)
        values = [value for value in np.unique(mask)[1:] if mask[mask==value].sum()/value >= min_area]

        if len(values) == 0:
            t,b,l,r = 0,h-1,0,w-1
        else:
            # pick one component at random and compute its bounding box
            sval = values[random.randint(0,len(values)-1)]
            mask[mask!=sval] = 0
            mask = ((mask / sval) * 255.0).astype(np.uint8)
            pos = np.nonzero(mask)
            t, b, l, r = pos[0].min(),pos[0].max(),pos[1].min(),pos[1].max()

h_aoi,w_aoi = b-t,r-l
pt = random.randint(t+h_aoi//2, b-h_aoi//2),random.randint(l+w_aoi//2, r-w_aoi//2)

    # pick a crop origin so that pt lies inside the crop and the crop stays within the image bounds
    max_up = pt[0]
    max_left = pt[1]
    min_up = max(0,size_h - (h - pt[0]))
    min_left = max(0,size_w - (w - pt[1]))

    t_crop = pt[0] - min(max_up, random.randint(min_up, size_h-1))
    l_crop = pt[1] - min(max_left, random.randint(min_left, size_w-1))

cropped_img = img[:,:,t_crop:t_crop+size_h,l_crop:l_crop+size_w]
cropped_mask = mask_original[t_crop:t_crop+size_h,l_crop:l_crop+size_w]

return cropped_img,cropped_mask
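Similarly, a short illustrative call of random_crop_around_aoi (not part of the file); with this solid square mask the returned 32x32 crop contains labelled pixels.

img = np.random.rand(4, 3, 128, 128).astype(np.float32)
aoi_mask = np.zeros((128, 128), dtype=np.uint8)
aoi_mask[60:80, 60:80] = 1
crop_img, crop_mask = random_crop_around_aoi(img, aoi_mask, size=32, min_area=10)
# crop_img: (4, 3, 32, 32), crop_mask: (32, 32), crop_mask.max() == 1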



### Crop images while keeping them georeferenced

import rasterio
import os
from rasterio.windows import Window
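The georeferenced-cropping code itself is not shown in this hunk. As a rough sketch of the idea only (the function name, paths and window parameters below are assumptions, not the repository's code), a window can be read with rasterio and written back with an updated transform so the crop stays georeferenced:

def crop_geotiff(src_path, dst_path, col_off, row_off, width, height):
    # read a window from the source raster and keep its georeferencing in the output
    with rasterio.open(src_path) as src:
        window = Window(col_off, row_off, width, height)
        data = src.read(window=window)
        profile = src.profile.copy()
        profile.update(height=height, width=width,
                       transform=src.window_transform(window))  # shift the affine transform to the window
    with rasterio.open(dst_path, 'w', **profile) as dst:
        dst.write(data)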
9 changes: 0 additions & 9 deletions data_processing/filtering.py

This file was deleted.

75 changes: 30 additions & 45 deletions data_processing/post_process.py
@@ -18,66 +18,51 @@
from shapely.wkt import dumps
from shapely.ops import cascaded_union
import geopandas as gpd
-def post_process(raw,thresh = 0.5,mina=40,save=None):
+def post_process(pred,thresh = 0.5,thresh_b = 0.6,mina=100,mina_b=50):
+    if len(pred.shape) < 2:
+        return None
+    if len(pred.shape) == 2:
+        pred = pred[...,np.newaxis]
 
-    try:
-        ch = raw.shape[2]
-    except:
-        ch=1
-    if(ch == 2):
-        rraw = ranger(raw)
-
-        rbuilds = raw[...,0]
-        rborders = raw[...,1]
-
-        nuclei = rbuilds * (1 - rborders)
-
-        builds = raw[...,0]
-
-        basins = label(nuclei>0.1,background = 0, connectivity = 2)
-        #Image.fromarray(basins>0).show()
-        #basins = noise_filter(basins, mina = 2 )
-        basins = label(basins,background = 0, connectivity = 2)
-        washed = watershed(image = -builds,
+    ch = pred.shape[2]
+    buildings = pred[...,0]
+    if ch > 1:
+        borders = pred[...,1]
+        nuclei = buildings * (1.0 - borders)
+
+        if ch == 3:
+            spacing = pred[...,2]
+            nuclei *= (1.0 - spacing)
+
+        basins = label(nuclei>thresh_b,background = 0, connectivity = 2)
+        if mina_b > 0:
+            basins = noise_filter(basins, mina = mina_b)
+            basins = label(basins,background = 0, connectivity = 2)
+
+        washed = watershed(image = -buildings,
                            markers = basins,
-                           mask = builds>thresh,
+                           mask = buildings>thresh,
                            watershed_line=False)
-        washed = label(washed,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
-    elif(ch == 1):
-        builds = raw[...,0]
-        washed = label(builds > thresh,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
     else:
-        raise NotImplementedError(
-        )
+        washed = buildings > thresh
+
+    washed = label(washed,background = 0, connectivity = 2)
+    washed = noise_filter(washed, mina=mina)
+    washed = label(washed,background = 0, connectivity = 2)
 
     return washed
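A minimal usage sketch of the updated post_process (editor's illustration, not part of the commit): a two-channel prediction (building probability, border probability) is turned into a labelled instance map.

pred = np.random.rand(256, 256, 2).astype(np.float32)  # [H, W, (building, border)]
instances = post_process(pred, thresh=0.5, thresh_b=0.6, mina=100, mina_b=50)
# instances: integer label map of shape (256, 256); 0 = background, 1..N = building instances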

def noise_filter(washed,mina):
    # remove labelled components whose pixel area is not larger than mina
    values = np.unique(washed)
    for val in values[1:]:
        area = (washed[washed == val]>0).sum()
        if(area<=mina):
            washed[washed == val] = 0
    return washed

def ranger(x):
    # soft contrast stretch: a tanh centred at 0.5 pushes values toward 0 or 1
    x1 = x.copy()
    return np.tanh((x1 - 0.5)/0.1) * (0.5)+0.5

def extract_poly(mask):
shps = shapes(mask.astype(np.int16),mask>0)
polys =[]
@@ -48,91 +48,7 @@
np.random.seed(42)




"""
https://github.com/geoaigroup/challenges/blob/main/ai4foodsecurity-challenge/lstm-cnn.ipynb
The data are stored as numpy arrays with dimension height x width x bands x timesteps.
All of the reflectance values are in the range [0,1]. We also add
two spectral indices (NDWI and LSWI) and SAR bands (VV, VH, incidence angle/IA).
"""

def add_lswi_channel(X):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# calculate values for LSWI channel
for i in range(N_TIMESTEPS):
lswi = (X[:,:,NIR,i]-X[:,:,SWIR1,i])/(X[:,:,NIR,i]+X[:,:,SWIR1,i])
_X[:,:,-1,i] = lswi
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def add_ndwi_channel(X):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# calculate values for NDWI channel
for i in range(N_TIMESTEPS):
ndwi = (X[:,:,GREEN,i]-X[:,:,SWIR1,i])/(X[:,:,GREEN,i]+X[:,:,SWIR1,i])
_X[:,:,-1,i] = ndwi
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def add_sar_channel(X, band, path):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# load the corresponding SAR band
if band=='vv' or band=='VV':
sarpath = path.replace('pheno_timeseries', 'vv_timeseries')
#sarpath = path.replace('fixed_timeseries', 'vv_timeseries')
elif band=='vh' or band=='VH':
sarpath = path.replace('pheno_timeseries', 'vh_timeseries')
#sarpath = path.replace('fixed_timeseries', 'vh_timeseries')
elif band=='ia' or band=='IA':
sarpath = path.replace('pheno_timeseries', 'ia_timeseries')
#sarpath = path.replace('fixed_timeseries', 'ia_timeseries')
sar = np.load(sarpath).astype(np.float32)
for i in range(N_TIMESTEPS):
_X[:,:,-1,i] = sar[...,i]
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def load_data(x_path, y_path, flatten=True, convert_nans=True):
# Load the time series image data
X = np.load(x_path).astype(np.float32)
# Load the associated labels
Y = np.load(y_path).astype(np.int8)

# Convert all the NaNs to zeros
if convert_nans:
X[np.where(np.isnan(X))] = 0

    # replace exact zeros with a tiny value (e.g. to avoid 0/0 when the band indices are computed)
    X[np.where(X==0)] = 0.00000001
# Add band indices
X = add_lswi_channel(X)
X = add_ndwi_channel(X)
X = add_sar_channel(X, 'vv', x_path)
X = add_sar_channel(X, 'vh', x_path)
X = add_sar_channel(X, 'ia', x_path)
if flatten:
# Reduce the h x w x b x t dataset to h*w x b x t
X = np.reshape(X, (X.shape[0]*X.shape[1], X.shape[2], X.shape[3]))
Y = np.reshape(Y, (Y.shape[0]*Y.shape[1]))
assert X.shape[0] == Y.shape[0]
return X, Y
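A short usage sketch for load_data (illustrative; the .npy paths are hypothetical):

X, Y = load_data('tile_0_pheno_timeseries.npy', 'tile_0_labels.npy', flatten=True)
# X: (HEIGHT*WIDTH, original_bands + 5, N_TIMESTEPS) after adding LSWI, NDWI, VV, VH and IA
# Y: (HEIGHT*WIDTH,)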



-class loading_large_tile:
+class LargeTiffLoader:

def __init__(self,input_image_directory,input_mask_directory,image_suffix='.tif',mask_suffix='.tif'):
self.image_directory=input_image_directory
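A hypothetical instantiation of the renamed loader (directory paths are made up for illustration):

loader = LargeTiffLoader('/data/images', '/data/masks', image_suffix='.tif', mask_suffix='.tif')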

