
Commit

going thru data_processing
aghand0ur committed Dec 11, 2023
1 parent 0ee4de4 commit 16e7a63
Showing 8 changed files with 31 additions and 566 deletions.
Binary file not shown.
81 changes: 0 additions & 81 deletions augmentation/cropping.py → data_processing/cropping.py
@@ -1,84 +1,3 @@
import numpy as np
import random
from skimage.measure import label as label_fn


def random_crop(image_stack, mask, image_size):
    '''
    Randomly crop the image stack and its mask to an image_size x image_size window.
    :param image_stack: input images of shape [Timestamps, Channels, Height, Width]
    :param mask: mask of the image, used to filter out uninteresting areas, shape [Height, Width]
    :param image_size: side length of the square crop window
    :return: cropped image_stack and mask
    '''

H, W = image_stack.shape[2:]

    # skip the random crop if the image is smaller than the crop size
if H - image_size // 2 <= image_size:
return image_stack, mask
if W - image_size // 2 <= image_size:
return image_stack, mask
    for _ in range(100):
        h = np.random.randint(image_size, H - image_size // 2)
        w = np.random.randint(image_size, W - image_size // 2)

        # crop into temporaries so each retry samples from the full-size input
        cropped_stack = image_stack[:, :, h - image_size // 2:h + image_size // 2,
                                    w - image_size // 2:w + image_size // 2]
        cropped_mask = mask[h - image_size // 2:h + image_size // 2,
                            w - image_size // 2:w + image_size // 2]
        # stop once the crop contains at least one labelled pixel
        if 1 in cropped_mask:
            break
    return cropped_stack, cropped_mask
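A minimal usage sketch (editor's illustration, not part of the file): cropping a 64x64 window from a dummy time-series stack.

stack = np.zeros((4, 3, 256, 256), dtype=np.float32)  # [Timestamps, Channels, H, W]
msk = np.zeros((256, 256), dtype=np.uint8)
msk[100:140, 100:140] = 1
cropped_stack, cropped_msk = random_crop(stack, msk, image_size=64)
# cropped_stack has shape (4, 3, 64, 64) and cropped_msk has shape (64, 64)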

def random_crop_around_aoi(img,mask,size = 32,min_area=0):
    '''
    Crop a size x size window positioned around a randomly chosen
    connected component (area of interest) of the mask.
    '''
    h,w = img.shape[2:]
    mask_original = mask.copy()
    size_h,size_w = size,size

if h <= size and w <= size:
return img,mask
if h < size:
size_h = h
if w < size:
size_w = w

    if mask.max() == 0:
        # empty mask: treat the whole image as the area of interest
        t,b,l,r = 0,h-1,0,w-1
    else:
        # label connected components and keep those with at least min_area pixels
        mask = label_fn(mask,connectivity=2)
        values = [value for value in np.unique(mask)[1:] if mask[mask==value].sum()/value >= min_area]

        if len(values) == 0:
            t,b,l,r = 0,h-1,0,w-1
        else:
            # pick one component at random and compute its bounding box
            sval = values[random.randint(0,len(values)-1)]
            mask[mask!=sval] = 0
            mask = ((mask / sval) * 255.0).astype(np.uint8)
            pos = np.nonzero(mask)
            t, b, l, r = pos[0].min(),pos[0].max(),pos[1].min(),pos[1].max()

h_aoi,w_aoi = b-t,r-l
pt = random.randint(t+h_aoi//2, b-h_aoi//2),random.randint(l+w_aoi//2, r-w_aoi//2)

    # pick a crop origin so that pt lies inside the crop and the crop stays within the image bounds
    max_up = pt[0]
    max_left = pt[1]
    min_up = max(0,size_h - (h - pt[0]))
    min_left = max(0,size_w - (w - pt[1]))

    t_crop = pt[0] - min(max_up, random.randint(min_up, size_h-1))
    l_crop = pt[1] - min(max_left, random.randint(min_left, size_w-1))

cropped_img = img[:,:,t_crop:t_crop+size_h,l_crop:l_crop+size_w]
cropped_mask = mask_original[t_crop:t_crop+size_h,l_crop:l_crop+size_w]

return cropped_img,cropped_mask
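Similarly, a short illustrative call of random_crop_around_aoi (not part of the file); with this solid square mask the returned 32x32 crop contains labelled pixels.

img = np.random.rand(4, 3, 128, 128).astype(np.float32)
aoi_mask = np.zeros((128, 128), dtype=np.uint8)
aoi_mask[60:80, 60:80] = 1
crop_img, crop_mask = random_crop_around_aoi(img, aoi_mask, size=32, min_area=10)
# crop_img: (4, 3, 32, 32), crop_mask: (32, 32), crop_mask.max() == 1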



### Crop images while keeping them georeferenced

import rasterio
import os
from rasterio.windows import Window
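The georeferenced-cropping code itself is not shown in this hunk. As a rough sketch of the idea only (the function name, paths and window parameters below are assumptions, not the repository's code), a window can be read with rasterio and written back with an updated transform so the crop stays georeferenced:

def crop_geotiff(src_path, dst_path, col_off, row_off, width, height):
    # read a window from the source raster and keep its georeferencing in the output
    with rasterio.open(src_path) as src:
        window = Window(col_off, row_off, width, height)
        data = src.read(window=window)
        profile = src.profile.copy()
        profile.update(height=height, width=width,
                       transform=src.window_transform(window))  # shift the affine transform to the window
    with rasterio.open(dst_path, 'w', **profile) as dst:
        dst.write(data)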
9 changes: 0 additions & 9 deletions data_processing/filtering.py

This file was deleted.

75 changes: 30 additions & 45 deletions data_processing/post_process.py
@@ -18,66 +18,51 @@
from shapely.wkt import dumps
from shapely.ops import cascaded_union
import geopandas as gpd
-def post_process(raw,thresh = 0.5,mina=40,save=None):
+def post_process(pred,thresh = 0.5,thresh_b = 0.6,mina=100,mina_b=50):
+    if len(pred.shape) < 2:
+        return None
+    if len(pred.shape) == 2:
+        pred = pred[...,np.newaxis]
 
-    try:
-        ch = raw.shape[2]
-    except:
-        ch=1
-    if(ch == 2):
-        rraw = ranger(raw)
-
-        rbuilds = raw[...,0]
-        rborders = raw[...,1]
-
-        nuclei = rbuilds * (1 - rborders)
-
-        builds = raw[...,0]
-
-        basins = label(nuclei>0.1,background = 0, connectivity = 2)
-        #Image.fromarray(basins>0).show()
-        #basins = noise_filter(basins, mina = 2 )
-        basins = label(basins,background = 0, connectivity = 2)
-        washed = watershed(image = -builds,
+    ch = pred.shape[2]
+    buildings = pred[...,0]
+    if ch > 1:
+        borders = pred[...,1]
+        nuclei = buildings * (1.0 - borders)
+
+        if ch == 3:
+            spacing = pred[...,2]
+            nuclei *= (1.0 - spacing)
+
+        basins = label(nuclei>thresh_b,background = 0, connectivity = 2)
+        if mina_b > 0:
+            basins = noise_filter(basins, mina = mina_b)
+            basins = label(basins,background = 0, connectivity = 2)
+
+        washed = watershed(image = -buildings,
                            markers = basins,
-                           mask = builds>thresh,
+                           mask = buildings>thresh,
                            watershed_line=False)
-        washed = label(washed,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
-    elif(ch == 1):
-        builds = raw[...,0]
-        washed = label(builds > thresh,background = 0, connectivity = 2)
-        washed = noise_filter(washed, mina=thresh)
-        washed = label(washed,background = 0, connectivity = 2)
-        #col = colorize(washed)
-        #Image.fromarray(col).show()
-
     else:
-        raise NotImplementedError(
-        )
+        washed = buildings > thresh
+
+    washed = label(washed,background = 0, connectivity = 2)
+    washed = noise_filter(washed, mina=mina)
+    washed = label(washed,background = 0, connectivity = 2)
 
     return washed
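A minimal usage sketch of the updated post_process (editor's illustration, not part of the commit): a two-channel prediction (building probability, border probability) is turned into a labelled instance map.

pred = np.random.rand(256, 256, 2).astype(np.float32)  # [H, W, (building, border)]
instances = post_process(pred, thresh=0.5, thresh_b=0.6, mina=100, mina_b=50)
# instances: integer label map of shape (256, 256); 0 = background, 1..N = building instances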

def noise_filter(washed,mina):
    # remove labelled components whose pixel area is not larger than mina
    values = np.unique(washed)
    for val in values[1:]:
        area = (washed[washed == val]>0).sum()
        if(area<=mina):
            washed[washed == val] = 0
    return washed

def ranger(x):
    # soft contrast stretch: a tanh centred at 0.5 pushes values toward 0 or 1
    x1 = x.copy()
    return np.tanh((x1 - 0.5)/0.1) * (0.5)+0.5

def extract_poly(mask):
shps = shapes(mask.astype(np.int16),mask>0)
polys =[]
@@ -48,91 +48,7 @@
np.random.seed(42)




"""
https://github.com/geoaigroup/challenges/blob/main/ai4foodsecurity-challenge/lstm-cnn.ipynb
The data are stored as numpy arrays with dimension height x width x bands x timesteps.
All of the reflectance values are in the range [0,1]. We also add
two spectral indices (NDWI and LSWI) and SAR bands (VV, VH, incidence angle/IA).
"""

def add_lswi_channel(X):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# calculate values for LSWI channel
for i in range(N_TIMESTEPS):
lswi = (X[:,:,NIR,i]-X[:,:,SWIR1,i])/(X[:,:,NIR,i]+X[:,:,SWIR1,i])
_X[:,:,-1,i] = lswi
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def add_ndwi_channel(X):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# calculate values for NDWI channel
for i in range(N_TIMESTEPS):
ndwi = (X[:,:,GREEN,i]-X[:,:,SWIR1,i])/(X[:,:,GREEN,i]+X[:,:,SWIR1,i])
_X[:,:,-1,i] = ndwi
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def add_sar_channel(X, band, path):
_X = np.ndarray([HEIGHT, WIDTH, X.shape[2]+1, N_TIMESTEPS])
# copy the values from the original array
for i in range(X.shape[2]):
_X[:,:,i,:] = X[:,:,i,:]
# load the corresponding SAR band
if band=='vv' or band=='VV':
sarpath = path.replace('pheno_timeseries', 'vv_timeseries')
#sarpath = path.replace('fixed_timeseries', 'vv_timeseries')
elif band=='vh' or band=='VH':
sarpath = path.replace('pheno_timeseries', 'vh_timeseries')
#sarpath = path.replace('fixed_timeseries', 'vh_timeseries')
elif band=='ia' or band=='IA':
sarpath = path.replace('pheno_timeseries', 'ia_timeseries')
#sarpath = path.replace('fixed_timeseries', 'ia_timeseries')
sar = np.load(sarpath).astype(np.float32)
for i in range(N_TIMESTEPS):
_X[:,:,-1,i] = sar[...,i]
# make sure we didn't introduce any NaNs
_X[np.where(np.isnan(_X))] = 0
return _X

def load_data(x_path, y_path, flatten=True, convert_nans=True):
# Load the time series image data
X = np.load(x_path).astype(np.float32)
# Load the associated labels
Y = np.load(y_path).astype(np.int8)

# Convert all the NaNs to zeros
if convert_nans:
X[np.where(np.isnan(X))] = 0

    # replace exact zeros with a tiny value (e.g. to avoid 0/0 when the band indices are computed)
    X[np.where(X==0)] = 0.00000001
# Add band indices
X = add_lswi_channel(X)
X = add_ndwi_channel(X)
X = add_sar_channel(X, 'vv', x_path)
X = add_sar_channel(X, 'vh', x_path)
X = add_sar_channel(X, 'ia', x_path)
if flatten:
# Reduce the h x w x b x t dataset to h*w x b x t
X = np.reshape(X, (X.shape[0]*X.shape[1], X.shape[2], X.shape[3]))
Y = np.reshape(Y, (Y.shape[0]*Y.shape[1]))
assert X.shape[0] == Y.shape[0]
return X, Y
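A short usage sketch for load_data (illustrative; the .npy paths are hypothetical):

X, Y = load_data('tile_0_pheno_timeseries.npy', 'tile_0_labels.npy', flatten=True)
# X: (HEIGHT*WIDTH, original_bands + 5, N_TIMESTEPS) after adding LSWI, NDWI, VV, VH and IA
# Y: (HEIGHT*WIDTH,)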



-class loading_large_tile:
+class LargeTiffLoader:

def __init__(self,input_image_directory,input_mask_directory,image_suffix='.tif',mask_suffix='.tif'):
self.image_directory=input_image_directory
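A hypothetical instantiation of the renamed loader (directory paths are made up for illustration):

loader = LargeTiffLoader('/data/images', '/data/masks', image_suffix='.tif', mask_suffix='.tif')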

