diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml new file mode 100644 index 0000000..0ea6ebd --- /dev/null +++ b/.github/workflows/dev.yml @@ -0,0 +1,69 @@ +name: dev + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + + - name: checkout code repository + uses: actions/checkout@v3 + + - name: run pre-commit + uses: pre-commit/action@v3.0.0 + + tests: + + name: Python ${{ matrix.python-version }} ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ['3.7', '3.8', '3.9', '3.10'] + include: + - environment-file: environment-dev.yml + miniforge-variant: Mambaforge + miniforge-version: 4.14.0-0 + + defaults: + run: + shell: bash -l {0} + + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + + - name: setup conda environment with mambaforge + uses: conda-incubator/setup-miniconda@v2 + with: + use-mamba: true + activate-environment: detectree-dev + python-version: ${{ matrix.python-version }} + condarc-file: ${{ matrix.condarc-file }} + environment-file: ${{ matrix.environment-file }} + miniforge-variant: ${{ matrix.miniforge-variant }} + miniforge-version: ${{ matrix.miniforge-version }} + + - name: install detectree + run: | + pip install . 
+ conda list + conda info --all + + - name: test docs + run: make -C ./docs html + + - name: test code + run: | + coverage run --source ./detectree --module pytest --verbose + coverage xml -i + coverage report -m + + - name: upload coverage report + uses: codecov/codecov-action@v3 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 3bd0ab9..0000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: tests - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - - build: - - name: Python ${{ matrix.python-version }} ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ['3.6', '3.7', '3.8', '3.9'] - - defaults: - run: - shell: bash -l {0} - - steps: - - - name: Checkout repo - uses: actions/checkout@v2 - with: - fetch-depth: 2 - - - name: Cache conda - uses: actions/cache@v2 - env: - CACHE_NUMBER: 0 - with: - path: ~/conda_pkgs_dir - key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment-dev.yml') }} - - - name: Setup conda environment - uses: conda-incubator/setup-miniconda@v2 - with: - python-version: ${{ matrix.python-version }} - use-only-tar-bz2: true # required for caching - show-channel-urls: true - channel-priority: strict - activate-environment: detectree-dev - environment-file: environment-dev.yml - - - name: Install detectree - run: | - pip install .[laszip] - conda list - conda info --all - - - name: Lint code - run: | - isort detectree --check-only - black . --check --diff - flake8 . 
- pydocstyle detectree - - - name: Test docs - run: make -C ./docs html - - - name: Test code - run: | - coverage run --source ./detectree --module pytest --verbose - coverage xml -i - coverage report -m - - - name: Upload coverage report - uses: codecov/codecov-action@v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e3297c..b486d59 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,30 +1,34 @@ -exclude: 'docs|node_modules|migrations|.git|.tox' +exclude: 'docs|node_modules|migrations|.tox' default_stages: [commit] fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v4.3.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.18.2 + hooks: + - id: check-github-workflows + - repo: https://github.com/psf/black - rev: 21.5b1 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/timothycrosley/isort - rev: 5.8.0 + rev: 5.10.1 hooks: - id: isort - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + rev: 5.0.4 hooks: - id: flake8 - args: ['--config=setup.cfg'] additional_dependencies: [flake8-isort] - repo: https://github.com/pycqa/pydocstyle @@ -32,4 +36,3 @@ repos: hooks: - id: pydocstyle files: ^detectree - args: ['--config=setup.cfg'] diff --git a/detectree/classifier.py b/detectree/classifier.py index 102bdd6..e7a9f85 100644 --- a/detectree/classifier.py +++ b/detectree/classifier.py @@ -35,63 +35,55 @@ def __init__( """ Initialize the classifier. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for details. Parameters ---------- num_estimators : int, optional - The maximum number of estimators at which boosting is terminated. - Directly passed to the `n_estimators` keyword argument of - `sklearn.ensemble.AdaBoostClassifier`. 
If no value is provided, - the default value set in `settings.CLF_DEFAULT_NUM_ESTIMATORS` - will be taken. + The maximum number of estimators at which boosting is terminated. Directly + passed to the `n_estimators` keyword argument of + `sklearn.ensemble.AdaBoostClassifier`. If no value is provided, the default + value set in `settings.CLF_DEFAULT_NUM_ESTIMATORS` will be taken. sigmas : list-like, optional - The list of scale parameters (sigmas) to build the Gaussian filter - bank that will be used to compute the pixel-level features. The - provided argument will be passed to the initialization method of - the `PixelFeaturesBuilder` class. If no value is provided, the - default value set in `settings.GAUSS_DEFAULT_SIGMAS` will be taken. + The list of scale parameters (sigmas) to build the Gaussian filter bank that + will be used to compute the pixel-level features. The provided argument will + be passed to the initialization method of the `PixelFeaturesBuilder` class. + If no value is provided, the default value set in + `settings.GAUSS_DEFAULT_SIGMAS` will be taken. num_orientations : int, optional - The number of equally-distributed orientations to build the - Gaussian filter bank that will be used to compute the pixel-level - features. The provided argument will be passed to the - initialization method of the `PixelFeaturesBuilder` class. If no - value is provided, the default value set in - `settings.GAUSS_DEFAULT_NUM_ORIENTATIONS` will be taken. + The number of equally-distributed orientations to build the Gaussian filter + bank that will be used to compute the pixel-level features. The provided + argument will be passed to the initialization method of the + `PixelFeaturesBuilder` class. If no value is provided, the default value set + in `settings.GAUSS_DEFAULT_NUM_ORIENTATIONS` will be taken. neighborhood : array-like, optional - The base neighborhood structure that will be used to compute the - entropy features. 
The provided argument will be passed to the - initialization method of the `PixelFeaturesBuilder` class. If no - value is provided, a square with a side size of - `2 * min_neighborhood_range + 1` will be used. + The base neighborhood structure that will be used to compute the entropy + features. The provided argument will be passed to the initialization method + of the `PixelFeaturesBuilder` class. If no value is provided, a square with + a side size of `2 * min_neighborhood_range + 1` will be used. min_neighborhood_range : int, optional - The range (i.e., the square radius) of the smallest neigbhorhood - window that will be used to compute the entropy features. The - provided argument will be passed to the initialization method of - the `PixelFeaturesBuilder` class. If no value is provided, the - default value set in + The range (i.e., the square radius) of the smallest neighborhood window that + will be used to compute the entropy features. The provided argument will be + passed to the initialization method of the `PixelFeaturesBuilder` class. If + no value is provided, the default value set in `settings.ENTROPY_DEFAULT_MIN_NEIGHBORHOOD_RANGE` will be taken. num_neighborhoods : int, optional The number of neigbhorhood windows (whose size follows a geometric - progression starting at `min_neighborhood_range`) that will be - used to compute the entropy features. The provided argument will - be passed to the initialization method of the - `PixelFeaturesBuilder` class. If no value is provided, the default - value set in `settings.ENTROPY_DEFAULT_NUM_NEIGHBORHOODS` will be - taken. + progression starting at `min_neighborhood_range`) that will be used to + compute the entropy features. The provided argument will be passed to the + initialization method of the `PixelFeaturesBuilder` class. If no value is + provided, the default value set in + `settings.ENTROPY_DEFAULT_NUM_NEIGHBORHOODS` will be taken. 
tree_val : int, optional - The value that designates tree pixels in the response images. The - provided argument will be passed to the initialization method of - the `PixelResponseBuilder` class. If no value is provided, the - default value set in `settings.RESPONSE_DEFAULT_TREE_VAL` will be - taken. + The value that designates tree pixels in the response images. The provided + argument will be passed to the initialization method of the + `PixelResponseBuilder` class. If no value is provided, the default value set + in `settings.RESPONSE_DEFAULT_TREE_VAL` will be taken. nontree_val : int, optional - The value that designates non-tree pixels in the response images. - The provided argument will be passed to the initialization method - of the `PixelResponseBuilder` class. If no value is provided, the - default value set in `settings.RESPONSE_DEFAULT_NONTREE_VAL` will - be taken. + The value that designates non-tree pixels in the response images. The + provided argument will be passed to the initialization method of the + `PixelResponseBuilder` class. If no value is provided, the default value set + in `settings.RESPONSE_DEFAULT_NONTREE_VAL` will be taken. adaboost_kws : key-value pairings, optional Keyword arguments that will be passed to `sklearn.ensemble.AdaBoostClassifier`. @@ -129,39 +121,37 @@ def train_classifier( """ Train a classifier. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for more + details. Parameters ---------- split_df : pandas DataFrame, optional - Data frame with the train/test split + Data frame with the train/test split. response_img_dir : str representing path to a directory, optional - Path to the directory where the response tiles are located. - Required if providing `split_df`. Otherwise `response_img_dir` - might either be ignored if providing `response_img_filepaths`, or - be used as the directory where the images whose filename matches - `img_filename_pattern` are to be located. 
+ Path to the directory where the response tiles are located. Required if + providing `split_df`. Otherwise `response_img_dir` might either be ignored + if providing `response_img_filepaths`, or be used as the directory where the + images whose filename matches `img_filename_pattern` are to be located. img_filepaths : list-like, optional - List of paths to the input tiles whose features will be used to - train the classifier. Ignored if `split_df` is provided. + List of paths to the input tiles whose features will be used to train the + classifier. Ignored if `split_df` is provided. response_img_filepaths : list-like, optional - List of paths to the binary response tiles that will be used to - train the classifier. Ignored if `split_df` is provided. + List of paths to the binary response tiles that will be used to train the + classifier. Ignored if `split_df` is provided. img_dir : str representing path to a directory, optional Path to the directory where the images whose filename matches `img_filename_pattern` are to be located. Ignored if `split_df` or `img_filepaths` is provided. img_filename_pattern : str representing a file-name pattern, optional - Filename pattern to be matched in order to obtain the list of - images. If no value is provided, the default value set in - `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if - `split_df` or `img_filepaths` is provided. + Filename pattern to be matched in order to obtain the list of images. If no + value is provided, the default value set in + `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if `split_df` + or `img_filepaths` is provided. method : {'cluster-I', 'cluster-II'}, optional - Method used in the train/test split + Method used in the train/test split. img_cluster : int, optional - The label of the cluster of tiles. Only used if `method` is - 'cluster-II' + The label of the cluster of tiles. Only used if `method` is 'cluster-II'. 
Returns ------- @@ -169,27 +159,21 @@ def train_classifier( The trained classifier """ if split_df is None and response_img_filepaths is None: - # this is the only case that needs argument tweaking: - # otherwise, if we pass `img_filepaths`/`img_dir` to - # `build_features` and `response_img_dir` to `build_response`, the - # latter would build a response with all the image files in - # `response_img_dir`. Instead, we need to build the response only + # this is the only case that needs argument tweaking: otherwise, if we pass + # `img_filepaths`/`img_dir` to `build_features` and `response_img_dir` to + # `build_response`, the latter would build a response with all the image + # files in `response_img_dir`. Instead, we need to build the response only # for the files speficied in `img_filepaths`/`img_dir` if img_filepaths is None: - # TODO: this is copied from `build_features` - ideally, we - # should DRY it + # TODO: this is copied from `build_features` - ideally, we should DRY it if img_filename_pattern is None: - img_filename_pattern = ( - settings.IMG_DEFAULT_FILENAME_PATTERN - ) + img_filename_pattern = settings.IMG_DEFAULT_FILENAME_PATTERN if img_dir is None: raise ValueError( "Either `split_df`, `img_filepaths` or `img_dir` must " "be provided" ) - img_filepaths = glob.glob( - path.join(img_dir, img_filename_pattern) - ) + img_filepaths = glob.glob(path.join(img_dir, img_filename_pattern)) response_img_filepaths = [ path.join(response_img_dir, path.basename(img_filepath)) @@ -229,22 +213,22 @@ def train_classifiers(self, split_df, response_img_dir): """ Train a classifier for each first-level cluster in `split_df`. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for more + details. Parameters ---------- split_df : pandas DataFrame - Data frame with the train/test split, which must have an - `img_cluster` column with the first-level cluster labels. 
+ Data frame with the train/test split, which must have an `img_cluster` + column with the first-level cluster labels. response_img_dir : str representing path to a directory Path to the directory where the response tiles are located. Returns ------- clf_dict : dictionary - Dictionary mapping a scikit-learn AdaBoostClassifier to each - first-level cluster label + Dictionary mapping a scikit-learn AdaBoostClassifier to each first-level + cluster label. """ if "img_cluster" not in split_df: raise ValueError( @@ -283,39 +267,36 @@ def __init__( """ Initialize the classifier instance. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for more + details. Parameters ---------- tree_val : int, optional - Label used to denote tree pixels in the predicted images. If no - value is provided, the default value set in - `settings.CLF_DEFAULT_TREE_VAL` will be taken. + Label used to denote tree pixels in the predicted images. If no value is + provided, the default value set in `settings.CLF_DEFAULT_TREE_VAL` will be + taken. nontree_val : int, optional - Label used to denote non-tree pixels in the predicted images. If - no value is provided, the default value set in - `settings.CLF_DEFAULT_NONTREE_VAL` will be taken. + Label used to denote non-tree pixels in the predicted images. If no value is + provided, the default value set in `settings.CLF_DEFAULT_NONTREE_VAL` will + be taken. refine : bool, optional - Whether the pixel-level classification should be refined by - optimizing the consistence between neighboring pixels. If no value - is provided, the default value set in `settings.CLF_DEFAULT_REFINE` - will be taken. + Whether the pixel-level classification should be refined by optimizing the + consistence between neighboring pixels. If no value is provided, the default + value set in `settings.CLF_DEFAULT_REFINE` will be taken. 
refine_beta : int, optional - Parameter of the refinement procedure that controls the - smoothness of the labelling. Larger values lead to smoother shapes. - If no value is provided, the default value set in - `settings.CLF_DEFAULT_REFINE_BETA` will be taken. + Parameter of the refinement procedure that controls the smoothness of the + labelling. Larger values lead to smoother shapes. If no value is provided, + the default value set in `settings.CLF_DEFAULT_REFINE_BETA` will be taken. refine_int_rescale : int, optional - Parameter of the refinement procedure that controls the precision - of the transformation of float to integer edge weights, required - for the employed graph cuts algorithm. Larger values lead to - greater precision. If no value is provided, the default value set - in `settings.CLF_DEFAULT_REFINE_INT_RESCALE` will be taken. + Parameter of the refinement procedure that controls the precision of the + transformation of float to integer edge weights, required for the employed + graph cuts algorithm. Larger values lead to greater precision. If no value + is provided, the default value set in + `settings.CLF_DEFAULT_REFINE_INT_RESCALE` will be taken. pixel_features_builder_kws : dict, optional - Keyword arguments that will be passed to - `detectree.PixelFeaturesBuilder`, which customize how the pixel - features are built. + Keyword arguments that will be passed to `detectree.PixelFeaturesBuilder`, + which customize how the pixel features are built. """ super(Classifier, self).__init__() @@ -347,24 +328,23 @@ def classify_img(self, img_filepath, clf, output_filepath=None): Parameters ---------- img_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or - a Path object representing the image to be classified. The value - will be passed to `rasterio.open` + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object representing the image to be classified. 
The value will be passed to + `rasterio.open`. clf : scikit-learn AdaBoostClassifier - Trained classifier + Trained classifier. output_filepath : str, file object or pathlib.Path object, optional - Path to a file, URI, file object opened in binary ('rb') mode, or - a Path object representing where the predicted image is to be - dumped. The value will be passed to `rasterio.open` in 'write' mode + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object representing where the predicted image is to be dumped. The value + will be passed to `rasterio.open` in 'write' mode. Returns ------- y_pred : numpy ndarray - Array with the pixel responses + Array with the pixel responses. """ - # ACHTUNG: Note that we do not use keyword-only arguments in this - # method because `output_filepath` works as the only "optional" - # argument + # ACHTUNG: Note that we do not use keyword-only arguments in this method because + # `output_filepath` works as the only "optional" argument src = rio.open(img_filepath) img_shape = src.shape @@ -381,17 +361,15 @@ def classify_img(self, img_filepath, clf, output_filepath=None): P_nontree = p_nontree.reshape(img_shape) P_tree = p_tree.reshape(img_shape) - # The AdaBoost probabilities are floats between 0 and 1, and the - # graph cuts algorithm requires an integer representation. - # Therefore, we will multiply the probabilities by an arbitrary - # large number and then transform the result to integers. For - # instance, we could use a `refine_int_rescale` of `100` so that - # the probabilities are rescaled into integers between 0 and 100 - # like percentages). The larger `refine_int_rescale`, the greater - # the precision. - # ACHTUNG: the data term when the pixel is a tree is - # `log(1 - P_tree)`, i.e., `log(P_nontree)`, so the two lines - # below are correct + # The AdaBoost probabilities are floats between 0 and 1, and the graph cuts + # algorithm requires an integer representation. 
Therefore, we will multiply + # the probabilities by an arbitrarily large number and then transform the + # result to integers. For instance, we could use a `refine_int_rescale` of + # `100` so that the probabilities are rescaled into integers between 0 and + # 100 (like percentages). The larger `refine_int_rescale`, the greater the + # precision. + # ACHTUNG: the data term when the pixel is a tree is `log(1 - P_tree)`, + # i.e., `log(P_nontree)`, so the two lines below are correct D_tree = (self.refine_int_rescale * np.log(P_nontree)).astype(int) D_nontree = (self.refine_int_rescale * np.log(P_tree)).astype(int) # TODO: option to choose Moore/Von Neumann neighborhood? @@ -406,8 +384,8 @@ def classify_img(self, img_filepath, clf, output_filepath=None): y_pred = np.full(img_shape, self.nontree_val) y_pred[g.get_grid_segments(node_ids)] = self.tree_val - # TODO: make the profile of output rasters more customizable (e.g., via - # the `settings` module) + # TODO: make the profile of output rasters more customizable (e.g., via the + # `settings` module) # output_filepath = path.join(output_dir, # f"tile_{tile_start}-{tile_end}.tif") if output_filepath is not None: @@ -435,13 +413,9 @@ def _classify_imgs(self, img_filepaths, clf, output_dir): # filename, ext = path.splitext(path.basename(img_filepath)) # pred_img_filepath = path.join( # output_dir, f"{filename}-pred{ext}") - pred_img_filepath = path.join( - output_dir, path.basename(img_filepath) - ) + pred_img_filepath = path.join(output_dir, path.basename(img_filepath)) pred_imgs_lazy.append( - dask.delayed(self.classify_img)( - img_filepath, clf, pred_img_filepath - ) + dask.delayed(self.classify_img)(img_filepath, clf, pred_img_filepath) ) pred_img_filepaths.append(pred_img_filepath) @@ -463,11 +437,9 @@ def classify_imgs( """ Use trained classifier(s) to predict tree pixels in multiple images. 
- Use `clf` or `clf_dict` for the classifier(s) depending on the train/ - test split method, and dump the predicted tree/non-tree images to - `output_dir`. See the `background `_ example - notebook for more details. - + Use `clf` or `clf_dict` for the classifier(s) depending on the train/test split + method, and dump the predicted tree/non-tree images to `output_dir`. See the + `background `_ example notebook for more details. Parameters ---------- @@ -478,13 +450,12 @@ def classify_imgs( clf : scikit-learn AdaBoostClassifier Trained classifier. clf_dict : dictionary - Dictionary mapping a trained scikit-learn AdaBoostClassifier to - each first-level cluster label. + Dictionary mapping a trained scikit-learn AdaBoostClassifier to each + first-level cluster label. method : {'cluster-I', 'cluster-II'}, optional Method used in the train/test split. img_cluster : int, optional - The label of the cluster of tiles. Only used if `method` is - 'cluster-II'. + The label of the cluster of tiles. Only used if `method` is 'cluster-II'. 
Returns ------- @@ -499,9 +470,7 @@ def classify_imgs( if method == "cluster-I": if clf is None: - raise ValueError( - "If using 'cluster-I' method, `clf` must be provided" - ) + raise ValueError("If using 'cluster-I' method, `clf` must be provided") return self._classify_imgs( split_df[~split_df["train"]]["img_filepath"], clf, output_dir ) @@ -511,9 +480,7 @@ def classify_imgs( if clf_dict is not None: clf = clf_dict[img_cluster] else: - raise ValueError( - "Either `clf` or `clf_dict` must be provided" - ) + raise ValueError("Either `clf` or `clf_dict` must be provided") return self._classify_imgs( utils.get_img_filepaths(split_df, img_cluster, False), @@ -523,11 +490,11 @@ def classify_imgs( if clf_dict is None: raise ValueError( - "If using 'cluster-II' method and not providing " - "`img_cluster`, `clf_dict` must be provided" + "If using 'cluster-II' method and not providing `img_cluster`, " + "`clf_dict` must be provided" ) pred_imgs = {} - for img_cluster, img_cluster_df in split_df.groupby("img_cluster"): + for img_cluster, _ in split_df.groupby("img_cluster"): pred_imgs[img_cluster] = self._classify_imgs( utils.get_img_filepaths(split_df, img_cluster, False), clf_dict[img_cluster], diff --git a/detectree/cli/main.py b/detectree/cli/main.py index 1e338c4..2e9dc43 100644 --- a/detectree/cli/main.py +++ b/detectree/cli/main.py @@ -11,9 +11,8 @@ # utils for the CLI class _OptionEatAll(click.Option): - # Option that can take an unlimided number of arguments - # Copied from Stephen Rauch's answer in stack overflow. - # https://bit.ly/2kstLhe + # Option that can take an unlimited number of arguments. Copied from Stephen Rauch's + # answer in stack overflow. 
https://bit.ly/2kstLhe def __init__(self, *args, **kwargs): self.save_other_options = kwargs.pop("save_other_options", True) nargs = kwargs.pop("nargs", -1) @@ -46,9 +45,7 @@ def parser_process(value, state): retval = super(_OptionEatAll, self).add_to_parser(parser, ctx) for name in self.opts: - our_parser = parser._long_opt.get(name) or parser._short_opt.get( - name - ) + our_parser = parser._long_opt.get(name) or parser._short_opt.get(name) if our_parser: self._eat_all_parser = our_parser self._previous_parser_process = our_parser.process @@ -337,9 +334,7 @@ def classify_img( """Use a trained classifier to predict tree pixels in an image.""" logger = ctx.obj["LOGGER"] - logger.info( - "Classifying %s with classifier of %s", img_filepath, clf_filepath - ) + logger.info("Classifying %s with classifier of %s", img_filepath, clf_filepath) pixel_features_builder_kws = _dict_from_kws(pixel_features_builder_kws) c = dtr.Classifier( diff --git a/detectree/filters.py b/detectree/filters.py index e6325b8..be52ab8 100644 --- a/detectree/filters.py +++ b/detectree/filters.py @@ -1,12 +1,46 @@ """Utilities to produce filters.""" import numpy as np -from scipy.ndimage.filters import _gaussian_kernel1d from skimage.filters import gabor_kernel __all__ = ["get_texture_kernel", "get_gabor_filter_bank"] +def _gaussian_kernel1d(sigma, order, radius): + """ + Compute a 1-D Gaussian convolution kernel. + + From https://github.com/scipy/scipy/blob/v1.9.2/scipy/ndimage/_filters.py#L179-L207 + Copying it here since it is not part of scipy's public API. 
+ See https://github.com/martibosch/detectree/issues/12 + """ + if order < 0: + raise ValueError("order must be non-negative") + exponent_range = np.arange(order + 1) + sigma2 = sigma * sigma + x = np.arange(-radius, radius + 1) + phi_x = np.exp(-0.5 / sigma2 * x**2) + phi_x = phi_x / phi_x.sum() + + if order == 0: + return phi_x + else: + # f(x) = q(x) * phi(x) = q(x) * exp(p(x)) + # f'(x) = (q'(x) + q(x) * p'(x)) * phi(x) + # p'(x) = -1 / sigma ** 2 + # Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the + # coefficients of q(x) + q = np.zeros(order + 1) + q[0] = 1 + D = np.diag(exponent_range[1:], 1) # D @ q(x) = q'(x) + P = np.diag(np.ones(order) / -sigma2, -1) # P @ q(x) = q(x) * p'(x) + Q_deriv = D + P + for _ in range(order): + q = Q_deriv.dot(q) + q = (x[:, None] ** exponent_range).dot(q) + return q * phi_x + + def _get_gaussian_kernel1d(sigma, *, order=0, truncate=4.0): """Based on scipy.ndimage.filters.gaussian_filter1d.""" sd = float(sigma) diff --git a/detectree/image_descriptor.py b/detectree/image_descriptor.py index e4c937f..d614987 100644 --- a/detectree/image_descriptor.py +++ b/detectree/image_descriptor.py @@ -14,42 +14,36 @@ ] -def compute_image_descriptor( - img_rgb, kernels, response_bins_per_axis, num_color_bins -): +def compute_image_descriptor(img_rgb, kernels, response_bins_per_axis, num_color_bins): """ Compute a GIST descriptor for an RGB image array. - See the `background `_ example notebook for more - details. + See the `background `_ example notebook for more details. Parameters ---------- img_rgb : array-like - The image in RGB format, i.e., in a 3-D array + The image in RGB format, i.e., in a 3-D array. kernels : list-like - List of kernel 2-D arrays that correspond to the filter bank + List of kernel 2-D arrays that correspond to the filter bank. response_bins_per_axis : int - Number of spatial bins per axis into which the responses to the filter - bank will be aggreated. 
For example, a value of 2 will aggregate the - responses into the four quadrants of the image (i.e., 2x2, 2 bins in - each axis of the image). + Number of spatial bins per axis into which the responses to the filter bank will + be aggregated. For example, a value of 2 will aggregate the responses into the + four quadrants of the image (i.e., 2x2, 2 bins in each axis of the image). num_color_bins : int - Number of color bins per axis of the L*a*b color space with which - the joint color histogram will be computed + Number of color bins per axis of the L*a*b color space with which the joint + color histogram will be computed. Returns ------- img_descr : array-like - Vector representing GIST descriptor of `img_rgb` + Vector representing GIST descriptor of `img_rgb`. """ # gist descriptor - num_blocks = response_bins_per_axis ** 2 + num_blocks = response_bins_per_axis**2 gist_descr = np.zeros(len(kernels) * num_blocks) img_gray = color.rgb2gray(img_rgb) - block_shape = tuple( - size // response_bins_per_axis for size in img_gray.shape - ) + block_shape = tuple(size // response_bins_per_axis for size in img_gray.shape) divides_evenly = True for size in img_gray.shape: if size % response_bins_per_axis != 0: @@ -73,14 +67,11 @@ def compute_image_descriptor( # color descriptor img_lab = color.rgb2lab(img_rgb) - img_lab_dn = img_lab.reshape( - img_lab.shape[0] * img_lab.shape[1], img_lab.shape[2] - ) + img_lab_dn = img_lab.reshape(img_lab.shape[0] * img_lab.shape[1], img_lab.shape[2]) H, _ = np.histogramdd(img_lab_dn, bins=num_color_bins) color_descr = H.flatten() - # normalize the gist and color descriptors to the l1 norm and concatenate - # them + # normalize the gist and color descriptors to the l1 norm and concatenate them img_descr = np.concatenate( [ preprocessing.normalize(row.reshape(1, -1), norm="l1").flatten() @@ -97,30 +88,28 @@ def compute_image_descriptor_from_filepath( """ Compute a GIST descriptor for RGB image file. 
- See the `background `_ example notebook for more - details. + See the `background `_ example notebook for more details. Parameters ---------- img_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or a - Path object representing the image for which a GIST descriptor will be - computed. The value will be passed to `rasterio.open`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path object + representing the image for which a GIST descriptor will be computed. The value + will be passed to `rasterio.open`. kernels : list-like - List of kernel 2-D arrays that correspond to the filter bank + List of kernel 2-D arrays that correspond to the filter bank. response_bins_per_axis : int - Number of spatial bins per axis into which the responses to the filter - bank will be aggreated. For example, a value of 2 will aggregate the - responses into the four quadrants of the image (i.e., 2x2, 2 bins in - each axis of the image). + Number of spatial bins per axis into which the responses to the filter bank will + be aggregated. For example, a value of 2 will aggregate the responses into the + four quadrants of the image (i.e., 2x2, 2 bins in each axis of the image). num_color_bins : int - Number of color bins per axis of the L*a*b color space with which - the joint color histogram will be computed + Number of color bins per axis of the L*a*b color space with which the joint + color histogram will be computed. Returns ------- img_descr : array-like - Vector representing GIST descriptor of `img_rgb` + Vector representing GIST descriptor of `img_rgb`. """ img_rgb = utils.img_rgb_from_filepath(img_filepath) return compute_image_descriptor( diff --git a/detectree/lidar.py b/detectree/lidar.py index 3e71cd7..b68f766 100644 --- a/detectree/lidar.py +++ b/detectree/lidar.py @@ -15,17 +15,16 @@ def rasterize_lidar(lidar_filepath, lidar_tree_values, ref_img_filepath): """Rasterize a LiDAR file. 
- Transforms a LiDAR file into a raster aligned to `ref_img_filepath`, where - each pixel of the target raster represents the number of LiDAR points of - the classes set in `lidar_tree_values` that occur in the pixel's geographic - extent. + Transforms a LiDAR file into a raster aligned to `ref_img_filepath`, where each + pixel of the target raster represents the number of LiDAR points of the classes set + in `lidar_tree_values` that occur in the pixel's geographic extent. Parameters ---------- lidar_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or a - Path object representing the LiDAR file from which a tree canopy mask - will be computed. The value will be passed to `laspy.file.File`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path object + representing the LiDAR file from which a tree canopy mask will be computed. The + value will be passed to `laspy.file.File`. lidar_tree_values : int or list-like LiDAR point classes that correspond to trees. ref_img_filepath : str, file object or pathlib.Path object @@ -34,7 +33,7 @@ def rasterize_lidar(lidar_filepath, lidar_tree_values, ref_img_filepath): Returns ------- lidar_arr : numpy ndarray - Array with the rasterized lidar + Array with the rasterized lidar. """ las = laspy.read(lidar_filepath) c = np.array(las.classification) @@ -79,17 +78,15 @@ def __init__( ---------- tree_threshold : numeric, optional Threshold of lidar points classified as tree by pixel at which - point the pixel is considered a tree. As a rule of thumb, the value - can be set to result of dividing the point density of the lidar - (e.g., pts/m^2) by the pixel area (e.g., m^2). + point the pixel is considered a tree. As a rule of thumb, the value can be + set to result of dividing the point density of the lidar (e.g., pts/m^2) by + the pixel area (e.g., m^2). 
output_dtype : str or numpy dtype, optional The desired data type of the output raster canopy masks. output_tree_val : int, optional - The value that designates tree pixels in the output raster canopy - masks. + The value that designates tree pixels in the output raster canopy masks. output_nodata : int, optional - The value that designates non-tree pixels in the output raster - canopy masks. + The value that designates non-tree pixels in the output raster canopy masks. """ if tree_threshold is None: tree_threshold = settings.LIDAR_TREE_THRESHOLD @@ -122,23 +119,21 @@ def to_canopy_mask( Parameters ---------- lidar_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or a - Path object representing the LiDAR file from which a tree canopy - mask will be computed. The value will be passed to - `laspy.file.File`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object representing the LiDAR file from which a tree canopy mask will be + computed. The value will be passed to `laspy.file.File`. lidar_tree_values : int or list-like - LiDAR point classes that correspond to trees + LiDAR point classes that correspond to trees. ref_img_filepath : str, file object or pathlib.Path object - Reference raster image to which the LiDAR data will be rasterized + Reference raster image to which the LiDAR data will be rasterized. output_filepath : str, file object or pathlib.Path object, optional - Path to a file, URI, file object opened in binary ('rb') mode, or - a Path object representing where the predicted image is to be - dumped. The value will be passed to `rasterio.open` in 'write' - mode. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object representing where the predicted image is to be dumped. The value + will be passed to `rasterio.open` in 'write' mode. 
postprocess_func : function - Post-processing function which takes as input the rasterized lidar - as a boolean ndarray and returns a the post-processed lidar also as - a boolean ndarray. + Post-processing function which takes as input the rasterized lidar as a + boolean ndarray and returns the post-processed lidar also as a boolean + ndarray. postprocess_func_args : list-like, optional Arguments to be passed to `postprocess_func`. postprocess_func_kws : dict, optional @@ -154,9 +149,7 @@ def to_canopy_mask( # iterations=self.num_opening_iterations), # iterations=self.num_dilation_iterations).astype( # self.output_dtype) * self.output_tree_val - lidar_arr = rasterize_lidar( - lidar_filepath, lidar_tree_values, ref_img_filepath - ) + lidar_arr = rasterize_lidar(lidar_filepath, lidar_tree_values, ref_img_filepath) canopy_arr = lidar_arr >= self.tree_threshold if postprocess_func is not None: canopy_arr = postprocess_func( @@ -169,9 +162,7 @@ def to_canopy_mask( if output_filepath is not None: with rio.open(ref_img_filepath) as src: meta = src.meta.copy() - meta.update( - dtype=self.output_dtype, count=1, nodata=self.output_nodata - ) + meta.update(dtype=self.output_dtype, count=1, nodata=self.output_nodata) with rio.open(output_filepath, "w", **meta) as dst: dst.write(canopy_arr, 1) diff --git a/detectree/pixel_features.py b/detectree/pixel_features.py index 6d0ecde..586fbb2 100644 --- a/detectree/pixel_features.py +++ b/detectree/pixel_features.py @@ -54,45 +54,41 @@ def __init__( """ Initialize the pixel feature builder. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for more + details. Parameters ---------- sigmas : list-like, optional - The list of scale parameters (sigmas) to build the Gaussian filter - bank that will be used to compute the pixel-level features. The - provided argument will be passed to the initialization method of - the `PixelFeaturesBuilder` class.
If no value is provided, the - default value set in `settings.GAUSS_DEFAULT_SIGMAS` will be taken. + The list of scale parameters (sigmas) to build the Gaussian filter bank that + will be used to compute the pixel-level features. The provided argument will + be passed to the initialization method of the `PixelFeaturesBuilder` + class. If no value is provided, the default value set in + `settings.GAUSS_DEFAULT_SIGMAS` will be taken. num_orientations : int, optional - The number of equally-distributed orientations to build the - Gaussian filter bank that will be used to compute the pixel-level - features. The provided argument will be passed to the - initialization method of the `PixelFeaturesBuilder` class. If no - value is provided, the default value set in - `settings.GAUSS_DEFAULT_NUM_ORIENTATIONS` will be taken. + The number of equally-distributed orientations to build the Gaussian filter + bank that will be used to compute the pixel-level features. The provided + argument will be passed to the initialization method of the + `PixelFeaturesBuilder` class. If no value is provided, the default value set + in `settings.GAUSS_DEFAULT_NUM_ORIENTATIONS` will be taken. neighborhood : array-like, optional - The base neighborhood structure that will be used to compute the - entropy features. The provided argument will be passed to the - initialization method of the `PixelFeaturesBuilder` class. If no - value is provided, a square with a side size of - `2 * min_neighborhood_range + 1` will be used. + The base neighborhood structure that will be used to compute the entropy + features. The provided argument will be passed to the initialization method + of the `PixelFeaturesBuilder` class. If no value is provided, a square with + a side size of `2 * min_neighborhood_range + 1` will be used. min_neighborhood_range : int, optional - The range (i.e., the square radius) of the smallest neigbhorhood - window that will be used to compute the entropy features. 
The - provided argument will be passed to the initialization method of - the `PixelFeaturesBuilder` class. If no value is provided, the - default value set in + The range (i.e., the square radius) of the smallest neigbhorhood window that + will be used to compute the entropy features. The provided argument will be + passed to the initialization method of the `PixelFeaturesBuilder` class. If + no value is provided, the default value set in `settings.ENTROPY_DEFAULT_MIN_NEIGHBORHOOD_RANGE` will be taken. num_neighborhoods : int, optional The number of neigbhorhood windows (whose size follows a geometric - progression starting at `min_neighborhood_range`) that will be - used to compute the entropy features. The provided argument will - be passed to the initialization method of the - `PixelFeaturesBuilder` class. If no value is provided, the default - value set in `settings.ENTROPY_DEFAULT_NUM_NEIGHBORHOODS` will be - taken. + progression starting at `min_neighborhood_range`) that will be used to + compute the entropy features. The provided argument will be passed to the + initialization method of the `PixelFeaturesBuilder` class. If no value is + provided, the default value set in + `settings.ENTROPY_DEFAULT_NUM_NEIGHBORHOODS` will be taken. 
""" # preprocess technical keyword arguments # texture features @@ -121,9 +117,7 @@ def __init__( # num_neighborhoods = len(neighborhoods) if neighborhood is None: if min_neighborhood_range is None: - min_neighborhood_range = ( - settings.ENTROPY_DEFAULT_MIN_NEIGHBORHOOD_RANGE - ) + min_neighborhood_range = settings.ENTROPY_DEFAULT_MIN_NEIGHBORHOOD_RANGE neighborhood = morphology.square(2 * min_neighborhood_range + 1) self.neighborhood = neighborhood if num_neighborhoods is None: @@ -156,8 +150,8 @@ def build_features_from_arr(self, img_rgb): responses : numpy ndarray Array with the pixel responses """ - # the third component `_` is actually the number of channels in RGB, - # which is already defined in the constant `NUM_RGB_CHANNELS` + # the third component `_` is actually the number of channels in RGB, which is + # already defined in the constant `NUM_RGB_CHANNELS` num_rows, num_cols, _ = img_rgb.shape num_pixels = num_rows * num_cols img_lab = color.rgb2lab(img_rgb) @@ -175,9 +169,7 @@ def build_features_from_arr(self, img_rgb): A, np.log(np.dot(B, img_xyz_vec.transpose()) + 1) ).transpose() X[:, :NUM_LAB_CHANNELS] = img_lab_vec - X[ - :, NUM_LAB_CHANNELS : NUM_LAB_CHANNELS + NUM_ILL_CHANNELS - ] = img_ill_vec + X[:, NUM_LAB_CHANNELS : NUM_LAB_CHANNELS + NUM_ILL_CHANNELS] = img_ill_vec # texture features # tpf.compute_texture_features(X_img[:, self.texture_slice], @@ -187,9 +179,7 @@ def build_features_from_arr(self, img_rgb): for j, orientation in enumerate(range(self.num_orientations)): # theta = orientation / num_orientations * np.pi theta = orientation * 180 / self.num_orientations - oriented_kernel_arr = ndi.interpolation.rotate( - base_kernel_arr, theta - ) + oriented_kernel_arr = ndi.interpolation.rotate(base_kernel_arr, theta) img_filtered = ndi.convolve(img_lab_l, oriented_kernel_arr) img_filtered_vec = img_filtered.flatten() X[ @@ -209,9 +199,7 @@ def build_features_from_arr(self, img_rgb): transform.downscale_local_mean(img_lab_l, (factor, factor)), 
img_lab_l.shape, ).astype(np.uint16) - X[:, entropy_start + i] = rank.entropy( - img, self.neighborhood - ).flatten() + X[:, entropy_start + i] = rank.entropy(img, self.neighborhood).flatten() return X @@ -222,9 +210,9 @@ def build_features_from_filepath(self, img_filepath): Parameters ---------- img_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or - a Path object to the RGB image for which the features will be - computed. The value will be passed to `rasterio.open`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object to the RGB image for which the features will be computed. The value + will be passed to `rasterio.open`. Returns ------- @@ -250,26 +238,24 @@ def build_features( Parameters ---------- split_df : pd.DataFrame - Data frame + Data frame with the train/test split. img_filepaths : list of image file paths, optional - List of images to be transformed into features. Alternatively, the - same information can be provided by means of the `img_dir` and - `img_filename_pattern` keyword arguments. Ignored if providing - `split_df` + List of images to be transformed into features. Alternatively, the same + information can be provided by means of the `img_dir` and + `img_filename_pattern` keyword arguments. Ignored if providing `split_df`. img_dir : str representing path to a directory, optional Path to the directory where the images whose filename matches `img_filename_pattern` are to be located. Ignored if `split_df` or `img_filepaths` is provided. img_filename_pattern : str representing a file-name pattern, optional - Filename pattern to be matched in order to obtain the list of - images. If no value is provided, the default value set in - `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if - `split_df` or `img_filepaths` is provided. + Filename pattern to be matched in order to obtain the list of images. 
If no + value is provided, the default value set in + `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if `split_df` + or `img_filepaths` is provided. method : {'cluster-I', 'cluster-II'}, optional Method used in the train/test split img_cluster : int, optional - The label of the cluster of images. Only used if `method` is - 'cluster-II' + The label of the cluster of images. Only used if `method` is 'cluster-II'. Returns ------- X : numpy ndarray @@ -289,28 +275,21 @@ def build_features( else: if img_cluster is None: raise ValueError( - "If `method` is 'cluster-II', `img_cluster` must be " - "provided" + "If `method` is 'cluster-II', `img_cluster` must be " "provided" ) - img_filepaths = utils.get_img_filepaths( - split_df, img_cluster, True - ) + img_filepaths = utils.get_img_filepaths(split_df, img_cluster, True) else: if img_filepaths is None: if img_filename_pattern is None: - img_filename_pattern = ( - settings.IMG_DEFAULT_FILENAME_PATTERN - ) + img_filename_pattern = settings.IMG_DEFAULT_FILENAME_PATTERN if img_dir is None: raise ValueError( "Either `split_df`, `img_filepaths` or `img_dir` must " "be provided" ) - img_filepaths = glob.glob( - path.join(img_dir, img_filename_pattern) - ) + img_filepaths = glob.glob(path.join(img_dir, img_filename_pattern)) values = [ dask.delayed(self.build_features_from_filepath)(img_filepath) diff --git a/detectree/pixel_response.py b/detectree/pixel_response.py index b601589..c1b8275 100644 --- a/detectree/pixel_response.py +++ b/detectree/pixel_response.py @@ -17,14 +17,14 @@ class NonBinaryResponseError(Exception): class PixelResponseBuilder(object): """Customize how pixel responses (tree/non-tree labels) are computed.""" - # It is really not necessary to use a class for this, but we do so for the - # sake of API consistency with the `pixel_features` module + # It is really not necessary to use a class for this, but we do so for the sake of + # API consistency with the `pixel_features` module def 
__init__(self, *, tree_val=None, nontree_val=None): """ Initialize the pixel response builder. - See the `background `_ example notebook for - more details. + See the `background `_ example notebook for more + details. Parameters ---------- @@ -59,8 +59,8 @@ def build_response_from_arr(self, img_binary): response_arr[response_arr == self.tree_val] = 1 response_arr[response_arr == self.nontree_val] = 0 - # check that the provided `img_binary` is actually binary, i.e., - # consists only of `tree_val` and `nontree_val` values + # check that the provided `img_binary` is actually binary, i.e., consists only + # of `tree_val` and `nontree_val` values if ((response_arr != 0) & (response_arr != 1)).any(): raise NonBinaryResponseError @@ -73,10 +73,9 @@ def build_response_from_filepath(self, img_filepath): Parameters ---------- img_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or - a Path object representing the binary (tree/non-tree) image to be - transformed into the response. The value will be passed to - `rasterio.open`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path + object representing the binary (tree/non-tree) image to be transformed into + the response. The value will be passed to `rasterio.open`. Returns ------- @@ -111,33 +110,30 @@ def build_response( Parameters ---------- split_df : pd.DataFrame - Data frame + Data frame with the train/test split. response_img_dir : str representing path to a directory, optional - Path to the directory where the response images are located. - Required if providing `split_df`. Otherwise `response_img_dir` - might either be ignored if providing `response_img_filepaths`, or - be used as the directory where the images whose filename matches - `img_filename_pattern` are to be located. + Path to the directory where the response images are located. Required if + providing `split_df`. 
Otherwise `response_img_dir` might either be ignored + if providing `response_img_filepaths`, or be used as the directory where the + images whose filename matches `img_filename_pattern` are to be located. response_img_filepaths : list of image file paths, optional - List of images to be transformed into the response. Alternatively, - the same information can be provided by means of the `img_dir` and - `img_filename_pattern` keyword arguments. Ignored if providing - `split_df` + List of images to be transformed into the response. Alternatively, the same + information can be provided by means of the `img_dir` and + `img_filename_pattern` keyword arguments. Ignored if providing `split_df`. img_filename_pattern : str representing a file-name pattern, optional - Filename pattern to be matched in order to obtain the list of - images. If no value is provided, the default value set in - `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if - `split_df` or `img_filepaths` is provided. + Filename pattern to be matched in order to obtain the list of images. If no + value is provided, the default value set in + `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if `split_df` + or `img_filepaths` is provided. method : {'cluster-I', 'cluster-II'}, optional - Method used in the train/test split + Method used in the train/test split. img_cluster : int, optional - The label of the cluster of images. Only used if `method` is - 'cluster-II' + The label of the cluster of images. Only used if `method` is 'cluster-II'. Returns ------- responses : numpy ndarray - Array with the pixel responses + Array with the pixel responses. 
""" if split_df is not None: if response_img_dir is None: @@ -156,24 +152,17 @@ def build_response( else: if img_cluster is None: raise ValueError( - "If `method` is 'cluster-II', `img_cluster` must be " - "provided" + "If `method` is 'cluster-II', `img_cluster` must be " "provided" ) - img_filepaths = utils.get_img_filepaths( - split_df, img_cluster, True - ) + img_filepaths = utils.get_img_filepaths(split_df, img_cluster, True) response_img_filepaths = img_filepaths.apply( - lambda filepath: path.join( - response_img_dir, path.basename(filepath) - ) + lambda filepath: path.join(response_img_dir, path.basename(filepath)) ) else: if response_img_filepaths is None: if img_filename_pattern is None: - img_filename_pattern = ( - settings.IMG_DEFAULT_FILENAME_PATTERN - ) + img_filename_pattern = settings.IMG_DEFAULT_FILENAME_PATTERN if response_img_dir is None: raise ValueError( "Either `split_df`, `response_img_filepaths` or " @@ -188,8 +177,6 @@ def build_response( # no need for dask here values = [] for response_img_filepath in response_img_filepaths: - values.append( - self.build_response_from_filepath(response_img_filepath) - ) + values.append(self.build_response_from_filepath(response_img_filepath)) return np.vstack(values).flatten() diff --git a/detectree/train_test_split.py b/detectree/train_test_split.py index b129444..2bcea47 100644 --- a/detectree/train_test_split.py +++ b/detectree/train_test_split.py @@ -31,49 +31,47 @@ def __init__( """ Initialize the training selector. - The arguments provided to the initialization method will determine how - the image descriptors are computed. See the `background - `_ example notebook for more details. + The arguments provided to the initialization method will determine how the image + descriptors are computed. See the `background `_ example + notebook for more details. Parameters ---------- img_filepaths : list-like, optional - List of paths to the input tiles whose features will be used to - train the classifier. 
+ List of paths to the input tiles whose features will be used to train the + classifier. img_dir : str representing path to a directory, optional Path to the directory where the images whose filename matches - `img_filename_pattern` are to be located. Ignored if - `img_filepaths` is provided. + `img_filename_pattern` are to be located. Ignored if `img_filepaths` is + provided. img_filename_pattern : str representing a file-name pattern, optional - Filename pattern to be matched in order to obtain the list of - images. If no value is provided, the default value set in + Filename pattern to be matched in order to obtain the list of images. If no + value is provided, the default value set in `settings.IMG_DEFAULT_FILENAME_PATTERN` will be taken. Ignored if `img_filepaths` is provided. gabor_frequencies : tuple, optional - Set of frequencies used to build the Gabor filter bank. If no value - is provided (default), the value will be taken from + Set of frequencies used to build the Gabor filter bank. If no value is + provided (default), the value will be taken from `settings.GIST_DEFAULT_GABOR_FREQUENCIES`. gabor_num_orientations : int or tuple, optional - Number of orientations used to build the Gabor filter bank. If an - integer is provided, the corresponding number of orientations will - be used for each scale (determined by `gabor_frequencies`). If a - tuple is provided, each element will determine the number of - orientations that must be used at its matching scale (determined - by `gabor_frequencies`) - thus the tuple must match the length of - `gabor_frequencies`. If no value is provided (default), the value - will be taken from `seettings.GIST_DEFAULT_GABOR_NUM_ORIENTATIONS`. + Number of orientations used to build the Gabor filter bank. If an integer is + provided, the corresponding number of orientations will be used for each + scale (determined by `gabor_frequencies`). 
If a tuple is provided, each + element will determine the number of orientations that must be used at its + matching scale (determined by `gabor_frequencies`) - thus the tuple must + match the length of `gabor_frequencies`. If no value is provided (default), + the value will be taken from + `settings.GIST_DEFAULT_GABOR_NUM_ORIENTATIONS`. response_bins_per_axis : int, optional - Number of spatial bins per axis into which the responses to the - Gabor filter bank will be aggreated. For example, a value of 2 - will aggregate the responses into the four quadrants of the image - (i.e., 2x2, 2 bins in each axis of the image). If no value is - provided (default), the value will be taken from - `seettings.GIST_DEFAULT_RESPONSE_BINS_PER_AXIS`. + Number of spatial bins per axis into which the responses to the Gabor filter + bank will be aggregated. For example, a value of 2 will aggregate the + responses into the four quadrants of the image (i.e., 2x2, 2 bins in each + axis of the image). If no value is provided (default), the value will be + taken from `settings.GIST_DEFAULT_RESPONSE_BINS_PER_AXIS`. num_color_bins : int, optional - Number of bins in each dimension used to compute a joint color - histogram in the L*a*b color space. If no value is provided - (default), the value will be taken from - `seettings.GIST_DEFAULT_NUM_COLOR_BINS`. + Number of bins in each dimension used to compute a joint color histogram in + the L*a*b color space. If no value is provided (default), the value will be + taken from `settings.GIST_DEFAULT_NUM_COLOR_BINS`.
""" super(TrainingSelector, self).__init__() @@ -83,13 +81,9 @@ def __init__( if gabor_frequencies is None: gabor_frequencies = settings.GIST_DEFAULT_GABOR_FREQUENCIES if gabor_num_orientations is None: - gabor_num_orientations = ( - settings.GIST_DEFAULT_GABOR_NUM_ORIENTATIONS - ) + gabor_num_orientations = settings.GIST_DEFAULT_GABOR_NUM_ORIENTATIONS if response_bins_per_axis is None: - response_bins_per_axis = ( - settings.GIST_DEFAULT_RESPONSE_BINS_PER_AXIS - ) + response_bins_per_axis = settings.GIST_DEFAULT_RESPONSE_BINS_PER_AXIS if num_color_bins is None: num_color_bins = settings.GIST_DEFAULT_NUM_COLOR_BINS @@ -134,9 +128,7 @@ def descr_feature_matrix(self): # for img_filepath in self.img_filepaths # ] values = [ - dask.delayed( - image_descriptor.compute_image_descriptor_from_filepath - )( + dask.delayed(image_descriptor.compute_image_descriptor_from_filepath)( img_filepath, kernels, self.response_bins_per_axis, @@ -150,13 +142,12 @@ def descr_feature_matrix(self): self._descr_feature_matrix = np.vstack(feature_rows) - # TODO: cache as instance attribute (or even use property with and - # pass this method's arguments to init), and then let people - # interactively choose the number of PCA components until they're - # happy with the represented variance? I vote yes. - # TODO: cache this (via persistence): if `img_filepaths` and the - # technical parameters coincide, load from a file instead of - # recomputing it + # TODO: cache as instance attribute (or even use property with and pass this + # method's arguments to init), and then let people interactively choose the + # number of PCA components until they're happy with the represented + # variance? I vote yes. + # TODO: cache this (via persistence): if `img_filepaths` and the technical + # parameters coincide, load from a file instead of recomputing it # TODO: return copy? 
return self._descr_feature_matrix @@ -178,16 +169,15 @@ def train_test_split( Parameters ---------- method : {'cluster-I', 'cluster-II'}, optional (default 'cluster-II') - Method used in the train/test split + Method used in the train/test split. num_components : int, optional (default 12) - Number of principal components into which the image descriptors - should be represented when applying the *k*-means clustering. + Number of principal components into which the image descriptors should be + represented when applying the *k*-means clustering. num_img_clusters : int, optional (default 4) - Number of first-level image clusters of the 'cluster-II' `method`. - Ignored if `method` is 'cluster-I'. + Number of first-level image clusters of the 'cluster-II' `method`. Ignored + if `method` is 'cluster-I'. train_prop : float, optional - Overall proportion of images/tiles that must be selected for - training. + Overall proportion of images/tiles that must be selected for training. return_evr : bool, optional (default False) Whether the explained variance ratio of the principal component analysis should be returned @@ -195,9 +185,9 @@ def train_test_split( Returns ------- split_df : pandas.DataFrame - The train/test split data frame + The train/test split data frame. evr : numeric, optional - Expected variance ratio of the principal component analysis + Expected variance ratio of the principal component analysis. 
""" X = self.descr_feature_matrix pca = decomposition.PCA(n_components=num_components).fit(X) @@ -213,9 +203,9 @@ def train_test_split( ) if method == "cluster-I": - km = cluster.KMeans( - n_clusters=int(np.ceil(train_prop * len(df))) - ).fit(X_pca) + km = cluster.KMeans(n_clusters=int(np.ceil(train_prop * len(df)))).fit( + X_pca + ) closest, _ = metrics.pairwise_distances_argmin_min( km.cluster_centers_, df[X_cols] ) @@ -226,24 +216,19 @@ def train_test_split( def cluster_train_test_split(img_cluster_ser): X_cluster_df = df.loc[img_cluster_ser.index, X_cols] - # use `ceil` to avoid zeros, which might completely ignore a - # significant image cluster + # use `ceil` to avoid zeros, which might completely ignore a significant + # image cluster num_train = int(np.ceil(train_prop * len(X_cluster_df))) - cluster_km = cluster.KMeans(n_clusters=num_train).fit( - X_cluster_df - ) + cluster_km = cluster.KMeans(n_clusters=num_train).fit(X_cluster_df) closest, _ = metrics.pairwise_distances_argmin_min( cluster_km.cluster_centers_, X_cluster_df ) train_idx = X_cluster_df.iloc[closest].index - return [ - True if i in train_idx else False - for i in X_cluster_df.index - ] - - df["img_cluster"] = cluster.KMeans( - n_clusters=num_img_clusters - ).fit_predict(X_pca) + return [True if i in train_idx else False for i in X_cluster_df.index] + + df["img_cluster"] = cluster.KMeans(n_clusters=num_img_clusters).fit_predict( + X_pca + ) df["train"] = df.groupby("img_cluster")["img_cluster"].transform( cluster_train_test_split ) diff --git a/detectree/utils.py b/detectree/utils.py index be27550..3d616f1 100644 --- a/detectree/utils.py +++ b/detectree/utils.py @@ -41,28 +41,27 @@ def split_into_tiles( Parameters ---------- input_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or a - Path object representing the image to be classified. 
The value will be - passed to `rasterio.open` + Path to a file, URI, file object opened in binary ('rb') mode, or a Path object + representing the image to be classified. The value will be passed to + `rasterio.open` output_dir : str or pathlib.Path object - Path to the directory where the predicted images are to be dumped + Path to the directory where the predicted images are to be dumped. tile_width : int, optional - Tile width in pixels. If no value is provided (default), the value - will be taken from `settings.TILE_DEFAULT_WIDTH`. + Tile width in pixels. If no value is provided (default), the value will be taken + from `settings.TILE_DEFAULT_WIDTH`. tile_height : int, optional - Tile height in pixels. If no value is provided (default), the value - will be taken from `settings.TILE_DEFAULT_HEIGHT`. + Tile height in pixels. If no value is provided (default), the value will be + taken from `settings.TILE_DEFAULT_HEIGHT`. output_filename : str, optional - Template to be string-formatted in order to name the output tiles. If - no value is provided (default), the value will be taken from + Template to be string-formatted in order to name the output tiles. If no value + is provided (default), the value will be taken from `settings.TILE_DEFAULT_OUTPUT_FILENAME`. only_full_tiles : bool, optional (default False) - Whether only full tiles (of size `tile_width`x`tile_height`) should be - dumped. + Whether only full tiles (of size `tile_width`x`tile_height`) should be dumped. keep_empty_tiles : bool, optional (default False) - Whether tiles containing only no-data pixels should be dumped + Whether tiles containing only no-data pixels should be dumped. custom_meta : dict, optional - Custom meta data for the output tiles + Custom meta data for the output tiles. Returns ------- @@ -105,19 +104,16 @@ def _get_window_transform(width, height): if tqdm is not None: iterator = tqdm(iterator) - # tests whether a given tile should be dumped or not. 
Since there are - # two possible tests that depend on the arguments provided by the user, - # we will use a list of tests and then check whether any test must be - # applied. This mechanism avoids having to check whether tests must be - # applied at each iteration (see the if/else at the end of this - # function). + # tests whether a given tile should be dumped or not. Since there are two + # possible tests that depend on the arguments provided by the user, we will use + # a list of tests and then check whether any test must be applied. This + # mechanism avoids having to check whether tests must be applied at each + # iteration (see the if/else at the end of this function). tests = [] if only_full_tiles: def test_full_tile(window): - return ( - window.width == tile_width and window.height == tile_height - ) + return window.width == tile_width and window.height == tile_height tests.append(test_full_tile) @@ -133,9 +129,7 @@ def inner_loop(window, transform): meta["width"], meta["height"] = window.width, window.height output_filepath = path.join( output_dir, - output_filename.format( - int(window.col_off), int(window.row_off) - ), + output_filename.format(int(window.col_off), int(window.row_off)), ) with rio.open(output_filepath, "w", **meta) as dst: dst.write(src.read(window=window)) @@ -157,15 +151,14 @@ def img_rgb_from_filepath(img_filepath): """ Read an RGB image file into a 3-D array. - See the `background `_ example notebook for more - details. + See the `background `_ example notebook for more details. Parameters ---------- img_filepath : str, file object or pathlib.Path object - Path to a file, URI, file object opened in binary ('rb') mode, or a - Path object representing the image for which a GIST descriptor will be - computed. The value will be passed to `rasterio.open`. + Path to a file, URI, file object opened in binary ('rb') mode, or a Path object + representing the image for which a GIST descriptor will be computed. 
The value + will be passed to `rasterio.open`. """ with rio.open(img_filepath) as src: arr = src.read() @@ -181,17 +174,17 @@ def get_img_filepaths(split_df, img_cluster, train): Parameters ---------- split_df : pandas DataFrame - Data frame with the train/test split + Data frame with the train/test split. img_cluster : int - The label of the cluster of tiles + The label of the cluster of tiles. train : bool Whether the list of training (True) or testing (False) tiles must be - returned + returned. Returns ------- img_filepaths : pandas Series - List of paths to image files + List of paths to image files. """ if train: train_cond = split_df["train"] @@ -216,13 +209,13 @@ def log(message, *, level=None, name=None, filename=None): Parameters ---------- message : string - the content of the message to log + the content of the message to log. level : int - one of the logger.level constants + one of the logger.level constants. name : string - name of the logger + name of the logger. filename : string - name of the log file + name of the log file. 
""" if level is None: level = settings.log_level @@ -233,8 +226,8 @@ def log(message, *, level=None, name=None, filename=None): # if logging to file is turned on if settings.log_file: - # get the current logger (or create a new one, if none), then log - # message at requested level + # get the current logger (or create a new one, if none), then log message at + # requested level logger = get_logger(level=level, name=name, filename=filename) if level == lg.DEBUG: logger.debug(message) @@ -245,17 +238,17 @@ def log(message, *, level=None, name=None, filename=None): elif level == lg.ERROR: logger.error(message) - # if logging to console is turned on, convert message to ascii and print to - # the console + # if logging to console is turned on, convert message to ascii and print to the + # console if settings.log_console: - # capture current stdout, then switch it to the console, print the - # message, then switch back to what had been the stdout. this prevents - # logging to notebook - instead, it goes to console + # capture current stdout, then switch it to the console, print the message, then + # switch back to what had been the stdout. this prevents logging to notebook - + # instead, it goes to console standard_out = sys.stdout sys.stdout = sys.__stdout__ - # convert message to ascii for console display so it doesn't break - # windows terminals + # convert message to ascii for console display so it doesn't break windows + # terminals message = ( unicodedata.normalize("NFKD", str(message)) .encode("ascii", errors="replace") @@ -272,11 +265,11 @@ def get_logger(*, level=None, name=None, filename=None): Parameters ---------- level : int - one of the logger.level constants + one of the logger.level constants. name : string - name of the logger + name of the logger. filename : string - name of the log file + name of the log file. 
Returns ------- @@ -306,9 +299,7 @@ def get_logger(*, level=None, name=None, filename=None): # create file handler and log formatter and set them up handler = lg.FileHandler(log_filename, encoding="utf-8") - formatter = lg.Formatter( - "%(asctime)s %(levelname)s %(name)s %(message)s" - ) + formatter = lg.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) logger.setLevel(level) diff --git a/environment-dev.yml b/environment-dev.yml index 9e36d18..19416cb 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -15,6 +15,8 @@ dependencies: - pip - pip: - pymaxflow>=1.0.0 + - laszip + - pre-commit - rasterio>=1.0.0 - scikit-image - scikit-learn diff --git a/paper/paper.bib b/paper/paper.bib index 2b7e44c..50513a3 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -29,7 +29,7 @@ @article{boykov2004experimental pages={1124--1137}, year={2004}, publisher={IEEE}, - doi={10.1109/TPAMI.2004.60} + doi={10.1109/TPAMI.2004.60} } @inproceedings{yang2009tree, diff --git a/paper/paper.md b/paper/paper.md index 8ed5672..d9af12d 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -3,7 +3,7 @@ title: 'DetecTree: Tree detection from aerial imagery in Python' tags: - Python - tree detection - - image segmentation + - image segmentation - remote sensing images - GIS authors: diff --git a/pyproject.toml b/pyproject.toml index a8f43fe..ed76fd6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,9 @@ [tool.black] -line-length = 79 +line-length = 88 + +[tool.isort] +known_first_party = "detectree" +default_section = "THIRDPARTY" +forced_separate = "test_detectree" +line_length = 88 +profile = "black" diff --git a/setup.cfg b/setup.cfg index d7c2d5b..c4b5795 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,18 +5,10 @@ universal=1 description-file=README.md [flake8] +docstring-convention = numpy +max-line-length = 88 exclude = ./build/* ignore = E203,W503 per-file-ignores = detectree/__init__.py:F401 
tests/test_detectree.py:F401 - -[isort] -known_first_party = detectree -default_section = THIRDPARTY -forced_separate = test_detectree -line_length = 79 -profile = black - -[pydocstyle] -convention = numpy diff --git a/setup.py b/setup.py index b75a3e9..1a159ea 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,10 @@ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", ] here = path.abspath(path.dirname(__file__)) diff --git a/tests/test_detectree.py b/tests/test_detectree.py index b91ae5b..f13f3e5 100644 --- a/tests/test_detectree.py +++ b/tests/test_detectree.py @@ -12,13 +12,7 @@ from sklearn import ensemble import detectree as dtr -from detectree import ( - filters, - image_descriptor, - pixel_features, - pixel_response, - utils, -) +from detectree import filters, image_descriptor, pixel_features, pixel_response, utils from detectree.cli import main @@ -36,17 +30,10 @@ def test_base_imports(self): from dask import diagnostics from rasterio import windows from scipy import ndimage as ndi - from scipy.ndimage.filters import _gaussian_kernel1d from skimage import color, measure, morphology from skimage.filters import gabor_kernel, rank from skimage.util import shape - from sklearn import ( - cluster, - decomposition, - ensemble, - metrics, - preprocessing, - ) + from sklearn import cluster, decomposition, ensemble, metrics, preprocessing class TestTrainTestSplit(unittest.TestCase): @@ -55,8 +42,7 @@ def setUp(self): self.img_filepaths = glob.glob(path.join(self.img_dir, "*.tif")) def test_init(self): - # if providing `img_filepaths`, `img_dir` and `img_filename_pattern` - # are ignored + # if providing `img_filepaths`, `img_dir` and `img_filename_pattern` are ignored 
ts = dtr.TrainingSelector(img_filepaths=self.img_filepaths) self.assertEqual( ts.img_filepaths, @@ -80,11 +66,8 @@ def test_init(self): ) # if not providing `img_filepaths`, inexistent `img_dir` or non-tif - # `img_filename_pattern` will result in an empty `img_filepaths` - # attribute - self.assertEqual( - len(dtr.TrainingSelector(img_dir="foo").img_filepaths), 0 - ) + # `img_filename_pattern` will result in an empty `img_filepaths` attribute + self.assertEqual(len(dtr.TrainingSelector(img_dir="foo").img_filepaths), 0) self.assertEqual( len( dtr.TrainingSelector( @@ -98,9 +81,8 @@ def test_init(self): len(dtr.TrainingSelector(img_dir=self.img_dir).img_filepaths), 0 ) - # even when providing an integer in the `gabor_num_orientations` - # argument, the respective attribute will be a tuple after `__init__` - # is executed + # even when providing an integer in the `gabor_num_orientations` argument, the + # respective attribute will be a tuple after `__init__` is executed self.assertIsInstance( dtr.TrainingSelector( img_filepaths=self.img_filepaths, gabor_num_orientations=8 @@ -148,17 +130,17 @@ def test_image_descriptor(self): ) self.assertEqual( len(img_descr), - len(kernels) * response_bins_per_axis ** 2 + num_color_bins ** 3, + len(kernels) * response_bins_per_axis**2 + num_color_bins**3, ) - # TODO: more technical test, e.g., passing an all-zero filter bank - # should return an all-zero gist descriptor + # TODO: more technical test, e.g., passing an all-zero filter bank should return + # an all-zero gist descriptor - # TODO: more technical test, e.g., ensure that a negative number of - # bins raises some NumPy error + # TODO: more technical test, e.g., ensure that a negative number of bins raises + # some numpy error - # TODO: more technical test, e.g., ensure that all values in - # `img_descr_row` are within the unit norm + # TODO: more technical test, e.g., ensure that all values in `img_descr_row` are + # within the unit norm class 
TestPixelFeatures(unittest.TestCase): @@ -181,8 +163,7 @@ def test_build_features(self): num_pixel_features = self.pfb.num_pixel_features shape_i = ( - len(self.split_i_df[self.split_i_df["train"]]) - * self.pixels_per_img, + len(self.split_i_df[self.split_i_df["train"]]) * self.pixels_per_img, num_pixel_features, ) shape_ii = ( @@ -209,9 +190,7 @@ def test_build_features(self): # test providing `method` explicitly (and `split_df`) self.assertEqual( - self.pfb.build_features( - split_df=self.split_i_df, method="cluster-I" - ).shape, + self.pfb.build_features(split_df=self.split_i_df, method="cluster-I").shape, shape_i, ) self.assertEqual( @@ -223,21 +202,19 @@ def test_build_features(self): shape_ii, ) - # test that `method='cluster-I'` will ignore the 'img_cluster' column - # of the split data frame + # test that `method='cluster-I'` will ignore the 'img_cluster' column of the + # split data frame self.assertEqual( self.pfb.build_features( split_df=self.split_ii_df, method="cluster-I" ).shape, ( - len(self.split_ii_df[self.split_ii_df["train"]]) - * self.pixels_per_img, + len(self.split_ii_df[self.split_ii_df["train"]]) * self.pixels_per_img, num_pixel_features, ), ) - # test that `method='cluster-II'` and non-None `img_cluster` raises a - # ValueError + # test that `method='cluster-II'` and non-None `img_cluster` raises a ValueError self.assertRaises( ValueError, self.pfb.build_features, @@ -245,9 +222,8 @@ def test_build_features(self): method="cluster-II", ) - # test that `method='cluster-II'` raises a `ValueError` if `split_df` - # does not have a `img_cluster` column (when using the method - # 'cluster-I') + # test that `method='cluster-II'` raises a `ValueError` if `split_df` does not + # have a `img_cluster` column (when using the method 'cluster-I') self.assertRaises( ValueError, self.pfb.build_features, @@ -256,9 +232,7 @@ def test_build_features(self): ) # test providing `img_filepaths` - img_filepaths = self.split_i_df[self.split_i_df["train"]][ - 
"img_filepath" - ] + img_filepaths = self.split_i_df[self.split_i_df["train"]]["img_filepath"] # the shape of the feature matrix below is the same as `shape_i` self.assertEqual( @@ -266,15 +240,15 @@ def test_build_features(self): (len(img_filepaths) * self.pixels_per_img, num_pixel_features), ) - # test providing `img_dir`. In this case all the images (not only the - # ones selected for training) will be transformed into feature vectors + # test providing `img_dir`. In this case all the images (not only the ones + # selected for training) will be transformed into feature vectors self.assertEqual( self.pfb.build_features(img_dir=self.img_dir).shape, (len(self.split_i_df) * self.pixels_per_img, num_pixel_features), ) - # test that if none of `split_df`, `img_filepaths` or `img_dir` are - # provided, a `ValueError` is raised + # test that if none of `split_df`, `img_filepaths` or `img_dir` are provided, a + # `ValueError` is raised self.assertRaises(ValueError, self.pfb.build_features) @@ -315,8 +289,7 @@ def test_build_response(self): self.assertTrue(np.all(unique_response == np.arange(2))) # test shapes shape_i = ( - len(self.split_i_df[self.split_i_df["train"]]) - * self.pixels_per_img, + len(self.split_i_df[self.split_i_df["train"]]) * self.pixels_per_img, ) shape_ii = ( len( @@ -328,8 +301,8 @@ def test_build_response(self): * self.pixels_per_img, ) - # test for `response_i` and `response_ii`, which have been obtained by - # providing `method` implicitly (and `split_df`) + # test for `response_i` and `response_ii`, which have been obtained by providing + # `method` implicitly (and `split_df`) self.assertEqual(response_i.shape, shape_i) self.assertEqual(response_ii.shape, shape_ii) @@ -352,18 +325,15 @@ def test_build_response(self): shape_ii, ) - # test that `method='cluster-I'` will ignore the 'img_cluster' column - # of the split data frame + # test that `method='cluster-I'` will ignore the 'img_cluster' column of the + # split data frame self.assertEqual( 
self.prb.build_response( split_df=self.split_ii_df, response_img_dir=self.response_img_dir, method="cluster-I", ).shape, - ( - len(self.split_ii_df[self.split_ii_df["train"]]) - * self.pixels_per_img, - ), + (len(self.split_ii_df[self.split_ii_df["train"]]) * self.pixels_per_img,), ) # test that when providing `split_df`, `response_img_dir` is required @@ -374,8 +344,7 @@ def test_build_response(self): method="cluster-II", ) - # test that `method='cluster-II'` and non-None `img_cluster` raises a - # ValueError + # test that `method='cluster-II'` and non-None `img_cluster` raises a ValueError self.assertRaises( ValueError, self.prb.build_response, @@ -384,9 +353,8 @@ def test_build_response(self): method="cluster-II", ) - # test that `method='cluster-II'` raises a `ValueError` if `split_df` - # does not have a `img_cluster` column (when using the method - # 'cluster-I') + # test that `method='cluster-II'` raises a `ValueError` if `split_df` does not + # have a `img_cluster` column (when using the method 'cluster-I') self.assertRaises( ValueError, self.prb.build_response, @@ -396,29 +364,23 @@ def test_build_response(self): ) # test providing `img_filepaths` - img_filepaths = self.split_i_df[self.split_i_df["train"]][ - "img_filepath" - ].apply( - lambda filepath: path.join( - self.response_img_dir, path.basename(filepath) - ) + img_filepaths = self.split_i_df[self.split_i_df["train"]]["img_filepath"].apply( + lambda filepath: path.join(self.response_img_dir, path.basename(filepath)) ) # the shape of the feature matrix below is the same as `shape_i` self.assertEqual( - self.prb.build_response( - response_img_filepaths=img_filepaths - ).shape, + self.prb.build_response(response_img_filepaths=img_filepaths).shape, (len(img_filepaths) * self.pixels_per_img,), ) - # test that if none of `split_df`, `img_filepaths` or `img_dir` are - # provided, a `ValueError` is raised + # test that if none of `split_df`, `img_filepaths` or `img_dir` are provided, a + # `ValueError` is 
raised self.assertRaises(ValueError, self.prb.build_response) - # test that providing a response whose pixel values are not - # exclusively the `tree_val` and `nontree_val` attributes of the - # `PixelResponseBuilder` instance raises a `ValueError` + # test that providing a response whose pixel values are not exclusively the + # `tree_val` and `nontree_val` attributes of the `PixelResponseBuilder` instance + # raises a `ValueError` self.assertRaises( ValueError, self.prb.build_response_from_filepath, @@ -450,8 +412,7 @@ def setUp(self): self.tmp_output_dir = path.join(self.data_dir, "tmp_output") os.mkdir(self.tmp_output_dir) - # TODO: test init arguments of `ClassifierTrainer` other than - # `num_estimators` + # TODO: test init arguments of `ClassifierTrainer` other than `num_estimators` num_estimators = 2 # to speed-up the tests self.ct = dtr.ClassifierTrainer(num_estimators=num_estimators) # cache this first trained classifier to reuse it below @@ -469,10 +430,10 @@ def tearDown(self): def test_classifier_trainer(self): - # test that all the combinations of arguments of the `train_classifier` - # method return an instance of `sklearn.ensemble.AdaBoostClassifier` - # option 1a: `split_df` and `response_img_dir` with implicit method - # (note that we are using `self.clf` obtained in `setUp`) + # test that all the combinations of arguments of the `train_classifier` method + # return an instance of `sklearn.ensemble.AdaBoostClassifier` option 1a: + # `split_df` and `response_img_dir` with implicit method (note that we are using + # `self.clf` obtained in `setUp`) self.assertIsInstance(self.clf, ensemble.AdaBoostClassifier) self.assertIsInstance( self.ct.train_classifier( @@ -501,9 +462,7 @@ def test_classifier_trainer(self): ensemble.AdaBoostClassifier, ) # option 2: `img_filepaths` and `response_img_dir` - img_filepaths = self.split_i_df[self.split_i_df["train"]][ - "img_filepath" - ] + img_filepaths = self.split_i_df[self.split_i_df["train"]]["img_filepath"] 
self.assertIsInstance( self.ct.train_classifier( img_filepaths=img_filepaths, @@ -513,9 +472,7 @@ def test_classifier_trainer(self): ) # option 3: `img_filepaths` and `response_img_filepaths` response_img_filepaths = img_filepaths.apply( - lambda filepath: path.join( - self.response_img_dir, path.basename(filepath) - ) + lambda filepath: path.join(self.response_img_dir, path.basename(filepath)) ) self.assertIsInstance( self.ct.train_classifier( @@ -524,15 +481,13 @@ def test_classifier_trainer(self): ), ensemble.AdaBoostClassifier, ) - # from here below, we use `self.tmp_train_dir`, which is a directory - # with only one image, namely `self.train_filename`, so that the - # training does not take long + # from here below, we use `self.tmp_train_dir`, which is a directory with only + # one image, namely `self.train_filename`, so that the training does not take + # long img_dir = self.tmp_train_dir # here we could use `img_dir` or `self.img_dir` img_filepaths = [path.join(self.img_dir, self.train_filename)] - response_img_filepaths = [ - path.join(self.response_img_dir, self.train_filename) - ] + response_img_filepaths = [path.join(self.response_img_dir, self.train_filename)] # option 4: `img_dir` and `response_img_dir` self.assertIsInstance( self.ct.train_classifier( @@ -564,20 +519,19 @@ def test_classifier_trainer(self): ensemble.AdaBoostClassifier, ) - # test that either `split_df`, `img_filepaths` or `img_dir` must be - # provided + # test that either `split_df`, `img_filepaths` or `img_dir` must be provided self.assertRaises(ValueError, self.ct.train_classifier) - # test that `train_classifiers` raises a `ValueError` if `split_df` - # doesn't have a 'img_cluster' column + # test that `train_classifiers` raises a `ValueError` if `split_df` doesn't have + # a 'img_cluster' column self.assertRaises( ValueError, self.ct.train_classifiers, split_df=self.split_i_df, response_img_dir=self.response_img_dir, ) - # test that `train_classifiers` returns a dict otherwise - 
# (note that we are using `self.clf_dict` obtained in `setUp`) + # test that `train_classifiers` returns a dict otherwise (note that we are using + # `self.clf_dict` obtained in `setUp`) self.assertIsInstance(self.clf_dict, dict) def _test_imgs_exist_and_rm(self, pred_imgs): @@ -592,11 +546,8 @@ def test_classifier(self): img_filepath = self.split_i_df.iloc[0]["img_filepath"] # test that `classify_img` returns a ndarray - self.assertIsInstance( - c.classify_img(img_filepath, self.clf), np.ndarray - ) - # test that `classify_img` with `output_filepath` returns a ndarray - # and dumps it + self.assertIsInstance(c.classify_img(img_filepath, self.clf), np.ndarray) + # test that `classify_img` with `output_filepath` returns a ndarray and dumps it output_filepath = path.join(self.tmp_output_dir, "foo.tif") y_pred = c.classify_img(img_filepath, self.clf, output_filepath) self.assertIsInstance(y_pred, np.ndarray) @@ -604,16 +555,14 @@ def test_classifier(self): # remove it so that the output dir is clean in the tests below os.remove(output_filepath) - # test that `classify_imgs` with implicit `cluster-I` method returns a - # list and that the images have been dumped - pred_imgs = c.classify_imgs( - self.split_i_df, self.tmp_output_dir, clf=self.clf - ) + # test that `classify_imgs` with implicit `cluster-I` method returns a list and + # that the images have been dumped + pred_imgs = c.classify_imgs(self.split_i_df, self.tmp_output_dir, clf=self.clf) self.assertIsInstance(pred_imgs, list) self._test_imgs_exist_and_rm(pred_imgs) - # test that `classify_imgs` with implicit `cluster-II` method, `clf` - # and `img_label` returns a list and that the images have been dumped + # test that `classify_imgs` with implicit `cluster-II` method, `clf` and + # `img_label` returns a list and that the images have been dumped pred_imgs = c.classify_imgs( self.split_ii_df, self.tmp_output_dir, @@ -632,8 +581,8 @@ def test_classifier(self): self.assertIsInstance(pred_imgs, list) 
self._test_imgs_exist_and_rm(pred_imgs) - # test that `classify_imgs` with implicit `cluster-II` method and - # `clf_dict` returns a dict and that the images have been dumped + # test that `classify_imgs` with implicit `cluster-II` method and `clf_dict` + # returns a dict and that the images have been dumped pred_imgs = c.classify_imgs( self.split_ii_df, self.tmp_output_dir, clf_dict=self.clf_dict ) @@ -651,8 +600,8 @@ def test_classifier(self): self.assertRaises( ValueError, c.classify_imgs, self.split_ii_df, self.tmp_output_dir ) - # test that `clf_dict=None` with 'cluster-II' and `img_cluster=None` - # raises a `ValueError`, even when providing a non-None `clf` + # test that `clf_dict=None` with 'cluster-II' and `img_cluster=None` raises a + # `ValueError`, even when providing a non-None `clf` self.assertRaises( ValueError, c.classify_imgs, @@ -667,9 +616,7 @@ def test_classifier(self): c = dtr.Classifier(refine=False) img_filepath = self.split_i_df.iloc[0]["img_filepath"] # test that `classify_img` returns a ndarray - self.assertIsInstance( - c.classify_img(img_filepath, self.clf), np.ndarray - ) + self.assertIsInstance(c.classify_img(img_filepath, self.clf), np.ndarray) class TestLidarToCanopy(unittest.TestCase): @@ -707,8 +654,8 @@ def test_lidar_to_canopy(self): postprocess_func_args=[ndi.generate_binary_structure(2, 2)], postprocess_func_kws={"border_value": 0}, ) - # test that `to_canopy_mask` with `output_filepath` returns a ndarray - # and dumps it + # test that `to_canopy_mask` with `output_filepath` returns a ndarray and dumps + # it output_filepath = path.join(self.tmp_dir, "foo.tif") y_pred = ltc.to_canopy_mask( self.lidar_filepath, @@ -746,12 +693,10 @@ def test_split_into_tiles(self): self.assertTrue(len(tiles) <= len(maybe_empty_tiles)) def test_get_img_filepaths(self): - self.assertRaises( - ValueError, utils.get_img_filepaths, self.split_i_df, 0, True - ) + self.assertRaises(ValueError, utils.get_img_filepaths, self.split_i_df, 0, True) def 
test_logging(self): - # Taken from OSMnx + # Taken from osmnx # https://github.com/gboeing/osmnx/blob/master/tests/test_osmnx.py import logging as lg @@ -768,9 +713,7 @@ def setUp(self): self.models_dir = path.join(self.data_dir, "models") self.response_img_dir = path.join(self.data_dir, "response_img") - self.split_ii_filepath = path.join( - self.data_dir, "split_cluster-II.csv" - ) + self.split_ii_filepath = path.join(self.data_dir, "split_cluster-II.csv") self.tmp_dir = path.join(self.data_dir, "tmp") os.mkdir(self.tmp_dir)