Skip to content

Commit

Permalink
biosphere 0.3.0 (#96)
Browse files Browse the repository at this point in the history
* Integrate biosphere=0.3.0.

* Python.

* R

* Changelog.

* Readme.

* Version numbers.
  • Loading branch information
mlondschien authored Mar 15, 2022
1 parent 559016d commit 157394c
Show file tree
Hide file tree
Showing 22 changed files with 240 additions and 137 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@

# Changelog

## 0.5.0 - (2022-03-15)

**Breaking changes:**

- The parameters `random_forest_mtry` and `random_forest_n_trees` of `Control` have been renamed to `random_forest_max_features` and `random_forest_n_estimators`.
- The default value for `random_forest_max_features` is now `floor(sqrt(d))`.

**New features:**

- The parameter `random_forest_max_features` can now be supplied as a fraction `0 < f < 1`, an integer `i>=1`, `None` (Python, Rust) / `NULL` (R) or `"sqrt"`. For each split, respectively `floor(f d)`, `i`, `d` or `floor(sqrt(d))` features are then considered.

**Other changes:**

- Bump `biosphere` dependency to 0.3.0

## 0.4.4 - (2022-02-22)

**Other changes:**
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "changeforest"
description = "Classifier based non-parametric change point detection."
authors = ["Malte Londschien <malte@londschien.ch>"]
repository = "https://github.com/mlondschien/changeforest/"
version = "0.4.4"
version = "0.5.0"
edition = "2021"
readme = "README.md"
license = "BSD-3-Clause"
Expand All @@ -15,7 +15,7 @@ name = "changeforest"
[dependencies]
ndarray = "0.15"
rand = "0.8"
biosphere = "0.2.2"
biosphere = "0.3.0"

[dev-dependencies]
rstest = "0.12"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ very few trees:

```python
In [6]: from changeforest import Control
...: changeforest(iris, "random_forest", "bs", Control(random_forest_n_trees=10))
...: changeforest(iris, "random_forest", "bs", Control(random_forest_n_estimators=10))
Out[6]:
best_split max_gain p_value
(0, 150] 50 96.071 0.01
Expand Down Expand Up @@ -185,7 +185,7 @@ which can be passed to `changeforest`. The following
will build random forests with very few trees:

```R
> changeforest(X, "random_forest", "bs", Control(random_forest_n_trees=10))
> changeforest(X, "random_forest", "bs", Control(random_forest_n_estimators=10))
... TODO
```

Expand Down
5 changes: 3 additions & 2 deletions changeforest-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "changeforest_py"
version = "0.4.4"
version = "0.5.0"
edition = "2021"

[lib]
Expand All @@ -14,4 +14,5 @@ name = "changeforest"
numpy = "0.15"
changeforest = { path = "../" }
ndarray = "0.15.3"
pyo3 = {version = "0.15", features = ["extension-module"]}
pyo3 = {version = "0.15", features = ["extension-module"]}
biosphere = "0.3.0"
2 changes: 1 addition & 1 deletion changeforest-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ very few trees:

```python
In [6]: from changeforest import Control
...: changeforest(iris, "random_forest", "bs", Control(random_forest_n_trees=10))
...: changeforest(iris, "random_forest", "bs", Control(random_forest_n_estimators=10))
Out[6]:
best_split max_gain p_value
(0, 150] 50 96.071 0.01
Expand Down
8 changes: 4 additions & 4 deletions changeforest-py/changeforest/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ def __init__(
number_of_wild_segments="default",
seeded_segments_alpha="default",
seed="default",
random_forest_n_trees="default",
random_forest_n_estimators="default",
random_forest_max_depth="default",
random_forest_mtry="default",
random_forest_max_features="default",
random_forest_n_jobs="default",
):
self.minimal_relative_segment_length = _to_float(
Expand All @@ -28,9 +28,9 @@ def __init__(
self.number_of_wild_segments = _to_int(number_of_wild_segments)
self.seeded_segments_alpha = _to_float(seeded_segments_alpha)
self.seed = _to_int(seed)
self.random_forest_n_trees = _to_int(random_forest_n_trees)
self.random_forest_n_estimators = _to_int(random_forest_n_estimators)
self.random_forest_max_depth = _to_int(random_forest_max_depth)
self.random_forest_mtry = _to_int(random_forest_mtry)
self.random_forest_max_features = _to_int(random_forest_max_features)
self.random_forest_n_jobs = _to_int(random_forest_n_jobs)


Expand Down
2 changes: 1 addition & 1 deletion changeforest-py/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "changeforest"
description = "Classifier based non-parametric change point detection"
readme = "README.md"
version = "0.4.4"
version = "0.5.0"
requires-python = ">=3.7"
author = "Malte Londschien <malte@londschien.ch>"
urls = {homepage = "https://github.com/mlondschien/changeforest/"}
Expand Down
86 changes: 71 additions & 15 deletions changeforest-py/src/control.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use biosphere::MaxFeatures;
use changeforest::Control;
use pyo3::prelude::{PyObject, PyResult, Python};
use pyo3::exceptions;
use pyo3::prelude::{pyclass, FromPyObject, PyAny, PyErr, PyResult};
use pyo3::prelude::{PyObject, Python};

pub fn control_from_pyobj(py: Python, obj: Option<PyObject>) -> PyResult<Control> {
let mut control = Control::default();
Expand Down Expand Up @@ -44,39 +47,92 @@ pub fn control_from_pyobj(py: Python, obj: Option<PyObject>) -> PyResult<Control
if let Ok(pyvalue) = obj.getattr(py, "seed") {
if let Ok(value) = pyvalue.extract::<u64>(py) {
control = control.with_seed(value);
control.random_forest_parameters =
control.random_forest_parameters.with_seed(value);
}
};

if let Ok(pyvalue) = obj.getattr(py, "random_forest_n_trees") {
if let Ok(value) = pyvalue.extract::<usize>(py) {
control = control.with_random_forest_n_trees(value);
if let Ok(pyvalue) = obj.getattr(py, "seeded_segments_alpha") {
if let Ok(value) = pyvalue.extract::<f64>(py) {
control = control.with_seeded_segments_alpha(value);
}
};

if let Ok(pyvalue) = obj.getattr(py, "random_forest_max_depth") {
if let Ok(value) = pyvalue.extract::<Option<usize>>(py) {
control = control.with_random_forest_max_depth(value);
if let Ok(pyvalue) = obj.getattr(py, "random_forest_n_estimators") {
if let Ok(value) = pyvalue.extract::<usize>(py) {
control.random_forest_parameters =
control.random_forest_parameters.with_n_estimators(value);
}
};

if let Ok(pyvalue) = obj.getattr(py, "random_forest_mtry") {
if let Ok(pyvalue) = obj.getattr(py, "random_forest_max_depth") {
if let Ok(value) = pyvalue.extract::<Option<usize>>(py) {
control = control.with_random_forest_mtry(value);
control.random_forest_parameters =
control.random_forest_parameters.with_max_depth(value);
}
};

if let Ok(pyvalue) = obj.getattr(py, "random_forest_n_jobs") {
if let Ok(value) = pyvalue.extract::<Option<usize>>(py) {
control = control.with_random_forest_n_jobs(value);
if let Ok(pyvalue) = obj.getattr(py, "random_forest_max_features") {
if let Ok(value) = pyvalue.extract::<PyMaxFeatures>(py) {
control.random_forest_parameters = control
.random_forest_parameters
.with_max_features(value.value);
}
};

if let Ok(pyvalue) = obj.getattr(py, "seeded_segments_alpha") {
if let Ok(value) = pyvalue.extract::<f64>(py) {
control = control.with_seeded_segments_alpha(value);
if let Ok(pyvalue) = obj.getattr(py, "random_forest_n_jobs") {
if let Ok(value) = pyvalue.extract::<Option<i32>>(py) {
control.random_forest_parameters =
control.random_forest_parameters.with_n_jobs(value);
}
};
}

Ok(control)
}

// Thin wrapper around `biosphere::MaxFeatures` whose Python-facing class name is
// `MaxFeatures`. It exists so that a `max_features` setting can be pulled out of
// an arbitrary Python object via its `FromPyObject` implementation.
// NOTE(review): plain `//` comments are used here on purpose -- PyO3 presumably
// turns `///` docs on a `#[pyclass]` into the Python-visible docstring; confirm
// before converting these to doc comments.
#[pyclass(name = "MaxFeatures")]
pub struct PyMaxFeatures {
    // The wrapped `biosphere` setting.
    pub value: MaxFeatures,
}

impl PyMaxFeatures {
    /// Wraps `value` in a new `PyMaxFeatures`.
    fn new(value: MaxFeatures) -> Self {
        Self { value }
    }
}

impl FromPyObject<'_> for PyMaxFeatures {
    /// Converts a Python object into a `PyMaxFeatures`.
    ///
    /// Conversions are attempted in this order:
    ///
    /// 1. anything extractable as `usize` becomes `MaxFeatures::Value`,
    /// 2. anything extractable as `f64` in the interval `(0, 1]` becomes
    ///    `MaxFeatures::Fraction`,
    /// 3. `None` becomes `MaxFeatures::None`,
    /// 4. the string `"sqrt"` becomes `MaxFeatures::Sqrt`.
    ///
    /// # Errors
    ///
    /// Returns a `PyTypeError` if the object is a float outside `(0, 1]`
    /// (including NaN), a string other than `"sqrt"`, or of any other type.
    fn extract(ob: &'_ PyAny) -> PyResult<Self> {
        // Integers are tried before floats so that an integer is treated as an
        // absolute number of features rather than as a fraction.
        if let Ok(count) = ob.extract::<usize>() {
            return Ok(PyMaxFeatures::new(MaxFeatures::Value(count)));
        }

        if let Ok(fraction) = ob.extract::<f64>() {
            // Phrased as a positive range check: every comparison with NaN is
            // false, so NaN fails this test and is rejected instead of being
            // passed on as a fraction.
            return if fraction > 0. && fraction <= 1. {
                Ok(PyMaxFeatures::new(MaxFeatures::Fraction(fraction)))
            } else {
                Err(PyErr::new::<exceptions::PyTypeError, _>(format!(
                    "Got max_features {}",
                    fraction
                )))
            };
        }

        match ob.extract::<Option<String>>() {
            Ok(None) => Ok(PyMaxFeatures::new(MaxFeatures::None)),
            Ok(Some(name)) if name == "sqrt" => Ok(PyMaxFeatures::new(MaxFeatures::Sqrt)),
            Ok(Some(name)) => Err(PyErr::new::<exceptions::PyTypeError, _>(format!(
                "Unknown value for max_features: {}",
                name
            ))),
            // Neither an integer, a float, a string nor `None`.
            Err(_) => Err(PyErr::new::<exceptions::PyTypeError, _>(format!(
                "Unknown value for max_features: {}",
                ob
            ))),
        }
    }
}
6 changes: 4 additions & 2 deletions changeforest-py/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ fn changeforest(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
fn changeforest(
py: Python<'_>,
X: PyReadonlyArray2<f64>,
method: String,
segmentation_type: String,
method: Option<String>,
segmentation_type: Option<String>,
control: Option<PyObject>,
) -> PyResult<MyBinarySegmentationResult> {
let control = control_from_pyobj(py, control).unwrap();
let method = method.unwrap_or("random_forest".to_string());
let segmentation_type = segmentation_type.unwrap_or("bs".to_string());
Ok(MyBinarySegmentationResult {
result: wrapper::changeforest(&X.as_array(), &method, &segmentation_type, &control),
})
Expand Down
19 changes: 12 additions & 7 deletions changeforest-py/tests/test_changeforest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import numpy as np
import pytest

from changeforest import changeforest
from changeforest import Control, changeforest


@pytest.mark.parametrize("method", ["knn", "change_in_mean", "random_forest"])
@pytest.mark.parametrize("segmentation_type", ["sbs", "wbs", "bs"])
def test_changeforest(iris_dataset, method, segmentation_type):
result = changeforest(iris_dataset, method, segmentation_type)
result = changeforest(
iris_dataset,
method,
segmentation_type,
control=Control(minimal_relative_segment_length=0.1),
)
np.testing.assert_array_equal(result.split_points(), [50, 100])


Expand All @@ -17,10 +22,10 @@ def test_changeforest_repr(iris_dataset):
result.__repr__()
== """\
best_split max_gain p_value
(0, 150] 50 96.322 0.01
¦--(0, 50] 23 -10.204 1
°--(50, 150] 100 53.822 0.01
¦--(50, 100] 79 -18.63 1
°--(100, 150] 134 -10.839 1\
(0, 150] 50 96.233 0.01
¦--(0, 50] 2 -14.191 1
°--(50, 150] 100 52.799 0.01
¦--(50, 100] 53 5.44 0.29
°--(100, 150] 136 -2.398 0.96\
"""
)
18 changes: 9 additions & 9 deletions changeforest-py/tests/test_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
# model_selection_alpha
("iris", "bs", "knn", {"model_selection_alpha": 0.001}, []),
("iris", "bs", "knn", {"model_selection_alpha": 0.05}, [50, 100]),
# random_forest_ntree
# random_forest_n_estimators
# This is impressive and unexpected.
("iris", "bs", "random_forest", {"random_forest_n_trees": 1}, [37, 52, 99]),
("iris", "bs", "random_forest", {"random_forest_n_trees": 100}, [50, 100]),
("iris", "bs", "random_forest", {"random_forest_n_estimators": 1}, [47, 99]),
("iris", "bs", "random_forest", {"random_forest_n_estimators": 100}, [50, 100]),
# Use X_test instead
("X_test", "bs", "random_forest", {"random_forest_n_trees": 1}, []),
("X_test", "bs", "random_forest", {"random_forest_n_trees": 1.0}, []),
("X_test", "bs", "random_forest", {"random_forest_n_trees": 100}, [5]),
("X_test", "bs", "random_forest", {"random_forest_n_estimators": 1}, []),
("X_test", "bs", "random_forest", {"random_forest_n_estimators": 1.0}, []),
("X_test", "bs", "random_forest", {"random_forest_n_estimators": 100}, [5]),
("X_correlated", "bs", "random_forest", {"random_forest_max_depth": 1}, []),
("X_correlated", "bs", "random_forest", {"random_forest_max_depth": 2}, [49]),
("iris", "bs", "random_forest", {"model_selection_n_permutations": 10}, []),
Expand Down Expand Up @@ -98,10 +98,10 @@ def test_control_segmentation_parameters(
@pytest.mark.parametrize(
"key, default_value, another_value",
[
("random_forest_n_trees", 100, 11),
("minimal_relative_segment_length", 0.1, 0.05),
("random_forest_n_estimators", 100, 11),
("minimal_relative_segment_length", 0.01, 0.05),
("seed", 0, 1),
("random_forest_mtry", None, 1),
("random_forest_max_features", "default", 1),
("random_forest_max_depth", 8, None),
],
)
Expand Down
2 changes: 1 addition & 1 deletion changeforest-r/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: changeforest
Type: Package
Title: Classifier Based Non-Parametric Change Point Detection
Version: 0.4.4
Version: 0.5.0
Author: Malte Londschien
Maintainer: Malte Londschien <malte@londschien.ch>
Description: Perform classifier based multivariate, non-parametric change point detection.
Expand Down
48 changes: 24 additions & 24 deletions changeforest-r/R/control.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,30 @@
Control = R6::R6Class(
"control",
list(
minimal_relative_segment_length = NULL,
minimal_gain_to_split = NULL,
model_selection_alpha = NULL,
model_selection_n_permutations = NULL,
number_of_wild_segments = NULL,
seeded_segments_alpha = NULL,
seed = NULL,
random_forest_n_trees = NULL,
random_forest_mtry = NULL,
random_forest_max_depth = NULL,
random_forest_n_jobs = NULL,
minimal_relative_segment_length = "default",
minimal_gain_to_split = "default",
model_selection_alpha = "default",
model_selection_n_permutations = "default",
number_of_wild_segments = "default",
seeded_segments_alpha = "default",
seed = "default",
random_forest_n_estimators = "default",
random_forest_max_features = "default",
random_forest_max_depth = "default",
random_forest_n_jobs = "default",

initialize = function(
minimal_relative_segment_length = NULL,
minimal_gain_to_split = NULL,
model_selection_alpha = NULL,
model_selection_n_permutations = NULL,
number_of_wild_segments = NULL,
seeded_segments_alpha = NULL,
seed = NULL,
random_forest_n_trees = NULL,
random_forest_mtry = NULL,
random_forest_max_depth = NULL,
random_forest_n_jobs = NULL
minimal_relative_segment_length = "default",
minimal_gain_to_split = "default",
model_selection_alpha = "default",
model_selection_n_permutations = "default",
number_of_wild_segments = "default",
seeded_segments_alpha = "default",
seed = "default",
random_forest_n_estimators = "default",
random_forest_max_features = "default",
random_forest_max_depth = "default",
random_forest_n_jobs = "default"
) {
self$minimal_relative_segment_length = minimal_relative_segment_length
self$minimal_gain_to_split = minimal_gain_to_split
Expand All @@ -37,8 +37,8 @@ Control = R6::R6Class(
self$number_of_wild_segments = number_of_wild_segments
self$seeded_segments_alpha = seeded_segments_alpha
self$seed = seed
self$random_forest_n_trees = random_forest_n_trees
self$random_forest_mtry = random_forest_mtry
self$random_forest_n_estimators = random_forest_n_estimators
self$random_forest_max_features = random_forest_max_features
self$random_forest_max_depth = random_forest_max_depth
self$random_forest_n_jobs = random_forest_n_jobs
}
Expand Down
Loading

0 comments on commit 157394c

Please sign in to comment.