Updated code of experiments for new article (stratified parametric models)
iuliivasilev · Aug 6, 2024 · 0b95e25 · 0b95e25
1 parent ca3da13
commit 0b95e25
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 138 deletions.
diff --git a/demonstration/Articles/Stratified and stacking model.ipynb b/demonstration/Articles/Stratified and stacking model.ipynb
diff --git a/demonstration/Dissertation/Chapter2_Tree.ipynb b/demonstration/Dissertation/Chapter2_Tree.ipynb
@@ -2,18 +2,17 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 1,
    "id": "48c8325a",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The line_profiler extension is already loaded. To reload it, use:\n",
-      "  %reload_ext line_profiler\n",
-      "The scalene extension is already loaded. To reload it, use:\n",
-      "  %reload_ext scalene\n"
+      "Scalene extension successfully loaded. Note: Scalene currently only\n",
+      "supports CPU+GPU profiling inside Jupyter notebooks. For full Scalene\n",
+      "profiling, use the command line version.\n"
      ]
     }
    ],
@@ -113,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "id": "339bab3a",
    "metadata": {},
    "outputs": [],
@@ -123,7 +122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "id": "2d3ca1a4",
    "metadata": {},
    "outputs": [

diff --git a/tests/PARAMS/SCHEME_PARAM.py b/tests/PARAMS/SCHEME_PARAM.py
@@ -1,13 +1,26 @@
+#CRAID_param_grid = {
+#     "depth": [10],  # 10
+#     "balance": [None],  # [None, "balance", "balance+correct", "only_log_rank"]
+#     "criterion": ["peto", "tarone-ware", "wilcoxon", "logrank"],  # "ibswei", "maxcombo",
+#     "min_samples_leaf": [0.05, 0.01, 0.001],
+#     "l_reg": [0, 0.01, 0.1, 0.5, 0.9],  # [0, 0.001, 0.01, 0.1, 0.2]
+#     "leaf_model": ["base", "base_zero_after"],  # , "base_zero_after"
+#     'cut': [False],  # True,
+#     "woe": [True],
+#     "signif": [0.05, 0.1, 1.0],
+#     "max_features": [1.0]
+# }
+
 CRAID_param_grid = {
-    "depth": [10],  # 10
-    "balance": [None],  # [None, "balance", "balance+correct", "only_log_rank"]
-    "criterion": ["peto", "tarone-ware", "wilcoxon", "logrank"],  # "ibswei", "maxcombo",
-    "min_samples_leaf": [0.05, 0.01, 0.001],
-    "l_reg": [0, 0.01, 0.1, 0.5, 0.9],  # [0, 0.001, 0.01, 0.1, 0.2]
-    "leaf_model": ["base", "base_zero_after"],  # , "base_zero_after"
+    "depth": [0, 1, 2, 5, 7, 10],  # 10
+    "balance": [None],
+    "criterion": ["logrank"],  # ["peto", "tarone-ware", "wilcoxon", "logrank"],
+    "min_samples_leaf": [0.3, 0.1, 0.05, 0.01, 0.001],
+    "l_reg": [0],
+    "leaf_model": ["base", "WeibullAFT", "LogNormalAFT", "LogLogisticAFT", "CoxPH"],
     'cut': [False],  # True,
     "woe": [True],
-    "signif": [0.05, 0.1, 1.0],
+    "signif": [0.05],
     "max_features": [1.0]
 }
 

diff --git a/tests/test_experiments.py b/tests/test_experiments.py
@@ -210,15 +210,17 @@ def run(dataset="GBSG", with_self=["TREE", "BSTR", "BOOST"],
         for alg in with_self:
             PARAMS_[dataset][alg]["categ"] = [categ]
             PARAMS_[dataset][alg]["ens_metric_name"] = [best_metric]
-            PARAMS_[dataset][alg]["mode_wei"] = [mode_wei]
+            if not (mode_wei is None):
+                PARAMS_[dataset][alg]["mode_wei"] = [mode_wei]
             experim.add_method(SELF_ALGS[alg], PARAMS_[dataset][alg])
     experim.run_effective(X, y, dir_path=dir_path, verbose=1)
     return experim
 
 
 @pytest.fixture(scope="module")
 def dir_path():
-    return os.path.join(os.getcwd(), "experiment_results", "phd_normal_res_with_jit")  # "many_ds")
+    return os.path.join(os.getcwd(), "experiment_results", "Backblaze")  # "many_ds")
+    # return os.path.join(os.getcwd(), "experiment_results", "phd_normal_res_with_jit")  # "many_ds")
 
 
 # @pytest.mark.skip(reason="no way of currently testing this")
@@ -233,16 +235,18 @@ def dir_path():
     "mode_wei", ["linear"]  # "exp", "sigmoid"
 )
 @pytest.mark.parametrize(
-    "dataset",  ["rott2", "PBC", "WUHAN", "GBSG", "support2", "smarto"]
+    "dataset",  ["backblaze16_18", "backblaze18_21", "backblaze21_23"]
+    # ["rott2", "PBC", "WUHAN", "GBSG", "support2", "smarto"]
     # ["backblaze16_18", "backblaze18_21", "backblaze21_23"]
 )
-def test_dataset_exp(dir_path, dataset, mode_wei, best_metric, bins_sch="origin", mode="CV+SAMPLE"):  # CV+SAMPLE
+def test_dataset_exp(dir_path, dataset, mode_wei, best_metric, bins_sch="origin", mode="CV"):  # CV+SAMPLE
     # mode_wei = None
     # NORMAL_SHORT_QUANTILE_TIME _
     # prefix = f"{best_metric}_STRATTIME+_EXT10_NORMAL_EQ_REG_CLEVERBOOST_SUM_ALL_BINS_{bins_sch}"
     # "scsurv", "bstr_full_WB", SHORT_CNT_DIFF_
 
-    prefix = f"{best_metric}_BOOST_linear"
+    # prefix = f"{best_metric}_BOOST_linear"
+    prefix = f"{best_metric}_STRAT_TREE"
     # prefix = f"{best_metric}_STRATTIME+_PARBSTR_test_wide_{bins_sch}"
     # prefix = f"{best_metric}_STRATTIME+_EXT10_STABLE_EQ_REG_PARBSTR_ALL_BINS_{bins_sch}"
     # prefix = f"{best_metric}_STRATTIME+_EXT10_NORMAL_EQ_REG_TREE_ALL_BINS_{bins_sch}"
@@ -260,7 +264,7 @@ def test_dataset_exp(dir_path, dataset, mode_wei, best_metric, bins_sch="origin"
     storage_path = os.path.join("D:", os.sep, "Vasilev", "SA", dataset)
     if not os.path.exists(storage_path):
         os.makedirs(storage_path)
-    res_exp = run(dataset, with_self=["BOOST"], with_external=False, mode=mode,  # CLEVERBOOST
+    res_exp = run(dataset, with_self=["TREE"], with_external=False, mode=mode,  # CLEVERBOOST
                   #  dir_path=storage_path+"\\",
                   bins_sch=bins_sch, best_metric=best_metric, mode_wei=mode_wei)  # ["TREE", "PARBSTR", "BSTR", "BOOST"]