kubeedge · jaypume · Oct 31, 2023 · Aug 21, 2023 · Aug 23, 2023 · Oct 30, 2023
diff --git a/core/storymanager/rank/rank.py b/core/storymanager/rank/rank.py
@@ -196,7 +196,7 @@ def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
         all_df = copy.deepcopy(self.all_df)
         selected_df = pd.DataFrame(all_df, columns=header)
         selected_df = selected_df.drop_duplicates(header[:-2])
-
+        # pylint: disable=E1136
         paradigms = self.selected_dataitem.get("paradigms")
         if paradigms != ["all"]:
             selected_df = selected_df.loc[selected_df["paradigm"].isin(paradigms)]

diff --git a/core/testcasecontroller/algorithm/paradigm/lifelong_learning/lifelong_learning.py b/core/testcasecontroller/algorithm/paradigm/lifelong_learning/lifelong_learning.py
@@ -24,7 +24,7 @@
 from core.testcasecontroller.metrics import get_metric_func
 from core.common.utils import get_file_format, is_local_dir
 
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
 
 class LifelongLearning(ParadigmBase):
     # pylint: disable=too-many-locals
@@ -162,6 +162,102 @@ def run(self):
                 #BWT, FWT = self.compute(key, matrix)
                 self.system_metric_info[SystemMetricType.Matrix.value][key] = matrix
 
+        elif mode == 'hard-example-mining':
+            dataset_files = self._split_dataset(splitting_dataset_times=rounds)
+            # pylint: disable=C0103
+            # pylint: disable=C0206
+            # pylint: disable=C0201
+            # pylint: disable=W1203
+            my_dict = {}
+            for r in range(rounds + 1):
+                train_dataset_file, eval_dataset_file = dataset_files[r]
+                if r == 0:
+                    self.cloud_task_index = self._train(self.cloud_task_index,
+                                                    train_dataset_file,
+                                                    r)
+
+                    self.edge_task_index, tasks_detail, res = self.my_eval(
+                                                    self.cloud_task_index,
+                                                    eval_dataset_file,
+                                                    r)
+
+                else:
+                    infer_dataset_file, eval_dataset_file = dataset_files[r]
+                    inference_results, unseen_task_train_samples = self._inference(
+                                                    self.cloud_task_index,
+                                                    infer_dataset_file,
+                                                    r)
+                    samples_transfer_ratio_info.append((inference_results,
+                                                unseen_task_train_samples.x))
+
+                    # If there are no unseen task samples in this round, start the next round
+                    if len(unseen_task_train_samples.x) <= 0:
+                        continue
+
+                    self.cloud_task_index = self._train(self.cloud_task_index,
+                                                        unseen_task_train_samples,
+                                                        r)
+
+                tmp_dict = {}
+                for j in range(1, rounds+1):
+                    _, eval_dataset_file = dataset_files[j]
+                    self.edge_task_index, tasks_detail, res = self.my_eval(
+                                                    self.cloud_task_index,
+                                                    eval_dataset_file,
+                                                    r)
+                    LOGGER.info(f"train from round {r}")
+                    LOGGER.info(f"test round {j}")
+                    LOGGER.info(f"all scores: {res}")
+                    score_list = tmp_dict.get("all", ['' for i in range(rounds)])
+                    score_list[j-1] = res
+                    tmp_dict["all"] = score_list
+                    task_avg_score = {'accuracy':0.0}
+                    i = 0
+                    for detail in tasks_detail:
+                        i += 1
+                        scores = detail.scores
+                        entry = detail.entry
+                        LOGGER.info(f"{entry} scores: {scores}")
+                        task_avg_score['accuracy'] += scores['accuracy']
+                        score_list = tmp_dict.get(entry, ['' for i in range(rounds)])
+                        score_list[j-1] = scores
+                        tmp_dict[entry] = score_list
+                    task_avg_score['accuracy'] = task_avg_score['accuracy']/i
+                    score_list = tmp_dict.get("task_avg", [{'accuracy':0.0} for i in range(rounds)])
+                    score_list[j-1] = task_avg_score
+                    tmp_dict["task_avg"] = score_list
+
+                for key in tmp_dict.keys():
+                    scores_list = my_dict.get(key, [])
+                    scores_list.append(tmp_dict[key])
+                    my_dict[key] = scores_list
+                    LOGGER.info(f"{key} scores: {scores_list}")
+
+
+            self.edge_task_index, tasks_detail, res = self.my_eval(self.cloud_task_index,
+                                                      self.dataset.test_url,
+                                                      rounds + 1)
+            task_avg_score = {'accuracy':0.0}
+            i = 0
+            for detail in tasks_detail:
+                i += 1
+                scores = detail.scores
+                entry = detail.entry
+                LOGGER.info(f"{entry} scores: {scores}")
+                task_avg_score['accuracy'] += scores['accuracy']
+            task_avg_score['accuracy'] = task_avg_score['accuracy']/i
+            self.system_metric_info[SystemMetricType.Task_Avg_Acc.value] = task_avg_score
+            LOGGER.info(task_avg_score)
+            test_res, unseen_task_train_samples = self._inference(self.edge_task_index,
+                                                              self.dataset.test_url,
+                                                              "test")
+            for key in my_dict.keys():
+                LOGGER.info(f"{key} scores: {my_dict[key]}")
+            for key in my_dict.keys():
+                matrix = my_dict[key]
+                #BWT, FWT = self.compute(key, matrix)
+                self.system_metric_info[SystemMetricType.Matrix.value][key] = matrix
+
         elif mode != 'multi-inference':
             dataset_files = self._split_dataset(splitting_dataset_times=rounds)
             # pylint: disable=C0103
@@ -214,6 +310,7 @@ def _inference(self, edge_task_index, data_index_file, rounds):
             os.makedirs(unseen_task_saved_dir)
 
         os.environ["INFERENCE_RESULT_DIR"] = output_dir
+        os.environ["OUTPUT_URL"] = output_dir
         os.environ["MODEL_URLS"] = f"{edge_task_index}"
 
         inference_dataset = self.dataset.load_data(data_index_file, "eval",
@@ -234,7 +331,7 @@ def _inference(self, edge_task_index, data_index_file, rounds):
         for i, _ in enumerate(inference_dataset.x):
             data = BaseDataSource(data_type="test")
             data.x = inference_dataset.x[i:(i + 1)]
-            res, is_unseen_task, _ = job.inference(data, **kwargs)
+            res, is_unseen_task, _ = job.inference_2(data, **kwargs)
             inference_results.append(res)
             if is_unseen_task:
                 unseen_tasks.append(inference_dataset.x[i])
@@ -257,7 +354,7 @@ def _train(self, cloud_task_index, train_dataset, rounds):
 
         os.environ["CLOUD_KB_INDEX"] = cloud_task_index
         os.environ["OUTPUT_URL"] = train_output_dir
-        if rounds <= 1:
+        if rounds < 1:
             os.environ["HAS_COMPLETED_INITIAL_TRAINING"] = 'False'
         else:
             os.environ["HAS_COMPLETED_INITIAL_TRAINING"] = 'True'

diff --git a/core/testenvmanager/dataset/dataset.py b/core/testenvmanager/dataset/dataset.py
@@ -168,6 +168,12 @@ def split_dataset(self, dataset_url, dataset_format, ratio, method="default",
                                               output_dir=output_dir,
                                               times=times)
 
+        if method == "hard-example_splitting":
+            return self._hard_example_splitting(dataset_url, dataset_format, ratio,
+                                              data_types=dataset_types,
+                                              output_dir=output_dir,
+                                              times=times)
+
         raise ValueError(f"dataset splitting method({method}) is not supported,"
                          f"currently, method supports 'default'.")
 
@@ -320,6 +326,44 @@ def _city_splitting(self, data_file, data_format, ratio,
 
         return data_files
 
+    def _hard_example_splitting(self, data_file, data_format, ratio,
+                              data_types=None, output_dir=None, times=1):
+        if not data_types:
+            data_types = ("train", "eval")
+
+        if not output_dir:
+            output_dir = tempfile.mkdtemp()
+
+        all_data = self._read_data_file(data_file, data_format)
+
+        data_files = []
+
+        all_num = len(all_data)
+        step = int(all_num / (times*2))
+        data_files.append((
+            self._get_dataset_file(all_data[:int((all_num * ratio)/2)], output_dir,
+                                       data_types[0], 0, data_format),
+            self._get_dataset_file(all_data[int((all_num * ratio)/2):int(all_num/2)], output_dir,
+                                       data_types[1], 0, data_format)))
+        index = 1
+        while index <= times:
+            if index == times:
+                new_dataset = all_data[int(all_num/2)+step*(index-1):]
+            else:
+                new_dataset = all_data[int(all_num/2)+step*(index-1): int(all_num/2)+step*index]
+
+            new_num = len(new_dataset)
+
+            data_files.append((
+                self._get_dataset_file(new_dataset[:int(new_num * ratio)], output_dir,
+                                       data_types[0], index, data_format),
+                self._get_dataset_file(new_dataset[int(new_num * ratio):], output_dir,
+                                       data_types[1], index, data_format)))
+
+            index += 1
+
+        return data_files
+
     @classmethod
     def load_data(cls, file: str, data_type: str, label=None, use_raw=False, feature_process=None):
         """

diff --git a/...entation-of-Cloud-Robotics/leaderboard-of-SAM-based-Edge-Cloud-Collaboration.md b/...entation-of-Cloud-Robotics/leaderboard-of-SAM-based-Edge-Cloud-Collaboration.md
@@ -0,0 +1,6 @@
+# Leaderboard of SAM-based Edge-Cloud Collaboration
+
+
+| rank |          algorithm          |      accuracy      |    Task_Avg_Acc    |     paradigm     | basemodel |    task_definition     |    task_allocation     | unseen_sample_recognition | basemodel-learning_rate | basemodel-epochs | task_definition-origins | task_allocation-origins | unseen_sample_recognition-threhold | time                |
+| :--: | :-------------------------: | :----------------: | :----------------: | :--------------: | :-------: | :--------------------: | :--------------------: | :-----------------------: | :---------------------: | :--------------: | :---------------------: | :---------------------: | :--------------------------------: | ------------------- |
+|  1   | sam_rfnet_lifelong_learning | 0.7052917006987501 | 0.6258875117354328 | lifelonglearning | BaseModel | TaskDefinitionByOrigin | TaskAllocationByOrigin |     HardSampleMining      |         0.0001          |        1         |   ['front', 'garden']   |   ['front', 'garden']   |                0.95                | 2023-08-24 12:43:19 |
diff --git a/...-in-semantic-segmentation-of-Cloud-Robotics/leaderboard-of-lifelong-learning.md b/...-in-semantic-segmentation-of-Cloud-Robotics/leaderboard-of-lifelong-learning.md
@@ -0,0 +1,5 @@
+# Leaderboard of lifelong learning
+
+| rank |        algorithm        |      accuracy      |         BWT         |         FWT         |     paradigm     | basemodel |    task_definition     |    task_allocation     | basemodel-learning_rate | basemodel-epochs | task_definition-origins | task_allocation-origins |        time         |
+| :--: | :---------------------: | :----------------: | :-----------------: | :-----------------: | :--------------: | :-------: | :--------------------: | :--------------------: | :---------------------: | :--------------: | :---------------------: | :---------------------: | :-----------------: |
+|  1   | rfnet_lifelong_learning | 0.5206033189775575 | 0.04239649121511442 | 0.02299711942108413 | lifelonglearning | BaseModel | TaskDefinitionByOrigin | TaskAllocationByOrigin |         0.0001          |        10        |   ['front', 'garden']   |   ['front', 'garden']   | 2023-05-24 15:07:57 |
diff --git a/...ption Systems Based on Edge-Cloud Collaboration with Large Foundation Models.md b/...ption Systems Based on Edge-Cloud Collaboration with Large Foundation Models.md
@@ -63,7 +63,7 @@ The overall workflow of the system is as follows:
 
 ![plugin-workflow](images/plugin-workflow.jpg)
 
-The Hard Example Mining Module will be implemented in the unknown sample recognition module of Ianvs. The Edge Inference Module will be implemented in the known sample inference module of Ianvs' edge-side knowledge management. The Cloud Inference Module will be implemented in the unknown task processing module of Ianvs' cloud-side knowledge management.
+The Hard Example Mining Module will be implemented in the unknown sample recognition module of Ianvs. The Edge Inference Module will be implemented in the known sample inference module of Ianvs' edge-side knowledge management. The Cloud Inference Module will be implemented in the unknown sample inference module. The Lifelong Training Module will be implemented in the unknown task processing module of Ianvs' cloud-side knowledge management.
 
 ### 3.2 Implementation of SAM-based Semantic Segmentation
 

diff --git a/docs/proposals/algorithms/lifelong-learning/images/joint-inference.jpg b/docs/proposals/algorithms/lifelong-learning/images/joint-inference.jpg
diff --git a/docs/proposals/algorithms/lifelong-learning/images/plugin-workflow.jpg b/docs/proposals/algorithms/lifelong-learning/images/plugin-workflow.jpg
diff --git a/examples/resources/third_party/sedna-0.4.1-py3-none-any.whl b/examples/resources/third_party/sedna-0.4.1-py3-none-any.whl