Open source instance retrieval configs (#394)

Summary: Pull Request resolved: #394 1. Open source the Oxford and Paris configs. 2. Reorder config options into more semantic groups. Reviewed By: prigoyal Differential Revision: D30145623 fbshipit-source-id: e6335b76069ee87998f140c3cddf8e2a9500cd8c
facebookresearch · Aug 10, 2021 · a4225a9 · a4225a9
1 parent a879fb3
commit a4225a9
Show file tree

Hide file tree

Showing 3 changed files with 139 additions and 23 deletions.
diff --git a/configs/config/benchmark/instance_retrieval/eval_resnet_1gpu_roxford.yaml b/configs/config/benchmark/instance_retrieval/eval_resnet_1gpu_roxford.yaml
@@ -0,0 +1,57 @@
+# @package _global_
+config:
+  DISTRIBUTED:
+    NUM_PROC_PER_NODE: 1
+  MODEL:
+    FEATURE_EVAL_SETTINGS:
+      EVAL_MODE_ON: True
+      FREEZE_TRUNK_ONLY: True
+      EXTRACT_TRUNK_FEATURES_ONLY: True
+      SHOULD_FLATTEN_FEATS: false
+      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
+          ["res5", ["Identity", []]],
+      ]
+    TRUNK:
+      NAME: resnet
+      RESNETS:
+        DEPTH: 50
+    WEIGHTS_INIT:
+      ############################# OSS model ####################################
+      PARAMS_FILE: <your model weights>
+      STATE_DICT_KEY_NAME: classy_state_dict
+      ############ example settings for torchvision model rn50 ###################
+      # PARAMS_FILE: https://download.pytorch.org/models/resnet50-19c8e357.pth
+      # STATE_DICT_KEY_NAME: ""
+      # APPEND_PREFIX: "trunk.base_model._feature_blocks."
+  IMG_RETRIEVAL:
+    ############################# Dataset Information #############################
+    # With RN50 trained supervised on Imagenet1k, we expect: (e: 72.1 / m: 53.04 / h: 22.57)
+    TRAIN_DATASET_NAME: rparis6k
+    EVAL_DATASET_NAME: roxford5k
+    DATASET_PATH: <enter dataset path>
+    # Number of training samples to use. -1 uses all the samples in the dataset.
+    NUM_TRAINING_SAMPLES: -1
+    # Number of query samples to use. -1 uses all the samples in the dataset.
+    NUM_QUERY_SAMPLES: -1
+    # Number of database samples to use. -1 uses all the samples in the dataset.
+    NUM_DATABASE_SAMPLES: -1
+    # Debug switch: sets data limits for the number of training, query, and
+    # database samples (presumably via the NUM_*_SAMPLES values above -- confirm).
+    DEBUG_MODE: False
+    ############################# Feature Processing Hypers #############################
+    RESIZE_IMG: 1024
+    TRAIN_PCA_WHITENING: True
+    # rmac has yielded the best results.
+    FEATS_PROCESSING_TYPE: rmac
+    SPATIAL_LEVELS: 3
+    # Valid only for GeM pooling of features (FEATS_PROCESSING_TYPE: gem).
+    GEM_POOL_POWER: 4.0
+    # PCA dimension when using RN50 res4 features:
+    # N_PCA: 1024
+    # PCA dimension when using RN50 res5 features (the layer selected in this config):
+    N_PCA: 2048
+    # Whether to crop the query region of interest (bbx); RN-50 experiments show cropping degrades performance.
+    CROP_QUERY_ROI: False
+    # Whether or not to apply L2 norm after the features have been post-processed.
+    # Normalization is heavily recommended based on experiments run.
+    NORMALIZE_FEATURES: True
diff --git a/configs/config/benchmark/instance_retrieval/eval_resnet_1gpu_rparis.yaml b/configs/config/benchmark/instance_retrieval/eval_resnet_1gpu_rparis.yaml
@@ -0,0 +1,57 @@
+# @package _global_
+config:
+  DISTRIBUTED:
+    NUM_PROC_PER_NODE: 1
+  MODEL:
+    FEATURE_EVAL_SETTINGS:
+      EVAL_MODE_ON: True
+      FREEZE_TRUNK_ONLY: True
+      EXTRACT_TRUNK_FEATURES_ONLY: True
+      SHOULD_FLATTEN_FEATS: false
+      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
+          ["res5", ["Identity", []]],
+      ]
+    TRUNK:
+      NAME: resnet
+      RESNETS:
+        DEPTH: 50
+    WEIGHTS_INIT:
+      ############################# OSS model ####################################
+      PARAMS_FILE: <your model weights>
+      STATE_DICT_KEY_NAME: classy_state_dict
+      ############ example settings for torchvision model rn50 ###################
+      # PARAMS_FILE: https://download.pytorch.org/models/resnet50-19c8e357.pth
+      # STATE_DICT_KEY_NAME: ""
+      # APPEND_PREFIX: "trunk.base_model._feature_blocks."
+  IMG_RETRIEVAL:
+    ############################# Dataset Information #############################
+    # With RN50 trained supervised on Imagenet1k, we expect: (e: 85.87 / m: 69.31 / h: 45.12)
+    TRAIN_DATASET_NAME: roxford5k
+    EVAL_DATASET_NAME: rparis6k
+    DATASET_PATH: <enter dataset path>
+    # Number of training samples to use. -1 uses all the samples in the dataset.
+    NUM_TRAINING_SAMPLES: -1
+    # Number of query samples to use. -1 uses all the samples in the dataset.
+    NUM_QUERY_SAMPLES: -1
+    # Number of database samples to use. -1 uses all the samples in the dataset.
+    NUM_DATABASE_SAMPLES: -1
+    # Debug switch: sets data limits for the number of training, query, and
+    # database samples (presumably via the NUM_*_SAMPLES values above -- confirm).
+    DEBUG_MODE: False
+    ############################# Feature Processing Hypers #############################
+    RESIZE_IMG: 1024
+    TRAIN_PCA_WHITENING: True
+    # rmac has yielded the best results.
+    FEATS_PROCESSING_TYPE: rmac
+    SPATIAL_LEVELS: 3
+    # Valid only for GeM pooling of features (FEATS_PROCESSING_TYPE: gem).
+    GEM_POOL_POWER: 4.0
+    # PCA dimension when using RN50 res4 features:
+    # N_PCA: 1024
+    # PCA dimension when using RN50 res5 features (the layer selected in this config):
+    N_PCA: 2048
+    # Whether to crop the query region of interest (bbx); RN-50 experiments show cropping degrades performance.
+    CROP_QUERY_ROI: False
+    # Whether or not to apply L2 norm after the features have been post-processed.
+    # Normalization is heavily recommended based on experiments run.
+    NORMALIZE_FEATURES: True
diff --git a/vissl/config/defaults.yaml b/vissl/config/defaults.yaml
@@ -1240,30 +1240,15 @@ config:
   # INSTANCE RETRIEVAL (benchmark)
   # ----------------------------------------------------------------------------------- #
   IMG_RETRIEVAL:
-    # Resize larger side of image to RESIZE_IMG pixels (e.g. 800)
-    RESIZE_IMG: 1024
-    # Use spatial levels (e.g. 3)
-    SPATIAL_LEVELS: 3
-    # output dimension of PCA
-    N_PCA: 512
-    # Data path and names of train/eval data: Oxford | Paris | whitening
-    DATASET_PATH: ""
+    ########################## Dataset Information #############################
     TRAIN_DATASET_NAME: "Oxford"
     EVAL_DATASET_NAME: "Paris"
-    # Path to the compute_ap binary to evaluate Oxford / Paris
-    EVAL_BINARY_PATH: ""
-    # Whether or not to save the retrieval ranking scores (metrics, rankings, similarity scores)
-    SAVE_RETRIEVAL_RANKINGS_SCORES: True
-    # Whether or not to save the features that were extracted
-    SAVE_FEATURES: False
-    # Whether to apply PCA/whitening or not
-    TRAIN_PCA_WHITENING: True
-    # gem | rmac | l2_norm
-    FEATS_PROCESSING_TYPE: ""
-    # valid only for GeM pooling of features. Note that GEM_POOL_POWER=1 equates to average pooling.
-    GEM_POOL_POWER: 4.0
+    # Data path for the train/eval datasets named above (Oxford | Paris | whitening)
+    DATASET_PATH: ""
     # valid only if we are training whitening on the whitening dataset
     WHITEN_IMG_LIST: ""
+    # Path to the compute_ap binary to evaluate Oxford / Paris
+    EVAL_BINARY_PATH: ""
     # Sets data limits for the number of training, query, and database samples.
     DEBUG_MODE: False
     # Number of training samples to use. -1 uses all the samples in the dataset.
@@ -1272,16 +1257,33 @@ config:
     NUM_QUERY_SAMPLES: -1
     # Number of database samples to use. -1 uses all the samples in the dataset.
     NUM_DATABASE_SAMPLES: -1
+    # Whether or not to use distractor images.
+    USE_DISTRACTORS: False
+    ######################## Features Processing Hypers #######################
+    # Resize larger side of image to RESIZE_IMG pixels (e.g. 800)
+    RESIZE_IMG: 1024
+    # Use spatial levels (e.g. 3)
+    SPATIAL_LEVELS: 3
+    # output dimension of PCA
+    N_PCA: 512
+    # Whether to apply PCA/whitening or not
+    TRAIN_PCA_WHITENING: True
+    # gem | rmac
+    FEATS_PROCESSING_TYPE: ""
+    # valid only for GeM pooling of features. Note that GEM_POOL_POWER=1 equates to average pooling.
+    GEM_POOL_POWER: 4.0
     # Whether or not to crop the query images with the given region of interests --
     # Relevant for Oxford, Paris, ROxford, and RParis datasets.
     # Our experiments with RN-50/rmac show that ROI cropping degrades performance.
     CROP_QUERY_ROI: False
     # Whether or not to apply L2 norm after the features have been post-processed.
     # Normalization is heavily recommended based on experiments run.
     NORMALIZE_FEATURES: True
-    # Whether or not to use distractor images.
-    USE_DISTRACTORS: False
-
+    ######################## Misc #######################
+    # Whether or not to save the retrieval ranking scores (metrics, rankings, similarity scores)
+    SAVE_RETRIEVAL_RANKINGS_SCORES: True
+    # Whether or not to save the features that were extracted
+    SAVE_FEATURES: False
 
   # ----------------------------------------------------------------------------------- #
   # K-NEAREST NEIGHBOR (benchmark)