This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Migrate VISSL from fvcore to ioPath ensuring backwards compatibility (#443)

Summary:
Pull Request resolved: #443

- Migrate to ioPath (long overdue); this resolves the ioPath warnings in logs during runtime.
- Add an iNaturalist dataset config to test that the images load correctly.

Reviewed By: QuentinDuval

Differential Revision: D31473900

fbshipit-source-id: a4d8a438cd4270f7577fceab6a64b982b9dcb628
prigoyal authored and facebook-github-bot committed Oct 8, 2021
1 parent f2361ec commit 83d859f
Showing 42 changed files with 201 additions and 201 deletions.
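
The change is mechanical and repeats across the files below: the fvcore import is swapped for iopath's global path manager, and every PathManager.<method> call becomes g_pathmgr.<method>. A minimal sketch of the before/after pattern (the path below is hypothetical; fvcore's PathManager had become a wrapper around iopath that logs warnings urging exactly this migration, which is what the commit silences):

# Before (fvcore):
#   from fvcore.common.file_io import PathManager
#   with PathManager.open(path, "rb") as f:
#       ...
#
# After (iopath) -- g_pathmgr is the library's global PathManager instance:
from iopath.common.file_io import g_pathmgr

path = "/tmp/example.txt"  # hypothetical path, for illustration only
with g_pathmgr.open(path, "w") as f:
    f.write("hello")

assert g_pathmgr.exists(path)  # same surface as before: open/exists/ls/rm/mkdirs
g_pathmgr.rm(path)
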
1 change: 1 addition & 0 deletions docs/requirements.txt
@@ -18,6 +18,7 @@ parameterized==0.7.4
 tabulate
 pycocotools>=2.0.1
 fvcore
+iopath==0.1.9
 fairscale
 git+git://github.com/facebookresearch/ClassyVision.git
 https://download.pytorch.org/whl/cpu/torch-1.5.0%2Bcpu-cp37-cp37m-linux_x86_64.whl
4 changes: 2 additions & 2 deletions extra_scripts/convert_caffe2_to_torchvision_resnet.py
@@ -19,7 +19,7 @@
 from collections import OrderedDict
 
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # create the logger
@@ -124,7 +124,7 @@ def _rename_weights_for_resnet(weights, stage_names):
 
 
 def _load_c2_pickled_weights(file_path):
-    with PathManager.open(file_path, "rb") as f:
+    with g_pathmgr.open(file_path, "rb") as f:
         data = pickle.load(f, encoding="latin1")
     if "blobs" in data:
         weights = data["blobs"]
6 changes: 3 additions & 3 deletions extra_scripts/convert_caffe2_to_vissl_alexnet.py
@@ -23,7 +23,7 @@
 
 import numpy as np
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # create the logger
@@ -58,7 +58,7 @@ def remove_jigsaw_names(data):
 
 
 def _load_c2_pickled_weights(file_path):
-    with PathManager.open(file_path, "rb") as f:
+    with g_pathmgr.open(file_path, "rb") as f:
         data = pickle.load(f, encoding="latin1")
     if "blobs" in data:
         weights = data["blobs"]
@@ -71,7 +71,7 @@ def _load_c2_weights(file_path):
     if file_path.endswith("pkl"):
         weights = _load_c2_pickled_weights(file_path)
     elif file_path.endswith("npy"):
-        with PathManager.open(file_path, "rb") as fopen:
+        with g_pathmgr.open(file_path, "rb") as fopen:
             weights = np.load(fopen, allow_pickle=True, encoding="latin1")[()]
     return weights
 
16 changes: 8 additions & 8 deletions extra_scripts/convert_folder_to_filelist.py
@@ -21,7 +21,7 @@
 import argparse
 import os
 
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.env import setup_path_manager
 from vissl.utils.io import save_file
 
@@ -58,21 +58,21 @@ def get_argument_parser():
 
     setup_path_manager()
 
-    splits = PathManager.ls(args.input)
+    splits = g_pathmgr.ls(args.input)
     print(f"The following splits are found: { ','.join(splits) }")
 
     dataset_summary = {}
 
     for split in ["train", "trainval", "val", "test"]:
-        if not PathManager.exists(os.path.join(args.input, split)):
+        if not g_pathmgr.exists(os.path.join(args.input, split)):
             continue
 
         dataset_summary[split] = {}
         img_paths = []
         img_labels = []
 
         split_path = os.path.join(args.input, split)
-        label_paths = PathManager.ls(split_path)
+        label_paths = g_pathmgr.ls(split_path)
         dataset_summary[split]["labels"] = label_paths
         dataset_summary[split]["num_labels"] = len(label_paths)
         print(f"{len(label_paths)} classes found for { split } split.")
@@ -81,7 +81,7 @@ def get_argument_parser():
         # Populate the img_paths and img_labels based on torchvision image folder file structure.
         for label in label_paths:
             label_path = os.path.join(split_path, label)
-            images = PathManager.ls(os.path.join(split_path, label))
+            images = g_pathmgr.ls(os.path.join(split_path, label))
             print(f"{len(images)} examples found for { label }, { split }.")
             total_split_examples += len(images)
             for image in images:
@@ -94,17 +94,17 @@ def get_argument_parser():
         # Remove the split .npy filelist if they exist and resave them..
         image_path = os.path.join(args.output, f"{split}_images.npy")
 
-        PathManager.rm(image_path)
+        g_pathmgr.rm(image_path)
         save_file(img_paths, image_path)
         print(f"Saved { image_path }")
 
         label_path = os.path.join(args.output, f"{split}_labels.npy")
 
-        PathManager.rm(label_path)
+        g_pathmgr.rm(label_path)
         save_file(img_labels, label_path)
         print(f"Saved { label_path }")
 
     # Save dataset summary.
     dataset_summary_path = os.path.join(args.output, "dataset_summary.json")
-    PathManager.rm(dataset_summary_path)
+    g_pathmgr.rm(dataset_summary_path)
     save_file(dataset_summary, dataset_summary_path)
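
Note the setup_path_manager() call above: with iopath, this kind of setup amounts to registering PathHandler objects on the global g_pathmgr, after which the same open/ls/exists/rm calls transparently resolve non-local paths. A minimal sketch of a custom handler, assuming a hypothetical myfs:// scheme backed by a local mount (illustrative only, not VISSL's actual handlers):

from typing import Any, List

from iopath.common.file_io import PathHandler, g_pathmgr


class MyFSPathHandler(PathHandler):
    """Hypothetical handler mapping 'myfs://' URIs onto a local mount."""

    PREFIX = "myfs://"
    MOUNT = "/mnt/myfs/"

    def _get_supported_prefixes(self) -> List[str]:
        return [self.PREFIX]

    def _get_local_path(self, path: str, **kwargs: Any) -> str:
        # Strip the URI scheme and point at the mounted copy.
        return self.MOUNT + path[len(self.PREFIX):]

    def _open(self, path: str, mode: str = "r", **kwargs: Any):
        return open(self._get_local_path(path), mode)


# Register once at startup; g_pathmgr.open("myfs://...") then works everywhere
# the scripts in this commit use g_pathmgr.
g_pathmgr.register_handler(MyFSPathHandler())
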
4 changes: 2 additions & 2 deletions extra_scripts/convert_sharded_checkpoint.py
@@ -20,7 +20,7 @@
 import enum
 import os
 
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.checkpoint import CheckpointFormatConverter
 from vissl.utils.env import setup_path_manager
 from vissl.utils.io import makedir
@@ -41,7 +41,7 @@ class CheckpointType(enum.Enum):
 
 
 def convert_checkpoint(input_path: str, output_path: str, output_type: str):
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         input_path
     ), f"Checkpoint input path: {input_path} not found."
 
4 changes: 2 additions & 2 deletions extra_scripts/convert_vissl_to_torchvision.py
@@ -12,7 +12,7 @@
 import sys
 
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.checkpoint import replace_module_prefix
 from vissl.utils.io import is_url
 
@@ -30,7 +30,7 @@
 
 
 def convert_and_save_model(args, replace_prefix):
-    assert PathManager.exists(args.output_dir), "Output directory does NOT exist"
+    assert g_pathmgr.exists(args.output_dir), "Output directory does NOT exist"
 
     # load the model
     model_path = args.model_url_or_file
4 changes: 2 additions & 2 deletions extra_scripts/create_low_shot_samples.py
@@ -11,7 +11,7 @@
 import random
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.io import load_file, save_file
 
 
@@ -171,7 +171,7 @@ def main():
     )
     opts = parser.parse_args()
 
-    assert PathManager.exists(opts.targets_data_file), "Target file not found. Abort"
+    assert g_pathmgr.exists(opts.targets_data_file), "Target file not found. Abort"
     targets = load_file(opts.targets_data_file)
    sample_ids = list(range(1, 1 + opts.num_samples))
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_coco_data_files.py
@@ -15,7 +15,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from pycocotools.coco import COCO
 
 
@@ -53,8 +53,8 @@ def get_valid_objs(entry, objs):
 
 
 def get_imgs_labels_info(split, json_file, args):
-    assert PathManager.exists(json_file), "Data source does not exist. Abort"
-    json_data = json.load(PathManager.open(json_file, "r"))
+    assert g_pathmgr.exists(json_file), "Data source does not exist. Abort"
+    json_data = json.load(g_pathmgr.open(json_file, "r"))
     image_index = [x["id"] for x in json_data["images"]]
     coco = COCO(json_file)
 
8 changes: 4 additions & 4 deletions extra_scripts/datasets/create_imagenet_a_data_files.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import torchvision.datasets as datasets
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from tqdm import tqdm
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import cleanup_dir
@@ -44,7 +44,7 @@ def get_argument_parser():
 
 def remove_file_name_whitespace(input_path: str):
     """
-    Remove the whitespace in the file names for better compatibility with PathManager.
+    Remove the whitespace in the file names for better compatibility with g_pathmgr.
     """
     for class_folder_path in os.listdir(input_path):
         # All necessary folders start with n.
@@ -82,7 +82,7 @@ class ImagenetTargetMapper:
     )
 
     def __init__(self):
-        with PathManager.open(self.IMAGENET_TARGETS_URL) as f:
+        with g_pathmgr.open(self.IMAGENET_TARGETS_URL) as f:
             imagenet_classes = [line.strip() for line in f.readlines()]
         imagenet_classes.sort()
         self.label_to_id = {label: i for i, label in enumerate(imagenet_classes)}
@@ -143,7 +143,7 @@ def cleanup_unused_files(output_path: str):
     download_datasets(args.input)
 
     input_path = os.path.join(args.input, "imagenet-a")
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_imagenet_data_files.py
@@ -16,7 +16,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # initiate the logger
@@ -36,7 +36,7 @@ def get_all_classes(data_dir):
 
 
 def get_images_labels_info(split, args):
-    assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort!"
+    assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort!"
     data_dir = f"{args.data_source_dir}/{split}"
     class_idx = get_all_classes(data_dir)
     logger.info("Number of classes in {} data: {}".format(split, len(class_idx)))
@@ -103,7 +103,7 @@ def main():
        json_out_path = f"{args.output_dir}/{partition}_targets.json"
        import json
 
-        with PathManager.open(json_out_path, "w") as fp:
+        with g_pathmgr.open(json_out_path, "w") as fp:
            json.dump(output_dict, fp)
        logger.info("Saved Json to: {}".format(json_out_path))
    logger.info("DONE!")
4 changes: 2 additions & 2 deletions extra_scripts/datasets/create_imagenet_r_data_files.py
@@ -10,7 +10,7 @@
     create_imagenet_test_files,
     remove_file_name_whitespace,
 )
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import cleanup_dir
 
@@ -77,7 +77,7 @@ def cleanup_unused_files(output_path: str):
 
     dataset_name = "imagenet-r"
     input_path = os.path.join(args.input, dataset_name)
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
4 changes: 2 additions & 2 deletions extra_scripts/datasets/create_imagenet_sketch_data_files.py
@@ -10,7 +10,7 @@
     create_imagenet_test_files,
     remove_file_name_whitespace,
 )
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from torchvision.datasets.utils import extract_archive
 from vissl.utils.download import download_google_drive_url
 from vissl.utils.io import cleanup_dir
@@ -82,7 +82,7 @@ def cleanup_unused_files(output_path: str):
     download_datasets(args.input)
 
     input_path = os.path.join(args.input, "imagenet_sketch")
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_inaturalist2018_data_files.py
@@ -20,7 +20,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import save_file
 
@@ -111,10 +111,10 @@ def main():
     args = parser.parse_args()
 
     # Make sure that the input and output directories exist.
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         args.input_dir_path
     ), "Data input directory not found! Please create the directory"
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         args.output_dir_path
     ), "Data output directory not found! Please create the directory"
 
8 changes: 4 additions & 4 deletions extra_scripts/datasets/create_sun397_data_files.py
@@ -10,7 +10,7 @@
 from typing import Any, List
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from tqdm import tqdm
 from vissl.utils.download import download_and_extract_archive
 
@@ -95,7 +95,7 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed:
     by allocating 70% of labels to "train", 10% to "val" and 20% to "test".
     """
     random.seed(seed)
-    PathManager.mkdirs(output_path)
+    g_pathmgr.mkdirs(output_path)
 
     # List all the available classes in SUN397 and their path
     image_folder = os.path.join(input_path, "SUN397")
@@ -129,10 +129,10 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed:
     # Save each split
     for split, samples in splits_data.items():
         image_output_path = os.path.join(output_path, f"{split}_images.npy")
-        with PathManager.open(image_output_path, mode="wb") as f:
+        with g_pathmgr.open(image_output_path, mode="wb") as f:
             np.save(f, np.array(samples.image_paths))
         label_output_path = os.path.join(output_path, f"{split}_labels.npy")
-        with PathManager.open(label_output_path, mode="wb") as f:
+        with g_pathmgr.open(label_output_path, mode="wb") as f:
             np.save(f, np.array(samples.image_labels))
 
Expand Down
10 changes: 5 additions & 5 deletions extra_scripts/datasets/create_voc_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from glob import glob

import numpy as np
from fvcore.common.file_io import PathManager
from iopath.common.file_io import g_pathmgr


# initiate the logger
Expand All @@ -39,7 +39,7 @@ def validate_files(input_files):

def get_data_files(split, args):
data_dir = f"{args.data_source_dir}/ImageSets/Main"
assert PathManager.exists(data_dir), "Data: {} doesn't exist".format(data_dir)
assert g_pathmgr.exists(data_dir), "Data: {} doesn't exist".format(data_dir)
test_data_files = glob(os.path.join(data_dir, "*_test.txt"))
test_data_files = validate_files(test_data_files)
if args.separate_partitions > 0:
Expand Down Expand Up @@ -68,7 +68,7 @@ def get_data_files(split, args):


def get_images_labels_info(split, args):
assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort"
assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort"

data_files = get_data_files(split, args)
# we will construct a map for image name to the vector of -1, 0, 1
Expand All @@ -77,7 +77,7 @@ def get_images_labels_info(split, args):
for cls_num, data_path in enumerate(sorted(data_files)):
# for this class, we have images and each image will have label
# 1, -1, 0 -> present, not present, ignore respectively as in VOC data.
with PathManager.open(data_path, "r") as fopen:
with g_pathmgr.open(data_path, "r") as fopen:
for line in fopen:
try:
img_name, orig_label = line.strip().split()
Expand Down Expand Up @@ -174,7 +174,7 @@ def main():
json_out_path = f"{args.output_dir}/{partition}_targets.json"
import json

with PathManager.open(json_out_path, "w") as fp:
with g_pathmgr.open(json_out_path, "w") as fp:
json.dump(output_dict, fp)
logger.info("Saved Json to: {}".format(json_out_path))
logger.info("DONE!")
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
@@ -3,6 +3,7 @@
 cython==0.29.22
 fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
 fvcore==0.1.3.post20210317
+iopath==0.1.9
 hydra-core==1.0.7
 numpy==1.19.5
 parameterized==0.7.4