This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Migrate VISSL from fvcore to ioPath ensuring backwards compatibility (#443)

Summary:
Pull Request resolved: #443

- Migrate to ioPath (long overdue); this resolves the ioPath warnings in logs during runtime.
- Add an iNaturalist dataset config to test that the images load correctly.

Reviewed By: QuentinDuval

Differential Revision: D31473900

fbshipit-source-id: a4d8a438cd4270f7577fceab6a64b982b9dcb628
prigoyal authored and facebook-github-bot committed Oct 8, 2021
1 parent f2361ec commit 83d859f
Showing 42 changed files with 201 additions and 201 deletions.
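
The change is mechanical and repeats across the files below: the fvcore import is swapped for iopath's global path manager, and every PathManager.<method> call becomes g_pathmgr.<method>. A minimal sketch of the before/after pattern (the path below is hypothetical; fvcore's PathManager had become a wrapper around iopath that logs warnings urging exactly this migration, which is what the commit silences):

# Before (fvcore):
#   from fvcore.common.file_io import PathManager
#   with PathManager.open(path, "rb") as f:
#       ...
#
# After (iopath) -- g_pathmgr is the library's global PathManager instance:
from iopath.common.file_io import g_pathmgr

path = "/tmp/example.txt"  # hypothetical path, for illustration only
with g_pathmgr.open(path, "w") as f:
    f.write("hello")

assert g_pathmgr.exists(path)  # same surface as before: open/exists/ls/rm/mkdirs
g_pathmgr.rm(path)
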
1 change: 1 addition & 0 deletions docs/requirements.txt
@@ -18,6 +18,7 @@ parameterized==0.7.4
 tabulate
 pycocotools>=2.0.1
 fvcore
+iopath==0.1.9
 fairscale
 git+git://github.com/facebookresearch/ClassyVision.git
 https://download.pytorch.org/whl/cpu/torch-1.5.0%2Bcpu-cp37-cp37m-linux_x86_64.whl
4 changes: 2 additions & 2 deletions extra_scripts/convert_caffe2_to_torchvision_resnet.py
@@ -19,7 +19,7 @@
 from collections import OrderedDict
 
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # create the logger
@@ -124,7 +124,7 @@ def _rename_weights_for_resnet(weights, stage_names):
 
 
 def _load_c2_pickled_weights(file_path):
-    with PathManager.open(file_path, "rb") as f:
+    with g_pathmgr.open(file_path, "rb") as f:
         data = pickle.load(f, encoding="latin1")
     if "blobs" in data:
         weights = data["blobs"]
6 changes: 3 additions & 3 deletions extra_scripts/convert_caffe2_to_vissl_alexnet.py
@@ -23,7 +23,7 @@
 
 import numpy as np
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # create the logger
@@ -58,7 +58,7 @@ def remove_jigsaw_names(data):
 
 
 def _load_c2_pickled_weights(file_path):
-    with PathManager.open(file_path, "rb") as f:
+    with g_pathmgr.open(file_path, "rb") as f:
         data = pickle.load(f, encoding="latin1")
     if "blobs" in data:
         weights = data["blobs"]
@@ -71,7 +71,7 @@ def _load_c2_weights(file_path):
     if file_path.endswith("pkl"):
         weights = _load_c2_pickled_weights(file_path)
     elif file_path.endswith("npy"):
-        with PathManager.open(file_path, "rb") as fopen:
+        with g_pathmgr.open(file_path, "rb") as fopen:
             weights = np.load(fopen, allow_pickle=True, encoding="latin1")[()]
     return weights
 
16 changes: 8 additions & 8 deletions extra_scripts/convert_folder_to_filelist.py
@@ -21,7 +21,7 @@
 import argparse
 import os
 
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.env import setup_path_manager
 from vissl.utils.io import save_file
 
@@ -58,21 +58,21 @@ def get_argument_parser():
 
     setup_path_manager()
 
-    splits = PathManager.ls(args.input)
+    splits = g_pathmgr.ls(args.input)
     print(f"The following splits are found: { ','.join(splits) }")
 
     dataset_summary = {}
 
     for split in ["train", "trainval", "val", "test"]:
-        if not PathManager.exists(os.path.join(args.input, split)):
+        if not g_pathmgr.exists(os.path.join(args.input, split)):
             continue
 
         dataset_summary[split] = {}
         img_paths = []
         img_labels = []
 
         split_path = os.path.join(args.input, split)
-        label_paths = PathManager.ls(split_path)
+        label_paths = g_pathmgr.ls(split_path)
         dataset_summary[split]["labels"] = label_paths
         dataset_summary[split]["num_labels"] = len(label_paths)
         print(f"{len(label_paths)} classes found for { split } split.")
@@ -81,7 +81,7 @@ def get_argument_parser():
         # Populate the img_paths and img_labels based on torchvision image folder file structure.
         for label in label_paths:
             label_path = os.path.join(split_path, label)
-            images = PathManager.ls(os.path.join(split_path, label))
+            images = g_pathmgr.ls(os.path.join(split_path, label))
             print(f"{len(images)} examples found for { label }, { split }.")
             total_split_examples += len(images)
             for image in images:
@@ -94,17 +94,17 @@ def get_argument_parser():
         # Remove the split .npy filelist if they exist and resave them..
         image_path = os.path.join(args.output, f"{split}_images.npy")
 
-        PathManager.rm(image_path)
+        g_pathmgr.rm(image_path)
         save_file(img_paths, image_path)
         print(f"Saved { image_path }")
 
         label_path = os.path.join(args.output, f"{split}_labels.npy")
 
-        PathManager.rm(label_path)
+        g_pathmgr.rm(label_path)
         save_file(img_labels, label_path)
         print(f"Saved { label_path }")
 
     # Save dataset summary.
     dataset_summary_path = os.path.join(args.output, "dataset_summary.json")
-    PathManager.rm(dataset_summary_path)
+    g_pathmgr.rm(dataset_summary_path)
     save_file(dataset_summary, dataset_summary_path)
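
Note the setup_path_manager() call above: with iopath, this kind of setup amounts to registering PathHandler objects on the global g_pathmgr, after which the same open/ls/exists/rm calls transparently resolve non-local paths. A minimal sketch of a custom handler, assuming a hypothetical myfs:// scheme backed by a local mount (illustrative only, not VISSL's actual handlers):

from typing import Any, List

from iopath.common.file_io import PathHandler, g_pathmgr


class MyFSPathHandler(PathHandler):
    """Hypothetical handler mapping 'myfs://' URIs onto a local mount."""

    PREFIX = "myfs://"
    MOUNT = "/mnt/myfs/"

    def _get_supported_prefixes(self) -> List[str]:
        return [self.PREFIX]

    def _get_local_path(self, path: str, **kwargs: Any) -> str:
        # Strip the URI scheme and point at the mounted copy.
        return self.MOUNT + path[len(self.PREFIX):]

    def _open(self, path: str, mode: str = "r", **kwargs: Any):
        return open(self._get_local_path(path), mode)


# Register once at startup; g_pathmgr.open("myfs://...") then works everywhere
# the scripts in this commit use g_pathmgr.
g_pathmgr.register_handler(MyFSPathHandler())
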
4 changes: 2 additions & 2 deletions extra_scripts/convert_sharded_checkpoint.py
@@ -20,7 +20,7 @@
 import enum
 import os
 
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.checkpoint import CheckpointFormatConverter
 from vissl.utils.env import setup_path_manager
 from vissl.utils.io import makedir
@@ -41,7 +41,7 @@ class CheckpointType(enum.Enum):
 
 
 def convert_checkpoint(input_path: str, output_path: str, output_type: str):
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         input_path
     ), f"Checkpoint input path: {input_path} not found."
 
4 changes: 2 additions & 2 deletions extra_scripts/convert_vissl_to_torchvision.py
@@ -12,7 +12,7 @@
 import sys
 
 import torch
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.checkpoint import replace_module_prefix
 from vissl.utils.io import is_url
 
@@ -30,7 +30,7 @@
 
 
 def convert_and_save_model(args, replace_prefix):
-    assert PathManager.exists(args.output_dir), "Output directory does NOT exist"
+    assert g_pathmgr.exists(args.output_dir), "Output directory does NOT exist"
 
     # load the model
     model_path = args.model_url_or_file
4 changes: 2 additions & 2 deletions extra_scripts/create_low_shot_samples.py
@@ -11,7 +11,7 @@
 import random
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.io import load_file, save_file
 
 
@@ -171,7 +171,7 @@ def main():
     )
     opts = parser.parse_args()
 
-    assert PathManager.exists(opts.targets_data_file), "Target file not found. Abort"
+    assert g_pathmgr.exists(opts.targets_data_file), "Target file not found. Abort"
     targets = load_file(opts.targets_data_file)
    sample_ids = list(range(1, 1 + opts.num_samples))
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_coco_data_files.py
@@ -15,7 +15,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from pycocotools.coco import COCO
 
 
@@ -53,8 +53,8 @@ def get_valid_objs(entry, objs):
 
 
 def get_imgs_labels_info(split, json_file, args):
-    assert PathManager.exists(json_file), "Data source does not exist. Abort"
-    json_data = json.load(PathManager.open(json_file, "r"))
+    assert g_pathmgr.exists(json_file), "Data source does not exist. Abort"
+    json_data = json.load(g_pathmgr.open(json_file, "r"))
     image_index = [x["id"] for x in json_data["images"]]
     coco = COCO(json_file)
 
8 changes: 4 additions & 4 deletions extra_scripts/datasets/create_imagenet_a_data_files.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 import torchvision.datasets as datasets
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from tqdm import tqdm
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import cleanup_dir
@@ -44,7 +44,7 @@ def get_argument_parser():
 
 def remove_file_name_whitespace(input_path: str):
     """
-    Remove the whitespace in the file names for better compatibility with PathManager.
+    Remove the whitespace in the file names for better compatibility with g_pathmgr.
     """
     for class_folder_path in os.listdir(input_path):
         # All necessary folders start with n.
@@ -82,7 +82,7 @@ class ImagenetTargetMapper:
     )
 
     def __init__(self):
-        with PathManager.open(self.IMAGENET_TARGETS_URL) as f:
+        with g_pathmgr.open(self.IMAGENET_TARGETS_URL) as f:
             imagenet_classes = [line.strip() for line in f.readlines()]
         imagenet_classes.sort()
         self.label_to_id = {label: i for i, label in enumerate(imagenet_classes)}
@@ -143,7 +143,7 @@ def cleanup_unused_files(output_path: str):
     download_datasets(args.input)
 
     input_path = os.path.join(args.input, "imagenet-a")
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_imagenet_data_files.py
@@ -16,7 +16,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 
 
 # initiate the logger
@@ -36,7 +36,7 @@ def get_all_classes(data_dir):
 
 
 def get_images_labels_info(split, args):
-    assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort!"
+    assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort!"
     data_dir = f"{args.data_source_dir}/{split}"
     class_idx = get_all_classes(data_dir)
     logger.info("Number of classes in {} data: {}".format(split, len(class_idx)))
@@ -103,7 +103,7 @@ def main():
        json_out_path = f"{args.output_dir}/{partition}_targets.json"
        import json
 
-        with PathManager.open(json_out_path, "w") as fp:
+        with g_pathmgr.open(json_out_path, "w") as fp:
            json.dump(output_dict, fp)
        logger.info("Saved Json to: {}".format(json_out_path))
    logger.info("DONE!")
4 changes: 2 additions & 2 deletions extra_scripts/datasets/create_imagenet_r_data_files.py
@@ -10,7 +10,7 @@
     create_imagenet_test_files,
     remove_file_name_whitespace,
 )
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import cleanup_dir
 
@@ -77,7 +77,7 @@ def cleanup_unused_files(output_path: str):
 
     dataset_name = "imagenet-r"
     input_path = os.path.join(args.input, dataset_name)
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
4 changes: 2 additions & 2 deletions extra_scripts/datasets/create_imagenet_sketch_data_files.py
@@ -10,7 +10,7 @@
     create_imagenet_test_files,
     remove_file_name_whitespace,
 )
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from torchvision.datasets.utils import extract_archive
 from vissl.utils.download import download_google_drive_url
 from vissl.utils.io import cleanup_dir
@@ -82,7 +82,7 @@ def cleanup_unused_files(output_path: str):
     download_datasets(args.input)
 
     input_path = os.path.join(args.input, "imagenet_sketch")
-    assert PathManager.exists(input_path), "Input data path does not exist"
+    assert g_pathmgr.exists(input_path), "Input data path does not exist"
     remove_file_name_whitespace(input_path)
     create_imagenet_test_files(input_path, args.output)
 
6 changes: 3 additions & 3 deletions extra_scripts/datasets/create_inaturalist2018_data_files.py
@@ -20,7 +20,7 @@
 import sys
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from vissl.utils.download import download_and_extract_archive
 from vissl.utils.io import save_file
 
@@ -111,10 +111,10 @@ def main():
     args = parser.parse_args()
 
     # Make sure that the input and output directories exist.
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         args.input_dir_path
     ), "Data input directory not found! Please create the directory"
-    assert PathManager.exists(
+    assert g_pathmgr.exists(
         args.output_dir_path
     ), "Data output directory not found! Please create the directory"
 
8 changes: 4 additions & 4 deletions extra_scripts/datasets/create_sun397_data_files.py
@@ -10,7 +10,7 @@
 from typing import Any, List
 
 import numpy as np
-from fvcore.common.file_io import PathManager
+from iopath.common.file_io import g_pathmgr
 from tqdm import tqdm
 from vissl.utils.download import download_and_extract_archive
 
@@ -95,7 +95,7 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed:
     by allocating 70% of labels to "train", 10% to "val" and 20% to "test".
     """
     random.seed(seed)
-    PathManager.mkdirs(output_path)
+    g_pathmgr.mkdirs(output_path)
 
     # List all the available classes in SUN397 and their path
     image_folder = os.path.join(input_path, "SUN397")
@@ -129,10 +129,10 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed:
     # Save each split
     for split, samples in splits_data.items():
         image_output_path = os.path.join(output_path, f"{split}_images.npy")
-        with PathManager.open(image_output_path, mode="wb") as f:
+        with g_pathmgr.open(image_output_path, mode="wb") as f:
             np.save(f, np.array(samples.image_paths))
         label_output_path = os.path.join(output_path, f"{split}_labels.npy")
-        with PathManager.open(label_output_path, mode="wb") as f:
+        with g_pathmgr.open(label_output_path, mode="wb") as f:
             np.save(f, np.array(samples.image_labels))
 
Expand Down
10 changes: 5 additions & 5 deletions extra_scripts/datasets/create_voc_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from glob import glob

import numpy as np
from fvcore.common.file_io import PathManager
from iopath.common.file_io import g_pathmgr


# initiate the logger
Expand All @@ -39,7 +39,7 @@ def validate_files(input_files):

def get_data_files(split, args):
data_dir = f"{args.data_source_dir}/ImageSets/Main"
assert PathManager.exists(data_dir), "Data: {} doesn't exist".format(data_dir)
assert g_pathmgr.exists(data_dir), "Data: {} doesn't exist".format(data_dir)
test_data_files = glob(os.path.join(data_dir, "*_test.txt"))
test_data_files = validate_files(test_data_files)
if args.separate_partitions > 0:
Expand Down Expand Up @@ -68,7 +68,7 @@ def get_data_files(split, args):


def get_images_labels_info(split, args):
assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort"
assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort"

data_files = get_data_files(split, args)
# we will construct a map for image name to the vector of -1, 0, 1
Expand All @@ -77,7 +77,7 @@ def get_images_labels_info(split, args):
for cls_num, data_path in enumerate(sorted(data_files)):
# for this class, we have images and each image will have label
# 1, -1, 0 -> present, not present, ignore respectively as in VOC data.
with PathManager.open(data_path, "r") as fopen:
with g_pathmgr.open(data_path, "r") as fopen:
for line in fopen:
try:
img_name, orig_label = line.strip().split()
Expand Down Expand Up @@ -174,7 +174,7 @@ def main():
json_out_path = f"{args.output_dir}/{partition}_targets.json"
import json

with PathManager.open(json_out_path, "w") as fp:
with g_pathmgr.open(json_out_path, "w") as fp:
json.dump(output_dict, fp)
logger.info("Saved Json to: {}".format(json_out_path))
logger.info("DONE!")
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
@@ -3,6 +3,7 @@
 cython==0.29.22
 fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6
 fvcore==0.1.3.post20210317
+iopath==0.1.9
 hydra-core==1.0.7
 numpy==1.19.5
 parameterized==0.7.4