Merge branch 'main' into aeojnlajnef
pmeier authored Aug 2, 2023
2 parents 4b02136 + cab9fba commit a7383a6
Showing 48 changed files with 3,478 additions and 1,577 deletions.
.github/scripts/run-clang-format.py (2 changes: 1 addition & 1 deletion)

@@ -48,7 +48,7 @@
 DEVNULL = open(os.devnull, "wb")


-DEFAULT_EXTENSIONS = "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu"
+DEFAULT_EXTENSIONS = "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu,mm"


 class ExitStatus:
CMakeLists.txt (9 changes: 9 additions & 0 deletions)

@@ -4,6 +4,7 @@ set(CMAKE_CXX_STANDARD 17)
 file(STRINGS version.txt TORCHVISION_VERSION)

 option(WITH_CUDA "Enable CUDA support" OFF)
+option(WITH_MPS "Enable MPS support" OFF)
 option(WITH_PNG "Enable features requiring LibPNG." ON)
 option(WITH_JPEG "Enable features requiring LibJPEG." ON)
 option(USE_PYTHON "Link to Python when building" OFF)
@@ -15,6 +16,11 @@ if(WITH_CUDA)
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
 endif()

+if(WITH_MPS)
+  enable_language(OBJC OBJCXX)
+  add_definitions(-DWITH_MPS)
+endif()
+
 find_package(Torch REQUIRED)

 if (WITH_PNG)
@@ -79,6 +85,9 @@ list(APPEND ALLOW_LISTED ${TVCPP} ${TVCPP}/io/image ${TVCPP}/io/image/cpu ${TVCP
 if(WITH_CUDA)
   list(APPEND ALLOW_LISTED ${TVCPP}/ops/cuda ${TVCPP}/ops/autocast)
 endif()
+if(WITH_MPS)
+  list(APPEND ALLOW_LISTED ${TVCPP}/ops/mps)
+endif()

 FOREACH(DIR ${ALLOW_LISTED})
   file(GLOB ALL_SOURCES ${ALL_SOURCES} ${DIR}/*.*)
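With the new WITH_MPS option, torchvision's C++ ops under ops/mps can be dispatched to Apple-silicon GPUs. A minimal sketch of exercising an op on the MPS backend from Python, assuming a build configured with -DWITH_MPS=ON on an MPS-capable machine (nms here is purely illustrative, not a claim about which kernels this commit adds):

import torch
from torchvision.ops import nms

# Prefer the MPS backend when available; otherwise fall back to CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"

boxes = torch.tensor(
    [[0.0, 0.0, 10.0, 10.0],
     [1.0, 1.0, 11.0, 11.0],
     [20.0, 20.0, 30.0, 30.0]],
    device=device,
)
scores = torch.tensor([0.9, 0.8, 0.7], device=device)

keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # indices of the boxes that survive suppression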
gallery/plot_cutmix_mixup.py (10 changes: 5 additions & 5 deletions)

@@ -4,8 +4,8 @@
 How to use CutMix and MixUp
 ===========================
-:class:`~torchvision.transforms.v2.Cutmix` and
-:class:`~torchvision.transforms.v2.Mixup` are popular augmentation strategies
+:class:`~torchvision.transforms.v2.CutMix` and
+:class:`~torchvision.transforms.v2.MixUp` are popular augmentation strategies
 that can improve classification accuracy.
 These transforms are slightly different from the rest of the Torchvision
@@ -79,8 +79,8 @@
 dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

-cutmix = v2.Cutmix(num_classes=NUM_CLASSES)
-mixup = v2.Mixup(num_classes=NUM_CLASSES)
+cutmix = v2.CutMix(num_classes=NUM_CLASSES)
+mixup = v2.MixUp(num_classes=NUM_CLASSES)
 cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

 for images, labels in dataloader:
@@ -148,5 +148,5 @@ def labels_getter(batch):
     return batch["target"]["classes"]


-out = v2.Cutmix(num_classes=NUM_CLASSES, labels_getter=labels_getter)(batch)
+out = v2.CutMix(num_classes=NUM_CLASSES, labels_getter=labels_getter)(batch)
 print(f"{out['imgs'].shape = }, {out['target']['classes'].shape = }")
gallery/plot_datapoints.py (18 changes: 9 additions & 9 deletions)

@@ -23,7 +23,7 @@
 from torchvision.transforms.v2 import functional as F


-########################################################################################################################
+# %%
 # What are datapoints?
 # --------------------
 #
@@ -36,7 +36,7 @@
 assert image.data_ptr() == tensor.data_ptr()


-########################################################################################################################
+# %%
 # Under the hood, they are needed in :mod:`torchvision.transforms.v2` to correctly dispatch to the appropriate function
 # for the input data.
 #
@@ -59,22 +59,22 @@
 print(image)


-########################################################################################################################
+# %%
 # Similar to other PyTorch creation ops, the constructor also takes the ``dtype``, ``device``, and ``requires_grad``
 # parameters.

 float_image = datapoints.Image([[[0, 1], [1, 0]]], dtype=torch.float32, requires_grad=True)
 print(float_image)


-########################################################################################################################
+# %%
 # In addition, :class:`~torchvision.datapoints.Image` and :class:`~torchvision.datapoints.Mask` also take a
 # :class:`PIL.Image.Image` directly:

 image = datapoints.Image(PIL.Image.open("assets/astronaut.jpg"))
 print(image.shape, image.dtype)

-########################################################################################################################
+# %%
 # In general, the datapoints can also store additional metadata that complements the underlying tensor. For example,
 # :class:`~torchvision.datapoints.BoundingBoxes` stores the coordinate format as well as the spatial size of the
 # corresponding image alongside the actual values:
@@ -85,7 +85,7 @@
 print(bounding_box)


-########################################################################################################################
+# %%
 # Do I have to wrap the output of the datasets myself?
 # ----------------------------------------------------
 #
@@ -120,7 +120,7 @@ def __getitem__(self, item):

         ...

-########################################################################################################################
+# %%
 # 2. Perform the wrapping inside a custom transformation at the beginning of your pipeline:


@@ -144,7 +144,7 @@ def get_transform(train):
     transforms.append(T.PILToTensor())
     ...

-########################################################################################################################
+# %%
 # .. note::
 #
 #    If both :class:`~torchvision.datapoints.BoundingBoxes`'es and :class:`~torchvision.datapoints.Mask`'s are included in
@@ -171,7 +171,7 @@ def get_transform(train):

 assert isinstance(new_image, torch.Tensor) and not isinstance(new_image, datapoints.Image)

-########################################################################################################################
+# %%
 # .. note::
 #
 #    This "unwrapping" behaviour is something we're actively seeking feedback on. If you find this surprising or if you
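As background for the cells touched above, the datapoint wrappers behave like tensors. A minimal sketch of the two constructions the diff context shows, assuming the 0.16-era torchvision.datapoints namespace (toy values are illustrative):

import torch
from torchvision import datapoints

# Wrapping a tensor is zero-copy: the Image shares the tensor's storage.
tensor = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8)
image = datapoints.Image(tensor)
assert image.data_ptr() == tensor.data_ptr()

# Like torch creation ops, the constructor accepts dtype/device/requires_grad.
float_image = datapoints.Image([[[0, 1], [1, 0]]], dtype=torch.float32, requires_grad=True)
print(float_image.shape, float_image.dtype)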
gallery/plot_optical_flow.py (16 changes: 8 additions & 8 deletions)

@@ -42,7 +42,7 @@ def plot(imgs, **imshow_kwargs):

     plt.tight_layout()

-###################################
+# %%
 # Reading Videos Using Torchvision
 # --------------------------------
 # We will first read a video using :func:`~torchvision.io.read_video`.
@@ -62,7 +62,7 @@ def plot(imgs, **imshow_kwargs):
 video_path = Path(tempfile.mkdtemp()) / "basketball.mp4"
 _ = urlretrieve(video_url, video_path)

-#########################
+# %%
 # :func:`~torchvision.io.read_video` returns the video frames, audio frames and
 # the metadata associated with the video. In our case, we only need the video
 # frames.
@@ -79,7 +79,7 @@ def plot(imgs, **imshow_kwargs):

 plot(img1_batch)

-#########################
+# %%
 # The RAFT model accepts RGB images. We first get the frames from
 # :func:`~torchvision.io.read_video` and resize them to ensure their dimensions
 # are divisible by 8. Note that we explicitly use ``antialias=False``, because
@@ -104,7 +104,7 @@ def preprocess(img1_batch, img2_batch):
 print(f"shape = {img1_batch.shape}, dtype = {img1_batch.dtype}")


-####################################
+# %%
 # Estimating Optical flow using RAFT
 # ----------------------------------
 # We will use our RAFT implementation from
@@ -125,7 +125,7 @@ def preprocess(img1_batch, img2_batch):
 print(f"type = {type(list_of_flows)}")
 print(f"length = {len(list_of_flows)} = number of iterations of the model")

-####################################
+# %%
 # The RAFT model outputs lists of predicted flows where each entry is a
 # (N, 2, H, W) batch of predicted flows that corresponds to a given "iteration"
 # in the model. For more details on the iterative nature of the model, please
@@ -144,7 +144,7 @@ def preprocess(img1_batch, img2_batch):
 print(f"min = {predicted_flows.min()}, max = {predicted_flows.max()}")

-####################################
+# %%
 # Visualizing predicted flows
 # ---------------------------
 # Torchvision provides the :func:`~torchvision.utils.flow_to_image` utility to
@@ -166,7 +166,7 @@ def preprocess(img1_batch, img2_batch):
 grid = [[img1, flow_img] for (img1, flow_img) in zip(img1_batch, flow_imgs)]
 plot(grid)

-####################################
+# %%
 # Bonus: Creating GIFs of predicted flows
 # ---------------------------------------
 # In the example above we have only shown the predicted flows of 2 pairs of
@@ -187,7 +187,7 @@
 #     output_folder = "/tmp/"  # Update this to the folder of your choice
 #     write_jpeg(flow_img, output_folder + f"predicted_flow_{i}.jpg")

-####################################
+# %%
 # Once the .jpg flow images are saved, you can convert them into a video or a
 # GIF using ffmpeg with e.g.:
 #
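For context on the RAFT cells above, a condensed inference sketch, assuming torchvision's optical-flow models are available and using random frames in place of real video (RAFT requires height and width divisible by 8):

import torch
from torchvision.models.optical_flow import Raft_Large_Weights, raft_large

weights = Raft_Large_Weights.DEFAULT
model = raft_large(weights=weights).eval()

# Two fake batches of consecutive frames; sizes must be divisible by 8.
img1_batch = torch.rand(2, 3, 224, 224)
img2_batch = torch.rand(2, 3, 224, 224)
img1_batch, img2_batch = weights.transforms()(img1_batch, img2_batch)

with torch.no_grad():
    list_of_flows = model(img1_batch, img2_batch)  # one (N, 2, H, W) tensor per iteration

predicted_flows = list_of_flows[-1]  # the final iteration is the most accurate estimate
print(predicted_flows.shape)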
gallery/plot_repurposing_annotations.py (20 changes: 10 additions & 10 deletions)

@@ -36,7 +36,7 @@ def show(imgs):
         axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])


-####################################
+# %%
 # Masks
 # -----
 # In tasks like instance and panoptic segmentation, masks are commonly defined, and are defined by this package,
@@ -53,7 +53,7 @@ def show(imgs):
 # A nice property of masks is that they can be easily repurposed to be used in methods to solve a variety of object
 # localization tasks.

-####################################
+# %%
 # Converting Masks to Bounding Boxes
 # -----------------------------------------------
 # For example, the :func:`~torchvision.ops.masks_to_boxes` operation can be used to
@@ -70,7 +70,7 @@ def show(imgs):
 mask = read_image(mask_path)


-#########################
+# %%
 # Here the masks are represented as a PNG Image, with floating point values.
 # Each pixel is encoded as different colors, with 0 being background.
 # Notice that the spatial dimensions of image and mask match.
@@ -79,7 +79,7 @@ def show(imgs):
 print(img.size())
 print(mask)

-############################
+# %%

 # We get the unique colors, as these would be the object ids.
 obj_ids = torch.unique(mask)
@@ -91,7 +91,7 @@ def show(imgs):
 # Note that this snippet would work as well if the masks were float values instead of ints.
 masks = mask == obj_ids[:, None, None]

-########################
+# %%
 # Now the masks are a boolean tensor.
 # The first dimension in this case is 3 and denotes the number of instances: there are 3 people in the image.
 # The other two dimensions are height and width, which are equal to the dimensions of the image.
@@ -101,7 +101,7 @@ def show(imgs):
 print(masks.size())
 print(masks)

-####################################
+# %%
 # Let us visualize an image and plot its corresponding segmentation masks.
 # We will use the :func:`~torchvision.utils.draw_segmentation_masks` to draw the segmentation masks.

@@ -113,7 +113,7 @@ def show(imgs):

 show(drawn_masks)

-####################################
+# %%
 # To convert the boolean masks into bounding boxes, we will use the
 # :func:`~torchvision.ops.masks_to_boxes` function from the torchvision.ops module.
 # It returns the boxes in ``(xmin, ymin, xmax, ymax)`` format.
@@ -124,7 +124,7 @@ def show(imgs):
 print(boxes.size())
 print(boxes)

-####################################
+# %%
 # As the shape denotes, there are 3 boxes, in ``(xmin, ymin, xmax, ymax)`` format.
 # These can be visualized very easily with the :func:`~torchvision.utils.draw_bounding_boxes` utility
 # provided in :ref:`torchvision.utils <utils>`.
@@ -134,7 +134,7 @@ def show(imgs):
 drawn_boxes = draw_bounding_boxes(img, boxes, colors="red")
 show(drawn_boxes)

-###################################
+# %%
 # These boxes can now directly be used by detection models in torchvision.
 # Here is a demo with a Faster R-CNN model loaded from
 # :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`
@@ -153,7 +153,7 @@ def show(imgs):
 detection_outputs = model(img.unsqueeze(0), [target])


-####################################
+# %%
 # Converting Segmentation Dataset to Detection Dataset
 # ----------------------------------------------------
 #
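A condensed sketch of the masks-to-boxes conversion these cells demonstrate, assuming boolean instance masks of shape (N, H, W) (the toy masks below are illustrative):

import torch
from torchvision.ops import masks_to_boxes

# Two fake boolean instance masks on an 8x8 canvas.
masks = torch.zeros(2, 8, 8, dtype=torch.bool)
masks[0, 1:4, 1:4] = True
masks[1, 5:8, 2:7] = True

boxes = masks_to_boxes(masks)
print(boxes)  # one (xmin, ymin, xmax, ymax) box per instance mask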
gallery/plot_scripted_tensor_transforms.py (12 changes: 6 additions & 6 deletions)

@@ -45,15 +45,15 @@ def show(imgs):
         axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])


-####################################
+# %%
 # The :func:`~torchvision.io.read_image` function lets you read an image and
 # directly load it as a tensor

 dog1 = read_image(str(Path('assets') / 'dog1.jpg'))
 dog2 = read_image(str(Path('assets') / 'dog2.jpg'))
 show([dog1, dog2])

-####################################
+# %%
 # Transforming images on GPU
 # --------------------------
 # Most transforms natively support tensors on top of PIL images (to visualize
@@ -76,7 +76,7 @@ def show(imgs):
 transformed_dog2 = transforms(dog2)
 show([transformed_dog1, transformed_dog2])

-####################################
+# %%
 # Scriptable transforms for easier deployment via torchscript
 # -----------------------------------------------------------
 # We now show how to combine image transformations and a model forward pass,
@@ -103,7 +103,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return y_pred.argmax(dim=1)


-####################################
+# %%
 # Now, let's define scripted and non-scripted instances of ``Predictor`` and
 # apply them to multiple tensor images of the same size

@@ -115,7 +115,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 res = predictor(batch)
 res_scripted = scripted_predictor(batch)

-####################################
+# %%
 # We can verify that the predictions of the scripted and non-scripted models are
 # the same:

@@ -128,7 +128,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     assert pred == pred_scripted
     print(f"Prediction for Dog {i + 1}: {labels[str(pred.item())]}")

-####################################
+# %%
 # Since the model is scripted, it can be easily dumped on disk and re-used

 import tempfile
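A minimal sketch of the scripting-and-reloading workflow these cells cover, assuming any nn.Module whose forward is TorchScript-compatible (the tiny module below is a stand-in for the tutorial's Predictor):

import tempfile
from pathlib import Path

import torch
import torch.nn as nn


class Doubler(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * 2.0


scripted = torch.jit.script(Doubler())

# A scripted module serializes to disk and reloads without the Python class.
path = Path(tempfile.mkdtemp()) / "doubler.pt"
scripted.save(str(path))
reloaded = torch.jit.load(str(path))
assert torch.equal(reloaded(torch.ones(2)), torch.tensor([2.0, 2.0]))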