diff --git a/habitat_sim/utils/data/data_extractor.py b/habitat_sim/utils/data/data_extractor.py
new file mode 100644
index 0000000000..f3a33f8891
--- /dev/null
+++ b/habitat_sim/utils/data/data_extractor.py
@@ -0,0 +1,183 @@
+import math
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+import habitat_sim
+import habitat_sim.bindings as hsim
+from habitat_sim.agent import AgentState
+from habitat_sim.utils.data.pose_extractor import PoseExtractor
+
+
+class ImageExtractor:
+    r"""Main class that extracts data by creating a simulator and generating a topdown map from which to
+    iteratively generate image data.
+
+    :property scene_filepath: The location of the .glb file given to the simulator
+    :property labels: class labels of things to gather images of
+    :property cfg: configuration for simulator of type SimulatorConfiguration
+    :property sim: Simulator object
+    :property pixels_per_meter: Resolution of topdown map. 0.1 means each pixel in the topdown map
+        represents 0.1 x 0.1 meters in the coordinate system of the pathfinder
+    :property tdv: TopdownView object
+    :property topdown_view: The actual 2D array representing the topdown view
+    :property pose_extractor: PoseExtractor object
+    :property poses: list of camera poses gathered from pose_extractor
+    :property label_map: maps label numbers on the topdown map to their name
+    :property out_name_to_sensor_name: maps name of output to the sensor name corresponding to that output
+    :property output: list of output names that the user wants e.g. ['rgba', 'depth']
+    """
+
+    def __init__(
+        self,
+        scene_filepath,
+        labels=[0.0],
+        img_size=(512, 512),
+        output=["rgba"],
+        sim=None,
+    ):
+        self.scene_filepath = scene_filepath
+        self.labels = set(labels)
+        self.cfg = self._config_sim(self.scene_filepath, img_size)
+
+        if sim is None:
+            sim = habitat_sim.Simulator(self.cfg)
+        else:
+            # If a sim is provided we have to make a new cfg
+            self.cfg = self._config_sim(sim.config.sim_cfg.scene.id, img_size)
+            sim.reconfigure(self.cfg)
+
+        self.sim = sim
+        self.pixels_per_meter = 0.1
+        ref_point = self._get_pathfinder_reference_point(self.sim.pathfinder)
+        self.tdv = TopdownView(self.sim, ref_point[1], self.pixels_per_meter)
+        self.topdown_view = self.tdv.topdown_view
+
+        self.pose_extractor = PoseExtractor(
+            self.topdown_view, self.sim.pathfinder, self.pixels_per_meter
+        )
+        self.poses = self.pose_extractor.extract_poses(
+            labels=self.labels
+        )  # list of poses
+        self.label_map = {0.0: "unnavigable", 1.0: "navigable"}
+
+        # Configure the output for each data sample
+        self.out_name_to_sensor_name = {
+            "rgba": "color_sensor",
+            "depth": "depth_sensor",
+            "semantic": "semantic_sensor",
+        }
+        self.output = output
+
+    def __len__(self):
+        return len(self.poses)
+
+    def __getitem__(self, idx):
+        if isinstance(idx, slice):
+            start, stop, step = idx.start, idx.stop, idx.step
+            if start is None:
+                start = 0
+            if stop is None:
+                stop = len(self.poses)
+            if step is None:
+                step = 1
+
+            return [
+                self.__getitem__(i)
+                for i in range(start, stop, step)
+                if i < len(self.poses)
+            ]
+
+        pos, rot, label = self.poses[idx]
+        new_state = AgentState()
+        new_state.position = pos
+        new_state.rotation = rot
+        self.sim.agents[0].set_state(new_state)
+        obs = self.sim.get_sensor_observations()
+        sample = {
+            out_name: obs[self.out_name_to_sensor_name[out_name]]
+            for out_name in self.output
+        }
+        sample["label"] = self.label_map[label]
+
+        return sample
+
+    def close(self):
+        r"""Deletes the instance of the simulator. Necessary for instantiating
+        a different ImageExtractor.
+        """
+        if self.sim is not None:
+            self.sim.close()
+            del self.sim
+            self.sim = None
+
+    def _config_sim(self, scene_filepath, img_size):
+        settings = {
+            "width": img_size[1],  # Spatial resolution of the observations
+            "height": img_size[0],
+            "scene": scene_filepath,  # Scene path
+            "default_agent": 0,
+            "sensor_height": 1.5,  # Height of sensors in meters
+            "color_sensor": True,  # RGBA sensor
+            "semantic_sensor": True,  # Semantic sensor
+            "depth_sensor": True,  # Depth sensor
+            "silent": True,
+        }
+
+        sim_cfg = hsim.SimulatorConfiguration()
+        sim_cfg.enable_physics = False
+        sim_cfg.gpu_device_id = 0
+        sim_cfg.scene.id = settings["scene"]
+
+        # define default sensor parameters (see src/esp/Sensor/Sensor.h)
+        sensors = {
+            "color_sensor": {  # active if sim_settings["color_sensor"]
+                "sensor_type": hsim.SensorType.COLOR,
+                "resolution": [settings["height"], settings["width"]],
+                "position": [0.0, settings["sensor_height"], 0.0],
+            },
+            "depth_sensor": {  # active if sim_settings["depth_sensor"]
+                "sensor_type": hsim.SensorType.DEPTH,
+                "resolution": [settings["height"], settings["width"]],
+                "position": [0.0, settings["sensor_height"], 0.0],
+            },
+            "semantic_sensor": {  # active if sim_settings["semantic_sensor"]
+                "sensor_type": hsim.SensorType.SEMANTIC,
+                "resolution": [settings["height"], settings["width"]],
+                "position": [0.0, settings["sensor_height"], 0.0],
+            },
+        }
+
+        # create sensor specifications
+        sensor_specs = []
+        for sensor_uuid, sensor_params in sensors.items():
+            if settings[sensor_uuid]:
+                sensor_spec = hsim.SensorSpec()
+                sensor_spec.uuid = sensor_uuid
+                sensor_spec.sensor_type = sensor_params["sensor_type"]
+                sensor_spec.resolution = sensor_params["resolution"]
+                sensor_spec.position = sensor_params["position"]
+                sensor_spec.gpu2gpu_transfer = False
+                sensor_specs.append(sensor_spec)
+
+        # create agent specifications
+        agent_cfg = habitat_sim.agent.AgentConfiguration()
+        agent_cfg.sensor_specifications = sensor_specs
+
+        return habitat_sim.Configuration(sim_cfg, [agent_cfg])
+
+    def _get_pathfinder_reference_point(self, pf):
+        bound1, bound2 = pf.get_bounds()
+        startw = min(bound1[0], bound2[0])
+        starth = min(bound1[2], bound2[2])
+        starty = pf.get_random_navigable_point()[
+            1
+        ]  # Can't think of a better way to get a valid y-axis value
+        return (startw, starty, starth)  # width, y, height
+
+
+class TopdownView(object):
+    def __init__(self, sim, height, pixels_per_meter=0.1):
+        self.topdown_view = sim.pathfinder.get_topdown_view(
+            pixels_per_meter, height
+        ).astype(np.float64)
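
Note: a minimal usage sketch of the extractor above. The scene path is a placeholder, not a path from this PR; any .glb scene with a navmesh should work the same way.

    from habitat_sim.utils.data.data_extractor import ImageExtractor

    # "path/to/scene.glb" is hypothetical -- substitute a real scene file
    extractor = ImageExtractor("path/to/scene.glb", output=["rgba", "depth"])
    sample = extractor[0]    # dict with "rgba", "depth", and "label" keys
    some = extractor[0:10:2] # slicing is handled by __getitem__
    extractor.close()        # required before creating another ImageExtractor
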
diff --git a/habitat_sim/utils/data/pose_extractor.py b/habitat_sim/utils/data/pose_extractor.py
new file mode 100644
index 0000000000..671b604816
--- /dev/null
+++ b/habitat_sim/utils/data/pose_extractor.py
@@ -0,0 +1,192 @@
+import collections
+import copy
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+import habitat_sim
+from habitat_sim.utils.common import quat_from_two_vectors
+
+
+class PoseExtractor(object):
+    r"""Class that takes in a topdown view and pathfinder and determines a list of reasonable camera poses
+
+    :property topdown_view: 2D array representing topdown view of scene
+    :property pathfinder: the pathfinder from the Simulator object
+    :property pixels_per_meter: resolution of the topdown view (explained in ImageExtractor)
+    :property gridpoints: list of positions for the camera
+    :property dist: distance between each camera position
+    """
+
+    def __init__(self, topdown_view, pathfinder, pixels_per_meter=0.1):
+        self.topdown_view = topdown_view
+        self.pathfinder = pathfinder
+        self.pixels_per_meter = pixels_per_meter
+        self.gridpoints = None
+
+        # Determine the physical spacing between each camera position
+        x, z = self.topdown_view.shape
+        self.dist = (
+            min(x, z) // 10
+        )  # This produces lots of similar images for small scenes. Perhaps a smarter solution exists
+
+    def extract_poses(self, labels):
+        r"""Uses the topdown map to define positions in the scene from which to generate images. Returns
+        a list of poses, where each pose is (position, rotation, class label) for the camera. Currently
+        the class label only supports 'unnavigable points', meaning the user cannot yet specify a label
+        like 'chair' to obtain images of.
+
+        :property labels: The labels to take images of (currently only supports unnavigable points)
+        """
+        height, width = self.topdown_view.shape
+        n_gridpoints_width, n_gridpoints_height = (
+            width // self.dist - 1,
+            height // self.dist - 1,
+        )
+        self.gridpoints = []
+        for h in range(n_gridpoints_height):
+            for w in range(n_gridpoints_width):
+                point = (self.dist + h * self.dist, self.dist + w * self.dist)
+                if self.valid_point(*point):
+                    self.gridpoints.append(point)
+
+        # Find the closest point of the target class to each gridpoint
+        poses = []
+        self.cpis = []
+        for point in self.gridpoints:
+            closest_point_of_interest, label = self._bfs(point, labels)
+            if closest_point_of_interest is None:
+                continue
+
+            poses.append((point, closest_point_of_interest, label))
+            self.cpis.append(closest_point_of_interest)
+
+        # Convert from topdown map coordinate system to that of the pathfinder
+        startw, starty, starth = self._get_pathfinder_reference_point()
+        for i, pose in enumerate(poses):
+            pos, cpi, label = pose
+            r1, c1 = pos
+            r2, c2 = cpi
+            new_pos = np.array(
+                [
+                    startw + c1 * self.pixels_per_meter,
+                    starty,
+                    starth + r1 * self.pixels_per_meter,
+                ]
+            )
+            new_cpi = np.array(
+                [
+                    startw + c2 * self.pixels_per_meter,
+                    starty,
+                    starth + r2 * self.pixels_per_meter,
+                ]
+            )
+            cam_normal = new_cpi - new_pos
+            new_rot = self._compute_quat(cam_normal)
+            poses[i] = (new_pos, new_rot, label)
+
+        return poses
+
+    def valid_point(self, row, col):
+        r"""Whether a point is navigable
+
+        :property row: row in the topdown view
+        :property col: col in the topdown view
+        """
+        return self.topdown_view[row][col] == 1.0
+
+    def is_point_of_interest(self, point, labels):
+        r"""Whether one of the class labels exists at the specified point.
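
Note: to make the coordinate conversion in extract_poses concrete, a worked example with made-up reference values. With pixels_per_meter = 0.1, one map cell spans 0.1 m, so a cell index is scaled by 0.1 and offset by the navmesh's minimum bound.

    # Illustrative numbers only: suppose _get_pathfinder_reference_point()
    # returned (startw, starty, starth) = (-5.0, 0.1, -8.0).
    # The topdown-map cell (row, col) = (40, 30) then maps to:
    #   x = startw + col * 0.1 = -5.0 + 3.0 = -2.0
    #   y = starty             = 0.1
    #   z = starth + row * 0.1 = -8.0 + 4.0 = -4.0
    new_pos = np.array([-2.0, 0.1, -4.0])
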
+
+        :property point: the point to consider
+        :property labels: The labels to take images of (currently only supports unnavigable points)
+        """
+        r, c = point
+        is_interesting = False
+        if self.topdown_view[r][c] in labels:
+            is_interesting = True
+
+        return is_interesting, self.topdown_view[r][c]
+
+    def _show_topdown_view(self, cmap="seismic_r", show_valid_points=False):
+        if show_valid_points:
+            topdown_view_copy = copy.copy(self.topdown_view)
+            for p in self.gridpoints:
+                r, c = p
+                topdown_view_copy[r][c] = 0.5
+
+            for cpi in self.cpis:
+                r, c = cpi
+                topdown_view_copy[r][c] = 0.65
+
+            plt.imshow(topdown_view_copy, cmap=cmap)
+        else:
+            plt.imshow(self.topdown_view, cmap=cmap)
+
+        plt.show()
+
+    def _get_pathfinder_reference_point(self):
+        bound1, bound2 = self.pathfinder.get_bounds()
+        startw = min(bound1[0], bound2[0])
+        starth = min(bound1[2], bound2[2])
+        starty = self.pathfinder.get_random_navigable_point()[
+            1
+        ]  # Can't think of a better way to get a valid y-axis value
+        return (startw, starty, starth)  # width, y, height
+
+    def _compute_quat(self, cam_normal):
+        """Rotations start from -z axis"""
+        return quat_from_two_vectors(habitat_sim.geo.FRONT, cam_normal)
+
+    def _bfs(self, point, labels):
+        step = 3  # making this larger really speeds up BFS
+
+        def get_neighbors(p):
+            r, c = p
+            return [
+                (r - step, c - step),
+                (r - step, c),
+                (r - step, c + step),
+                (r, c - step),
+                (r, c + step),
+                (r + step, c - step),
+                (r + step, c),
+                (r + step, c + step),
+            ]
+
+        point_row, point_col = point
+        bounding_box = [
+            point_row - 2 * self.dist,
+            point_row + 2 * self.dist,
+            point_col - 2 * self.dist,
+            point_col + 2 * self.dist,
+        ]
+        in_bounds = (
+            lambda row, col: bounding_box[0] <= row <= bounding_box[1]
+            and bounding_box[2] <= col <= bounding_box[3]
+        )
+        is_valid = lambda row, col: 0 <= row < len(
+            self.topdown_view
+        ) and 0 <= col < len(self.topdown_view[0])
+        visited = (
+            set()
+        )  # Can use the topdown view as visited set to save space at cost of time to reset it for each bfs
+        q = collections.deque([(point, 0)])
+        while q:
+            cur, layer = q.popleft()
+            if not in_bounds(*cur):  # No point of interest found within bounding box
+                return None, None
+
+            visited.add(cur)
+            is_point_of_interest, label = self.is_point_of_interest(cur, labels)
+            if is_point_of_interest:
+                if layer > self.dist / 2:
+                    return cur, label
+                else:
+                    return None, None
+
+            for n in get_neighbors(cur):
+                if n not in visited and is_valid(*n):
+                    q.append((n, layer + step))
+
+        return None, None
diff --git a/pyproject.toml b/pyproject.toml
index 6a5a79de57..dbd6131a6a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.isort]
 skip_glob = ["*/deps/*", "*/build/*", "*/obselete/*"]
-known_third_party = ["PIL", "attr", "conf", "demo_runner", "hypothesis", "magnum", "numba", "numpy", "pytest", "quaternion", "scipy", "settings", "setuptools", "tqdm"]
+known_third_party = ["PIL", "attr", "conf", "demo_runner", "hypothesis", "magnum", "matplotlib", "numba", "numpy", "pytest", "quaternion", "scipy", "settings", "setuptools", "torch", "tqdm"]
 multi_line_output = 3
 force_grid_wrap = false
 line_length = 88
diff --git a/requirements.txt b/requirements.txt
index 6b3dbecc5c..90dab2c92e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ numpy-quaternion
 pillow
 scipy>=1.3.0
 tqdm
+matplotlib
diff --git a/src/esp/bindings/ShortestPathBindings.cpp b/src/esp/bindings/ShortestPathBindings.cpp
index ab5dab00cd..b5b0670022 100644
--- a/src/esp/bindings/ShortestPathBindings.cpp
+++ b/src/esp/bindings/ShortestPathBindings.cpp
@@ -72,6 +72,9 @@ void initShortestPathBindings(py::module& m) {
       .def(py::init(&PathFinder::create<>))
      .def("get_bounds", &PathFinder::bounds)
       .def("seed", &PathFinder::seed)
+      .def("get_topdown_view", &PathFinder::getTopDownView,
+           R"(Returns the topdown view of the PathFinder's navmesh.)",
+           "pixelsPerMeter"_a, "height"_a)
       .def("get_random_navigable_point", &PathFinder::getRandomNavigablePoint)
       .def("find_path", py::overload_cast<ShortestPath&>(&PathFinder::findPath),
            "path"_a)
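
Note: the new binding can be exercised directly from Python. A minimal sketch; the height of 0.0 and the resolution of 0.1 are example values, and sim is assumed to be an already-constructed habitat_sim.Simulator.

    import matplotlib.pyplot as plt

    # One pixel per 0.1 x 0.1 m patch of the navmesh, sampled at y = 0.0
    topdown = sim.pathfinder.get_topdown_view(0.1, 0.0)
    plt.imshow(topdown)  # True = navigable, False = unnavigable
    plt.show()
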
diff --git a/src/esp/nav/PathFinder.cpp b/src/esp/nav/PathFinder.cpp
index cc37809992..ab521805b4 100644
--- a/src/esp/nav/PathFinder.cpp
+++ b/src/esp/nav/PathFinder.cpp
@@ -229,6 +229,10 @@ struct PathFinder::Impl {
 
   std::pair<vec3f, vec3f> bounds() const { return bounds_; };
 
+  Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> getTopDownView(
+      const float pixelsPerMeter,
+      const float height);
+
  private:
   struct NavMeshDeleter {
     void operator()(dtNavMesh* mesh) { dtFreeNavMesh(mesh); }
@@ -1128,6 +1132,38 @@ bool PathFinder::Impl::isNavigable(const vec3f& pt,
   return true;
 }
 
+typedef Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> MatrixXb;
+
+Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic>
+PathFinder::Impl::getTopDownView(const float pixelsPerMeter,
+                                 const float height) {
+  std::pair<vec3f, vec3f> mapBounds = bounds();
+  vec3f bound1 = mapBounds.first;
+  vec3f bound2 = mapBounds.second;
+
+  float xspan = std::abs(bound1[0] - bound2[0]);
+  float zspan = std::abs(bound1[2] - bound2[2]);
+  int xResolution = xspan / pixelsPerMeter;
+  int zResolution = zspan / pixelsPerMeter;
+  float startx = fmin(bound1[0], bound2[0]);
+  float startz = fmin(bound1[2], bound2[2]);
+  MatrixXb topdownMap(zResolution, xResolution);
+
+  float curz = startz;
+  float curx = startx;
+  for (int h = 0; h < zResolution; h++) {
+    for (int w = 0; w < xResolution; w++) {
+      vec3f point = vec3f(curx, height, curz);
+      topdownMap(h, w) = isNavigable(point, 0.5);
+      curx = curx + pixelsPerMeter;
+    }
+    curz = curz + pixelsPerMeter;
+    curx = startx;
+  }
+
+  return topdownMap;
+}
+
 PathFinder::PathFinder() : pimpl_{spimpl::make_unique_impl<Impl>()} {};
 
 bool PathFinder::build(const NavMeshSettings& bs,
@@ -1222,5 +1258,11 @@ std::pair<vec3f, vec3f> PathFinder::bounds() const {
   return pimpl_->bounds();
 }
 
+Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> PathFinder::getTopDownView(
+    const float pixelsPerMeter,
+    const float height) {
+  return pimpl_->getTopDownView(pixelsPerMeter, height);
+}
+
 } // namespace nav
 } // namespace esp
diff --git a/src/esp/nav/PathFinder.h b/src/esp/nav/PathFinder.h
index 7c22493b24..00546b2b01 100644
--- a/src/esp/nav/PathFinder.h
+++ b/src/esp/nav/PathFinder.h
@@ -319,6 +319,10 @@ class PathFinder {
    */
  std::pair<vec3f, vec3f> bounds() const;
 
+  Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic> getTopDownView(
+      const float pixelsPerMeter,
+      const float height);
+
   ESP_SMART_POINTERS_WITH_UNIQUE_PIMPL(PathFinder);
 };
diff --git a/tests/gt_data/skokloster-castle-topdown-view.npy b/tests/gt_data/skokloster-castle-topdown-view.npy
new file mode 100644
index 0000000000..80710319c0
Binary files /dev/null and b/tests/gt_data/skokloster-castle-topdown-view.npy differ
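
Note: the test below never loads this ground-truth file explicitly, so the following comparison is purely illustrative of how it might be used; the height value and the dtype conversion are assumptions.

    import numpy as np

    gt = np.load("tests/gt_data/skokloster-castle-topdown-view.npy")
    tdv = TopdownView(sim, height=0.0, pixels_per_meter=0.1)
    assert np.array_equal(tdv.topdown_view.astype(gt.dtype), gt)  # hypothetical check
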
diff --git a/tests/test_data_extraction.py b/tests/test_data_extraction.py
new file mode 100644
index 0000000000..75ad48dbc6
--- /dev/null
+++ b/tests/test_data_extraction.py
@@ -0,0 +1,50 @@
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import DataLoader, Dataset
+
+from habitat_sim.utils.data.data_extractor import ImageExtractor, TopdownView
+
+
+class TrivialNet(nn.Module):
+    def __init__(self):
+        super(TrivialNet, self).__init__()
+
+    def forward(self, x):
+        x = F.relu(x)
+        return x
+
+
+class MyDataset(Dataset):
+    def __init__(self, extractor):
+        self.extractor = extractor
+
+    def __len__(self):
+        return len(self.extractor)
+
+    def __getitem__(self, idx):
+        return self.extractor[idx]
+
+
+def test_topdown_view(sim):
+    tdv = TopdownView(sim, height=0.0, pixels_per_meter=0.1)
+    topdown_view = tdv.topdown_view
+
+
+def test_data_extractor_end_to_end(sim):
+    # Path is relative to simulator.py
+    scene_filepath = ""
+    extractor = ImageExtractor(scene_filepath, labels=[0.0], img_size=(32, 32), sim=sim)
+    dataset = MyDataset(extractor)
+    dataloader = DataLoader(dataset, batch_size=3)
+    net = TrivialNet()
+
+    # Run data through network
+    for i, sample_batch in enumerate(dataloader):
+        img, label = sample_batch["rgba"], sample_batch["label"]
+        img = img.permute(0, 3, 1, 2).float()  # NHWC -> NCHW for channels-first nets
+        out = net(img)