DDS: various improvements (#3573)

- If a DDS pixel format header contains "this is a normal map" flag set, then handle that properly (#3525): - For a DXT5, this means it's a "DXT5nm" format. Normal X & Y components are in green and alpha channel respectively. Compute Z component from those, and the result is a 3-channel tangent space normal map. - For ATI2/BC5 format that also has a "normal map" flag set, compute the Z component automatically too, and result is a 3-channel image. However a bunch of ATI2/BC5 normal maps out there in the wild do not have that bit set, so also have a global "dds:bc5normal" attribute to optionally turn this on. - Fix support of R10G10B10A2 format - was producing garbage data due to code assumption that truncation down to 8 bits never needs to happen. - Fix bit expansion ("too dark") of very low-bit formats, e.g. R3G3B2. Code was simply shifting the bits, instead of properly repeating the high bits into new low bit positions (i.e. a 3-bit "111" needs to become an 8-bit "11111111" and not a "11100000"). - Support "RXGB" (DXT5, with red & alpha swapped) as well as "BC4U" and "BC5U" formats (the latter two are just BC4 and BC5, produced by some older tools). - Support alpha-only (e.g. A8) formats. All this mostly prompted by doing DDS improvements in Blender, and wanting to get Cycles renderer (which uses OIIO) to the same feature parity. See https://developer.blender.org/T101405 and https://developer.blender.org/D16087.
AcademySoftwareFoundation · Oct 2, 2022 · 5dd9fee · 5dd9fee
1 parent 490d51b
commit 5dd9fee
Show file tree

Hide file tree

Showing 6 changed files with 170 additions and 61 deletions.
diff --git a/src/dds.imageio/dds_pvt.h b/src/dds.imageio/dds_pvt.h
@@ -19,6 +19,10 @@ namespace DDS_pvt {
 #define DDS_4CC_ATI1 DDS_MAKE4CC('A', 'T', 'I', '1')
 #define DDS_4CC_ATI2 DDS_MAKE4CC('A', 'T', 'I', '2')
 #define DDS_4CC_DX10 DDS_MAKE4CC('D', 'X', '1', '0')
+#define DDS_4CC_RXGB DDS_MAKE4CC('R', 'X', 'G', 'B')
+#define DDS_4CC_BC4U DDS_MAKE4CC('B', 'C', '4', 'U')
+#define DDS_4CC_BC5U DDS_MAKE4CC('B', 'C', '5', 'U')
+
 #define DDS_FORMAT_BC4_UNORM 80
 #define DDS_FORMAT_BC5_UNORM 83
 #define DDS_FORMAT_BC6H_UF16 95
@@ -43,24 +47,23 @@ enum class Compression {
 /// images.
 ///
 enum {
-    DDS_PF_ALPHA     = 0x00000001,  ///< image has alpha channel
-    DDS_PF_FOURCC    = 0x00000004,  ///< image is compressed
-    DDS_PF_LUMINANCE = 0x00020000,  ///< image has luminance data
-    DDS_PF_RGB       = 0x00000040,  ///< image has RGB data
-    DDS_PF_YUV       = 0x00000200   ///< image has YUV data
+    DDS_PF_ALPHA     = 0x00000001,   ///< image has alpha channel
+    DDS_PF_ALPHAONLY = 0x00000002,   ///< image has only the alpha channel
+    DDS_PF_FOURCC    = 0x00000004,   ///< image is compressed
+    DDS_PF_LUMINANCE = 0x00020000,   ///< image has luminance data
+    DDS_PF_RGB       = 0x00000040,   ///< image has RGB data
+    DDS_PF_YUV       = 0x00000200,   ///< image has YUV data
+    DDS_PF_NORMAL    = 0x80000000u,  ///< image is a tangent space normal map
 };
 
 /// DDS pixel format structure.
 ///
 typedef struct {
-    uint32_t size;    ///< structure size, must be 32
-    uint32_t flags;   ///< flags to indicate valid fields
-    uint32_t fourCC;  ///< compression four-character code
-    uint32_t bpp;     ///< bits per pixel
-    uint32_t rmask;   ///< bitmask for the red channel
-    uint32_t gmask;   ///< bitmask for the green channel
-    uint32_t bmask;   ///< bitmask for the blue channel
-    uint32_t amask;   ///< bitmask for the alpha channel
+    uint32_t size;      ///< structure size, must be 32
+    uint32_t flags;     ///< flags to indicate valid fields
+    uint32_t fourCC;    ///< compression four-character code
+    uint32_t bpp;       ///< bits per pixel
+    uint32_t masks[4];  ///< bitmasks for the r,g,b,a channels
 } dds_pixformat;
 
 /// DDS caps flags, field 1.

diff --git a/src/dds.imageio/ddsinput.cpp b/src/dds.imageio/ddsinput.cpp
@@ -60,13 +60,11 @@ class DDSInput final : public ImageInput {
     std::vector<unsigned char> m_buf;  ///< Buffer the image pixels
     int m_subimage;
     int m_miplevel;
-    int m_nchans;            ///< Number of colour channels in image
-    int m_nfaces;            ///< Number of cube map sides in image
-    int m_Bpp;               ///< Number of bytes per pixel
-    int m_redL, m_redR;      ///< Bit shifts to extract red channel
-    int m_greenL, m_greenR;  ///< Bit shifts to extract green channel
-    int m_blueL, m_blueR;    ///< Bit shifts to extract blue channel
-    int m_alphaL, m_alphaR;  ///< Bit shifts to extract alpha channel
+    int m_nchans;               ///< Number of colour channels in image
+    int m_nfaces;               ///< Number of cube map sides in image
+    int m_Bpp;                  ///< Number of bytes per pixel
+    uint32_t m_BitCounts[4];    ///< Bit counts in r,g,b,a channels
+    uint32_t m_RightShifts[4];  ///< Shifts to extract r,g,b,a channels
     Compression m_compression = Compression::None;
     dds_header m_dds;  ///< DDS header
     dds_header_dx10 m_dx10;
@@ -91,7 +89,8 @@ class DDSInput final : public ImageInput {
 
     /// Helper function: calculate bit shifts to properly extract channel data
     ///
-    inline void calc_shifts(int mask, int& left, int& right);
+    inline static void calc_shifts(uint32_t mask, uint32_t& count,
+                                   uint32_t& right);
 
     /// Helper function: performs the actual file seeking.
     ///
@@ -111,8 +110,10 @@ GetBaseType(Compression cmp)
 }
 
 static int
-GetChannelCount(Compression cmp)
+GetChannelCount(Compression cmp, bool isNormal)
 {
+    if (isNormal)
+        return 3;
     if (cmp == Compression::BC4)
         return 1;
     if (cmp == Compression::BC5)
@@ -136,15 +137,61 @@ GetStorageRequirements(size_t width, size_t height, Compression cmp)
     return blockCount * GetBlockSize(cmp);
 }
 
+static uint8_t
+ComputeNormalZ(uint8_t x, uint8_t y)
+{
+    float nx  = 2 * (x / 255.0f) - 1;
+    float ny  = 2 * (y / 255.0f) - 1;
+    float nz  = 0.0f;
+    float nz2 = 1 - nx * nx - ny * ny;
+    if (nz2 > 0) {
+        nz = sqrtf(nz2);
+    }
+    int z = int(255.0f * (nz + 1) / 2.0f);
+    if (z < 0)
+        z = 0;
+    if (z > 255)
+        z = 255;
+    return z;
+}
+
+static void
+ComputeNormalRG(uint8_t rgba[kBlockSize * kBlockSize * 4])
+{
+    // expand from RG into RGB, computing B from RG
+    for (int i = kBlockSize * kBlockSize - 1; i >= 0; --i) {
+        uint8_t x       = rgba[i * 2 + 0];
+        uint8_t y       = rgba[i * 2 + 1];
+        rgba[i * 3 + 0] = x;
+        rgba[i * 3 + 1] = y;
+        rgba[i * 3 + 2] = ComputeNormalZ(x, y);
+    }
+}
+
+static void
+ComputeNormalAG(uint8_t rgba[kBlockSize * kBlockSize * 4])
+{
+    // contract from RGBA (R & B unused) to RGB, computing B from GA
+    for (int i = 0; i < kBlockSize * kBlockSize; ++i) {
+        uint8_t x       = rgba[i * 4 + 3];
+        uint8_t y       = rgba[i * 4 + 1];
+        rgba[i * 3 + 0] = x;
+        rgba[i * 3 + 1] = y;
+        rgba[i * 3 + 2] = ComputeNormalZ(x, y);
+    }
+}
+
+
 static void
 DecompressImage(uint8_t* rgba, int width, int height, void const* blocks,
-                Compression cmp)
+                Compression cmp, const dds_pixformat& pixelFormat)
 {
     uint8_t rgbai[kBlockSize * kBlockSize * 4];
     uint16_t rgbh[kBlockSize * kBlockSize * 3];
     const uint8_t* sourceBlock = reinterpret_cast<const uint8_t*>(blocks);
     const size_t blockSize     = GetBlockSize(cmp);
-    const int channelCount     = GetChannelCount(cmp);
+    const int channelCount     = GetChannelCount(cmp,
+                                             pixelFormat.flags & DDS_PF_NORMAL);
     for (int y = 0; y < height; y += kBlockSize) {
         for (int x = 0; x < width; x += kBlockSize) {
             // decompress the BCn block
@@ -178,6 +225,25 @@ DecompressImage(uint8_t* rgba, int width, int height, void const* blocks,
             }
             sourceBlock += blockSize;
 
+            /* Swap R & A for RXGB format case. */
+            if (cmp == Compression::DXT5
+                && pixelFormat.fourCC == DDS_4CC_RXGB) {
+                for (int i = 0; i < 16; ++i) {
+                    uint8_t r        = rgbai[i * 4 + 0];
+                    uint8_t a        = rgbai[i * 4 + 3];
+                    rgbai[i * 4 + 0] = a;
+                    rgbai[i * 4 + 3] = r;
+                }
+            }
+            /* Convert into full normal map if needed. */
+            else if (pixelFormat.flags & DDS_PF_NORMAL) {
+                if (cmp == Compression::BC5) {
+                    ComputeNormalRG(rgbai);
+                } else if (cmp == Compression::DXT5) {
+                    ComputeNormalAG(rgbai);
+                }
+            }
+
             // write the pixels into the destination image location
             if (cmp == Compression::BC6HU || cmp == Compression::BC6HS) {
                 // HDR formats: half
@@ -326,9 +392,9 @@ DDSInput::open(const std::string& name, ImageSpec& newspec)
         || !(m_dds.flags & DDS_HEIGHT) || !m_dds.height
         || ((m_dds.flags & DDS_DEPTH) && !m_dds.depth)
         || (!(m_dds.fmt.flags & DDS_PF_FOURCC)
-            && !((m_dds.fmt.flags & DDS_PF_RGB)
-                 | (m_dds.fmt.flags & DDS_PF_LUMINANCE)
-                 | (m_dds.fmt.flags & DDS_PF_ALPHA)))) {
+            && !(m_dds.fmt.flags
+                 & (DDS_PF_RGB | DDS_PF_LUMINANCE | DDS_PF_ALPHA
+                    | DDS_PF_ALPHAONLY)))) {
         errorf("Image with no data");
         return false;
     }
@@ -348,8 +414,14 @@ DDSInput::open(const std::string& name, ImageSpec& newspec)
         case DDS_4CC_DXT3: m_compression = Compression::DXT3; break;
         case DDS_4CC_DXT4: m_compression = Compression::DXT4; break;
         case DDS_4CC_DXT5: m_compression = Compression::DXT5; break;
+        case DDS_4CC_RXGB:
+            m_compression = Compression::DXT5;
+            m_dds.fmt.flags &= ~DDS_PF_NORMAL;
+            break;
         case DDS_4CC_ATI1: m_compression = Compression::BC4; break;
         case DDS_4CC_ATI2: m_compression = Compression::BC5; break;
+        case DDS_4CC_BC4U: m_compression = Compression::BC4; break;
+        case DDS_4CC_BC5U: m_compression = Compression::BC5; break;
         case DDS_4CC_DX10: {
             switch (m_dx10.dxgiFormat) {
             case DDS_FORMAT_BC4_UNORM: m_compression = Compression::BC4; break;
@@ -370,19 +442,27 @@ DDSInput::open(const std::string& name, ImageSpec& newspec)
         }
     }
 
+    // treat BC5 as normal maps if global attribute is set
+    if ((m_compression == Compression::BC5)
+        && OIIO::get_int_attribute("dds:bc5normal")) {
+        m_dds.fmt.flags |= DDS_PF_NORMAL;
+    }
+
     // determine the number of channels we have
     if (m_compression != Compression::None) {
-        m_nchans = GetChannelCount(m_compression);
+        m_nchans = GetChannelCount(m_compression,
+                                   m_dds.fmt.flags & DDS_PF_NORMAL);
     } else {
-        m_nchans = ((m_dds.fmt.flags & DDS_PF_LUMINANCE) ? 1 : 3)
+        m_nchans = ((m_dds.fmt.flags & (DDS_PF_LUMINANCE | DDS_PF_ALPHAONLY))
+                        ? 1
+                        : 3)
                    + ((m_dds.fmt.flags & DDS_PF_ALPHA) ? 1 : 0);
         // also calculate bytes per pixel and the bit shifts
         m_Bpp = (m_dds.fmt.bpp + 7) >> 3;
         if (!(m_dds.fmt.flags & DDS_PF_LUMINANCE)) {
-            calc_shifts(m_dds.fmt.rmask, m_redL, m_redR);
-            calc_shifts(m_dds.fmt.gmask, m_greenL, m_greenR);
-            calc_shifts(m_dds.fmt.bmask, m_blueL, m_blueR);
-            calc_shifts(m_dds.fmt.amask, m_alphaL, m_alphaR);
+            for (int i = 0; i < 4; ++i)
+                calc_shifts(m_dds.fmt.masks[i], m_BitCounts[i],
+                            m_RightShifts[i]);
         }
     }
 
@@ -412,25 +492,25 @@ DDSInput::open(const std::string& name, ImageSpec& newspec)
 
 
 inline void
-DDSInput::calc_shifts(int mask, int& left, int& right)
+DDSInput::calc_shifts(uint32_t mask, uint32_t& count, uint32_t& right)
 {
     if (mask == 0) {
-        left = right = 0;
+        count = right = 0;
         return;
     }
 
-    int i, tmp = mask;
-    for (i = 0; i < 32; i++, tmp >>= 1) {
-        if (tmp & 1)
+    int i;
+    for (i = 0; i < 32; i++, mask >>= 1) {
+        if (mask & 1)
             break;
     }
     right = i;
 
-    for (i = 0; i < 8; i++, tmp >>= 1) {
-        if (!(tmp & 1))
+    for (i = 0; i < 32; i++, mask >>= 1) {
+        if (!(mask & 1))
             break;
     }
-    left = 8 - i;
+    count = i;
 }
 
 
@@ -577,7 +657,8 @@ DDSInput::seek_subimage(int subimage, int miplevel)
     }
 
     if (m_dds.fmt.bpp
-        && (m_dds.fmt.flags & (DDS_PF_RGB | DDS_PF_LUMINANCE | DDS_PF_YUV)))
+        && (m_dds.fmt.flags
+            & (DDS_PF_RGB | DDS_PF_LUMINANCE | DDS_PF_YUV | DDS_PF_ALPHAONLY)))
         m_spec.attribute("oiio:BitsPerSample", m_dds.fmt.bpp);
 
     // linear color space for HDR-ish images
@@ -636,8 +717,6 @@ DDSInput::seek_subimage(int subimage, int miplevel)
     return true;
 }
 
-
-
 bool
 DDSInput::internal_readimg(unsigned char* dst, int w, int h, int d)
 {
@@ -649,7 +728,7 @@ DDSInput::internal_readimg(unsigned char* dst, int w, int h, int d)
         if (!ioread(&tmp[0], tmp.size(), 1))
             return false;
         // decompress image
-        DecompressImage(dst, w, h, &tmp[0], m_compression);
+        DecompressImage(dst, w, h, &tmp[0], m_compression, m_dds.fmt);
         tmp.clear();
         // correct pre-multiplied alpha, if necessary
         if (m_compression == Compression::DXT2
@@ -685,15 +764,12 @@ DDSInput::internal_readimg(unsigned char* dst, int w, int h, int d)
                     uint32_t pixel = 0;
                     OIIO_DASSERT(tmp.size() >= size_t(x * m_Bpp + m_Bpp));
                     memcpy(&pixel, tmp.data() + x * m_Bpp, m_Bpp);
-                    dst[k + 0] = ((pixel & m_dds.fmt.rmask) >> m_redR)
-                                 << m_redL;
-                    dst[k + 1] = ((pixel & m_dds.fmt.gmask) >> m_greenR)
-                                 << m_greenL;
-                    dst[k + 2] = ((pixel & m_dds.fmt.bmask) >> m_blueR)
-                                 << m_blueL;
-                    if (m_dds.fmt.flags & DDS_PF_ALPHA)
-                        dst[k + 3] = ((pixel & m_dds.fmt.amask) >> m_alphaR)
-                                     << m_alphaL;
+                    for (int ch = 0; ch < m_spec.nchannels; ++ch) {
+                        dst[k + ch]
+                            = bit_range_convert((pixel & m_dds.fmt.masks[ch])
+                                                    >> m_RightShifts[ch],
+                                                m_BitCounts[ch], 8);
+                    }
                 }
             }
         }

diff --git a/src/doc/builtinplugins.rst b/src/doc/builtinplugins.rst
@@ -141,13 +141,22 @@ DDS
 DDS (Direct Draw Surface) is an image file format designed by Microsoft
 for use in Direct3D graphics.  DDS files use the extension :file:`.dds`.
 
-DDS is an awful format, with several compression modes that are all so
-lossy as to be completely useless for high-end graphics.  Nevertheless,
-they are widely used in games and graphics hardware directly supports
-these compression modes.  Alas.
+DDS is primarily meant for images that are directly usable by the GPU.
+It supports 2D, cube and volume images with or without MIPmaps; using
+either uncompressed pixel formats or one of the lossy compression
+schemes supported by the graphics hardware (BC1-BC7).
 
 OpenImageIO currently only supports reading DDS files, not writing them.
 
+DDS files containing a "normal map" (`0x80000000`) pixel format flag
+will be interpreted as a tangent space normal map. When reading such files,
+the resulting image will be a 3-channel image with red & green channels
+coming from file data, and the blue channel computed as if it were the
+Z component of a normal map. This applies to images using DXT5 compression
+(normal X & Y components are assumed to be in alpha & green channels)
+and images using BC5/ATI2 compression (normal X & Y components are in
+red & green channels).
+
 .. list-table::
    :widths: 30 10 65
    :header-rows: 1
@@ -187,7 +196,14 @@ attributes are supported:
    * - ``oiio:ioproxy``
      - ptr
      - Pointer to a ``Filesystem::IOProxy`` that will handle the I/O, for
-       example by reading from memory rather than the file system.
+       example by reading from memory rather than the file system.    
+
+Additionally, an integer ``dds:bc5normal`` global attribute is supported
+to control behaviour of images compressed in BC5/ATI2 compression format.
+When the attribute value is set to non-zero (default is zero), any input
+image using BC5/ATI2 compression format is assumed to be a normal map,
+even if pixel format "normal map" flag is not set.
+
 
 **Configuration settings for DDS output**
 

diff --git a/src/include/OpenImageIO/imageio.h b/src/include/OpenImageIO/imageio.h
@@ -2750,6 +2750,11 @@ OIIO_API std::string geterror(bool clear = true);
 ///    may not read these correctly, but OIIO will. That's why the default
 ///    is not to support it.
 ///
+/// - `int dds:bc5normal`
+///
+///    When nonzero, treats BC5/ATI2 format files as normal maps (loads as
+///    3 channels, computes blue from red and green). Default is 0.
+///
 /// - `int openexr:core`
 ///
 ///    When nonzero, use the new "OpenEXR core C library" when available,