From e28e2a7e193e40b6f785d821b225ab7dbe61e83c Mon Sep 17 00:00:00 2001
From: Christoph Schranz <christoph.schranz@salzburgresearch.at>
Date: Tue, 19 Dec 2023 16:22:14 +0100
Subject: [PATCH] update libcudnn8 to install TF RTX requirements

---
 .build/Dockerfile                          |   7 ++
 extra/Getting_Started/GPU-processing.ipynb | 138 ++++++++++++---------
 src/Dockerfile.gpulibs                     |   7 ++
 3 files changed, 92 insertions(+), 60 deletions(-)

diff --git a/.build/Dockerfile b/.build/Dockerfile
index 763b013..1d45e7b 100755
--- a/.build/Dockerfile
+++ b/.build/Dockerfile
@@ -380,6 +380,13 @@ LABEL maintainer="Christoph Schranz <christoph.schranz@salzburgresearch.at>, Mat
 # Install Tensorflow, check compatibility here:
 # https://www.tensorflow.org/install/source#gpu
 # installation via conda leads to errors in version 4.8.2
+# As root, before the TensorFlow pip install below: update the (possibly held) libcudnn8 package, install the TensorRT libnvinfer dev packages, then alias the .so.8 libraries under .so.7 names.
+USER root
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends  --allow-change-held-packages libcudnn8 && \
+    apt-get install -y --no-install-recommends libnvinfer-dev libnvinfer-plugin-dev  && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN cd /usr/lib/x86_64-linux-gnu && ln -s libnvinfer_plugin.so.8 libnvinfer_plugin.so.7 && ln -s libnvinfer.so.8 libnvinfer.so.7
 USER ${NB_UID}
 RUN pip install --upgrade pip && \
     pip install --no-cache-dir tensorflow==2.15.0 keras==2.15.0 && \
diff --git a/extra/Getting_Started/GPU-processing.ipynb b/extra/Getting_Started/GPU-processing.ipynb
index 3aeaa51..fb97168 100755
--- a/extra/Getting_Started/GPU-processing.ipynb
+++ b/extra/Getting_Started/GPU-processing.ipynb
@@ -27,20 +27,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Thu Dec 14 17:16:30 2023       \n",
+      "Tue Dec 19 15:20:27 2023       \n",
       "+-----------------------------------------------------------------------------+\n",
-      "| NVIDIA-SMI 520.61.05    Driver Version: 520.61.05    CUDA Version: 11.8     |\n",
+      "| NVIDIA-SMI 525.54       Driver Version: 526.56       CUDA Version: 12.0     |\n",
       "|-------------------------------+----------------------+----------------------+\n",
       "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
       "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
       "|                               |                      |               MIG M. |\n",
       "|===============================+======================+======================|\n",
-      "|   0  NVIDIA RTX A6000    On   | 00000000:41:00.0 Off |                  Off |\n",
-      "| 30%   49C    P8    27W / 300W |      5MiB / 49140MiB |      0%      Default |\n",
-      "|                               |                      |                  N/A |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "|   1  NVIDIA RTX A6000    On   | 00000000:61:00.0 Off |                  Off |\n",
-      "| 35%   63C    P2    90W / 300W |   9635MiB / 49140MiB |      0%      Default |\n",
+      "|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |\n",
+      "| N/A   43C    P0    19W /  N/A |      0MiB /  6144MiB |      0%      Default |\n",
       "|                               |                      |                  N/A |\n",
       "+-------------------------------+----------------------+----------------------+\n",
       "                                                                               \n",
@@ -49,6 +45,7 @@
       "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
       "|        ID   ID                                                   Usage      |\n",
       "|=============================================================================|\n",
+      "|  No running processes found                                                 |\n",
       "+-----------------------------------------------------------------------------+\n"
      ]
     }
@@ -94,14 +91,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2023-12-14 17:16:32.448916: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2023-12-14 17:16:32.472734: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
-      "2023-12-14 17:16:32.472758: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
-      "2023-12-14 17:16:32.473445: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "2023-12-14 17:16:32.477355: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n",
-      "2023-12-14 17:16:32.477730: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
-      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2023-12-14 17:16:33.173733: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+      "2023-12-19 15:20:31.292167: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+      "2023-12-19 15:20:31.292283: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+      "2023-12-19 15:20:31.345027: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "2023-12-19 15:20:31.405681: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+      "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
      ]
     },
     {
@@ -115,14 +109,26 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2023-12-14 17:16:33.915841: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
-      "2023-12-14 17:16:33.916057: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
-      "2023-12-14 17:16:33.916926: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
-      "Skipping registering GPU devices...\n",
-      "2023-12-14 17:16:34.083985: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
-      "2023-12-14 17:16:34.084152: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
-      "2023-12-14 17:16:34.084263: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
-      "Skipping registering GPU devices...\n"
+      "2023-12-19 15:20:33.437438: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:33.438375: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:33.438401: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:33.439883: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:33.439914: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:33.439931: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:35.742285: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:35.742336: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:35.742346: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2022] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.\n",
+      "2023-12-19 15:20:35.742371: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
+      "Your kernel may have been built without NUMA support.\n",
+      "2023-12-19 15:20:35.742387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /device:GPU:0 with 3007 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
      ]
     },
     {
@@ -133,8 +139,19 @@
        " memory_limit: 268435456\n",
        " locality {\n",
        " }\n",
-       " incarnation: 14747982026689315297\n",
-       " xla_global_id: -1]"
+       " incarnation: 16132906446837489126\n",
+       " xla_global_id: -1,\n",
+       " name: \"/device:GPU:0\"\n",
+       " device_type: \"GPU\"\n",
+       " memory_limit: 3153068032\n",
+       " locality {\n",
+       "   bus_id: 1\n",
+       "   links {\n",
+       "   }\n",
+       " }\n",
+       " incarnation: 13564847057713934038\n",
+       " physical_device_desc: \"device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\"\n",
+       " xla_global_id: 416903419]"
       ]
      },
      "execution_count": 3,
@@ -157,11 +174,11 @@
     {
      "data": {
       "text/plain": [
-       "tensor([[0.3446, 0.0452, 0.2264],\n",
-       "        [0.7986, 0.7481, 0.9437],\n",
-       "        [0.0514, 0.0179, 0.9945],\n",
-       "        [0.6514, 0.9786, 0.4902],\n",
-       "        [0.9525, 0.8661, 0.2606]])"
+       "tensor([[0.6527, 0.7924, 0.8878],\n",
+       "        [0.7822, 0.0338, 0.4025],\n",
+       "        [0.6130, 0.1177, 0.9255],\n",
+       "        [0.0451, 0.7010, 0.0331],\n",
+       "        [0.1705, 0.8165, 0.7323]])"
       ]
      },
      "execution_count": 4,
@@ -211,7 +228,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "260 ms ± 61.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+      "559 ms ± 87.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     }
    ],
@@ -245,7 +262,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "76.7 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+      "200 ms ± 4.12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     }
    ],
@@ -271,16 +288,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[0.3524, 0.4564, 0.5821, 0.0973, 0.7754],\n",
-      "        [0.7047, 0.2262, 0.4790, 0.1555, 0.5360],\n",
-      "        [0.0142, 0.1699, 0.9471, 0.2035, 0.9215],\n",
-      "        [0.5230, 0.0497, 0.8534, 0.3936, 0.3059],\n",
-      "        [0.8031, 0.8541, 0.3866, 0.6828, 0.7291]], device='cuda:0')\n",
-      "tensor([[0.3524, 0.4564, 0.5821, 0.0973, 0.7754],\n",
-      "        [0.7047, 0.2262, 0.4790, 0.1555, 0.5360],\n",
-      "        [0.0142, 0.1699, 0.9471, 0.2035, 0.9215],\n",
-      "        [0.5230, 0.0497, 0.8534, 0.3936, 0.3059],\n",
-      "        [0.8031, 0.8541, 0.3866, 0.6828, 0.7291]], dtype=torch.float64)\n"
+      "tensor([[0.4609, 0.7584, 0.4593, 0.0551, 0.1594],\n",
+      "        [0.6063, 0.5960, 0.4197, 0.7962, 0.1542],\n",
+      "        [0.5160, 0.4067, 0.6062, 0.1356, 0.8867],\n",
+      "        [0.3636, 0.7090, 0.3487, 0.0552, 0.4904],\n",
+      "        [0.6309, 0.0065, 0.8926, 0.0643, 0.6346]], device='cuda:0')\n",
+      "tensor([[0.4609, 0.7584, 0.4593, 0.0551, 0.1594],\n",
+      "        [0.6063, 0.5960, 0.4197, 0.7962, 0.1542],\n",
+      "        [0.5160, 0.4067, 0.6062, 0.1356, 0.8867],\n",
+      "        [0.3636, 0.7090, 0.3487, 0.0552, 0.4904],\n",
+      "        [0.6309, 0.0065, 0.8926, 0.0643, 0.6346]], dtype=torch.float64)\n"
      ]
     }
    ],
@@ -304,7 +321,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "3.37 ms ± 23.6 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+      "The slowest run took 5.26 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
+      "90.5 ms ± 74.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
      ]
     }
    ],
@@ -351,11 +369,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[0.4466, 0.0260, 0.0687, 0.6375, 0.9676],\n",
-      "        [0.2974, 0.0200, 0.0621, 0.4341, 0.0167],\n",
-      "        [0.1146, 0.3012, 0.9246, 0.1484, 0.8045],\n",
-      "        [0.4448, 0.5577, 0.4649, 0.2364, 0.7051],\n",
-      "        [0.0479, 0.7472, 0.2121, 0.9418, 0.7699]], device='cuda:0')\n"
+      "tensor([[0.2782, 0.4737, 0.6745, 0.9081, 0.3480],\n",
+      "        [0.1753, 0.6475, 0.3926, 0.3947, 0.9197],\n",
+      "        [0.1747, 0.6550, 0.4903, 0.4221, 0.3066],\n",
+      "        [0.8606, 0.6053, 0.0784, 0.1127, 0.2536],\n",
+      "        [0.0236, 0.5264, 0.6400, 0.5198, 0.5281]], device='cuda:0')\n"
      ]
     }
    ],
@@ -386,11 +404,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[ 1.2995e-03,  1.6008e-04,  3.7637e-04,  1.3155e-04,  4.5707e-05],\n",
-      "        [ 1.6008e-04,  8.3649e-04,  4.2130e-05,  9.5201e-05,  1.6981e-04],\n",
-      "        [ 3.7637e-04,  4.2130e-05,  1.1736e-03,  3.9943e-04, -2.7599e-04],\n",
-      "        [ 1.3155e-04,  9.5201e-05,  3.9942e-04,  4.7651e-04,  1.6600e-04],\n",
-      "        [ 4.5707e-05,  1.6981e-04, -2.7599e-04,  1.6600e-04,  1.3608e-03]],\n",
+      "tensor([[ 6.5489e-04,  1.8794e-04,  2.2678e-04, -2.8653e-04,  1.9096e-04],\n",
+      "        [ 1.8794e-04,  7.0443e-04,  2.0275e-04, -2.2673e-04,  2.6704e-04],\n",
+      "        [ 2.2678e-04,  2.0275e-04,  6.8227e-04, -1.5024e-05,  3.2128e-04],\n",
+      "        [-2.8653e-04, -2.2673e-04, -1.5024e-05,  1.1865e-03,  1.9364e-04],\n",
+      "        [ 1.9096e-04,  2.6704e-04,  3.2128e-04,  1.9364e-04,  1.0109e-03]],\n",
       "       device='cuda:0')\n"
      ]
     }
@@ -409,11 +427,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[ 1.2995e-03,  1.6008e-04,  3.7637e-04,  1.3155e-04,  4.5707e-05],\n",
-      "        [ 1.6008e-04,  8.3649e-04,  4.2130e-05,  9.5201e-05,  1.6981e-04],\n",
-      "        [ 3.7637e-04,  4.2130e-05,  1.1736e-03,  3.9943e-04, -2.7599e-04],\n",
-      "        [ 1.3155e-04,  9.5201e-05,  3.9942e-04,  4.7651e-04,  1.6600e-04],\n",
-      "        [ 4.5707e-05,  1.6981e-04, -2.7599e-04,  1.6600e-04,  1.3608e-03]],\n",
+      "tensor([[ 6.5489e-04,  1.8794e-04,  2.2678e-04, -2.8653e-04,  1.9096e-04],\n",
+      "        [ 1.8794e-04,  7.0443e-04,  2.0275e-04, -2.2673e-04,  2.6704e-04],\n",
+      "        [ 2.2678e-04,  2.0275e-04,  6.8227e-04, -1.5024e-05,  3.2128e-04],\n",
+      "        [-2.8653e-04, -2.2673e-04, -1.5024e-05,  1.1865e-03,  1.9364e-04],\n",
+      "        [ 1.9096e-04,  2.6704e-04,  3.2128e-04,  1.9364e-04,  1.0109e-03]],\n",
       "       dtype=torch.float64)\n"
      ]
     }
@@ -449,7 +467,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,
diff --git a/src/Dockerfile.gpulibs b/src/Dockerfile.gpulibs
index 3c4b18c..a352c4f 100644
--- a/src/Dockerfile.gpulibs
+++ b/src/Dockerfile.gpulibs
@@ -3,6 +3,13 @@ LABEL maintainer="Christoph Schranz <christoph.schranz@salzburgresearch.at>, Mat
 # Install Tensorflow, check compatibility here:
 # https://www.tensorflow.org/install/source#gpu
 # installation via conda leads to errors in version 4.8.2
+# As root, before the TensorFlow pip install below: update the (possibly held) libcudnn8 package, install the TensorRT libnvinfer dev packages, then alias the .so.8 libraries under .so.7 names.
+USER root
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends  --allow-change-held-packages libcudnn8 && \
+    apt-get install -y --no-install-recommends libnvinfer-dev libnvinfer-plugin-dev  && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+RUN cd /usr/lib/x86_64-linux-gnu && ln -s libnvinfer_plugin.so.8 libnvinfer_plugin.so.7 && ln -s libnvinfer.so.8 libnvinfer.so.7
 USER ${NB_UID}
 RUN pip install --upgrade pip && \
     pip install --no-cache-dir tensorflow==2.15.0 keras==2.15.0 && \