[PyTorch] Update to 2.1 #1426

Merged: 26 commits, Nov 10, 2023

Commits
92f7ad8
Add item_bool
HGuillemet Oct 11, 2023
ebdcdaa
Update Pytorch to 2.1
HGuillemet Oct 16, 2023
7dfa27e
Remove useless classes
HGuillemet Oct 16, 2023
672cdfa
Add missing gen classes. Remove useless classes.
HGuillemet Oct 16, 2023
4248355
Add CUDACachingAllocator
HGuillemet Oct 17, 2023
49f2f18
Update MNIST sample in README
HGuillemet Oct 17, 2023
57d89d0
Skip not-exported function
HGuillemet Oct 17, 2023
1660fae
gen update
HGuillemet Oct 17, 2023
af6b64e
Add CUDAAllocator.recordHistory
HGuillemet Oct 18, 2023
2a89d39
Add TensorBase.data_ptr_byte
HGuillemet Oct 18, 2023
db626e1
Skip not exported CUDACachingAllocator::format_size
HGuillemet Oct 23, 2023
99dbdad
Map generic data loaders
HGuillemet Oct 23, 2023
1dc9d4f
Accept Java arrays for primitive ArrayRef
HGuillemet Oct 24, 2023
abf565b
Fix get_batch argument type
HGuillemet Oct 24, 2023
8499486
Remove GatheredContextSupplier.java
HGuillemet Oct 25, 2023
95496c6
Restore missing classes from torch::jit
HGuillemet Oct 27, 2023
fe140fd
Update CUDA library paths to 12.3
HGuillemet Oct 30, 2023
4ffcc18
Try to update CUDA archs to "5.0;6.0;7.0;8.0+PTX" for PyTorch
saudet Oct 31, 2023
49668cb
Try to update CUDA archs to "5.0;6.0;7.0;8.0;9.0" for PyTorch
HGuillemet Nov 1, 2023
2dfcc32
Add item_byte and data_ptr_bool
HGuillemet Nov 3, 2023
4fc9e28
Add include_list.pl
HGuillemet Nov 3, 2023
d1e473d
Restore parse order of 2.0.1
HGuillemet Nov 4, 2023
df1e13e
Make register_module generic
HGuillemet Nov 6, 2023
6fcfb80
Revert renaming of `torch::jit::load`
HGuillemet Nov 6, 2023
18dda32
Revert change in README concerning register_module
HGuillemet Nov 7, 2023
9a7e6c2
Update CHANGELOG.md and fix nits
saudet Nov 9, 2023
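Several of the commits above add small Java-side conveniences rather than tracking the C++ API: item_bool/item_byte, data_ptr_byte/data_ptr_bool, and acceptance of plain Java arrays where a primitive ArrayRef is expected. A rough sketch of how two of these are expected to look from Java; the generated signatures are inferred from the commit messages, not from the diff below, so treat them as assumptions:

    import org.bytedeco.pytorch.Tensor;
    import static org.bytedeco.pytorch.global.torch.*;

    public class Sketch21 {
        public static void main(String[] args) {
            // "Accept Java arrays for primitive ArrayRef": factory functions taking
            // an IntArrayRef shape are assumed to also accept a plain long[].
            Tensor t = ones(new long[]{2, 3});

            // "Add item_bool": read a scalar boolean tensor back into Java.
            boolean anyNonZero = t.any().item_bool();
            System.out.println(anyNonZero);
        }
    }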
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -3,7 +3,7 @@
* Refactor and improve presets for PyTorch ([pull #1360](https://github.com/bytedeco/javacpp-presets/pull/1360))
* Include `mkl_lapack.h` header file in presets for MKL ([issue #1388](https://github.com/bytedeco/javacpp-presets/issues/1388))
* Map new higher-level C++ API of Triton Inference Server ([pull #1361](https://github.com/bytedeco/javacpp-presets/pull/1361))
* Upgrade presets for OpenCV 4.8.1, DNNL 3.3, OpenBLAS 0.3.24, CPython 3.12.0, NumPy 1.26.1, SciPy 1.11.3, LLVM 17.0.1, Leptonica 1.83.1, Tesseract 5.3.3, CUDA 12.3.0, cuDNN 8.9.5, NCCL 2.18.5, TensorFlow Lite 2.14.0, Triton Inference Server 2.38.0, ONNX 1.14.1, ONNX Runtime 1.16.1, TVM 0.13.0, and their dependencies
* Upgrade presets for OpenCV 4.8.1, DNNL 3.3, OpenBLAS 0.3.24, CPython 3.12.0, NumPy 1.26.1, SciPy 1.11.3, LLVM 17.0.1, Leptonica 1.83.1, Tesseract 5.3.3, CUDA 12.3.0, cuDNN 8.9.5, NCCL 2.18.5, PyTorch 2.1.0, TensorFlow Lite 2.14.0, Triton Inference Server 2.38.0, ONNX 1.14.1, ONNX Runtime 1.16.1, TVM 0.13.0, and their dependencies

### June 6, 2023 version 1.5.9
* Virtualize `nvinfer1::IGpuAllocator` from TensorRT to allow customization ([pull #1367](https://github.com/bytedeco/javacpp-presets/pull/1367))
4 changes: 2 additions & 2 deletions README.md
@@ -134,7 +134,7 @@ Further, in the case of Android, the JavaCPP Presets also rely on:

Manual Installation
-------------------
Simply put all the desired JAR files (`opencv*.jar`, `ffmpeg*.jar`, etc.), in addition to `javacpp.jar`, somewhere in your class path. The JAR files available as pre-built artifacts are meant to be used with [JavaCPP](https://github.com/bytedeco/javacpp). The binaries for Linux were built for CentOS 6 and 7, so they should work on most distributions currently in use. The ones for Android were compiled for ARMv7 processors featuring an FPU, so they will not work on ancient devices such as the HTC Magic or some others with an ARMv6 CPU. Here are some more specific instructions for common cases:
Simply put all the desired JAR files (`opencv*.jar`, `ffmpeg*.jar`, etc.), in addition to `javacpp.jar`, somewhere in your class path. The JAR files available as pre-built artifacts are meant to be used with [JavaCPP](https://github.com/bytedeco/javacpp). The binaries for Linux are built with Ubuntu, so they should work on most distributions currently in use. The ones for Android were compiled for ARMv7 processors featuring an FPU, so they will not work on ancient devices such as the HTC Magic or some others with an ARMv6 CPU. Here are some more specific instructions for common cases:

NetBeans (Java SE 7 or newer):

@@ -222,7 +222,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip
* NVIDIA Video Codec SDK 12.1.x https://developer.nvidia.com/nvidia-video-codec-sdk
* OpenCL 3.0.x https://github.com/KhronosGroup/OpenCL-ICD-Loader
* MXNet 1.9.x https://github.com/apache/incubator-mxnet
* PyTorch 2.0.x https://github.com/pytorch/pytorch
* PyTorch 2.1.x https://github.com/pytorch/pytorch
* SentencePiece 0.1.99 https://github.com/google/sentencepiece
* TensorFlow 1.15.x https://github.com/tensorflow/tensorflow
* TensorFlow Lite 2.14.x https://github.com/tensorflow/tensorflow
2 changes: 1 addition & 1 deletion platform/pom.xml
@@ -292,7 +292,7 @@
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform</artifactId>
<version>2.0.1-${project.version}</version>
<version>2.1.0-${project.version}</version>
</dependency>
<dependency>
<groupId>org.bytedeco</groupId>
8 changes: 4 additions & 4 deletions pytorch/README.md
@@ -9,7 +9,7 @@ Introduction
------------
This directory contains the JavaCPP Presets module for:

* PyTorch 2.0.1 https://pytorch.org/
* PyTorch 2.1.0 https://pytorch.org/

Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.

@@ -48,14 +48,14 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform</artifactId>
<version>2.0.1-1.5.10-SNAPSHOT</version>
<version>2.1.0-1.5.10-SNAPSHOT</version>
</dependency>

<!-- Additional dependencies required to use CUDA, cuDNN, and NCCL -->
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform-gpu</artifactId>
<version>2.0.1-1.5.10-SNAPSHOT</version>
<version>2.1.0-1.5.10-SNAPSHOT</version>
</dependency>

<!-- Additional dependencies to use bundled CUDA, cuDNN, and NCCL -->
@@ -109,7 +109,7 @@ public class SimpleMNIST {
}

// Use one of many "standard library" modules.
LinearImpl fc1 = null, fc2 = null, fc3 = null;
final LinearImpl fc1, fc2, fc3;
}

public static void main(String[] args) throws Exception {
4 changes: 2 additions & 2 deletions pytorch/cppbuild.sh
@@ -27,15 +27,15 @@ if [[ "$EXTENSION" == *gpu ]]; then
export USE_CUDNN=1
export USE_FAST_NVCC=0
export CUDA_SEPARABLE_COMPILATION=OFF
export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0+PTX"
export TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;8.0;9.0"
fi

export PYTHON_BIN_PATH=$(which python3)
if [[ $PLATFORM == windows* ]]; then
export PYTHON_BIN_PATH=$(which python.exe)
fi

PYTORCH_VERSION=2.0.1
PYTORCH_VERSION=2.1.0

mkdir -p "$PLATFORM$EXTENSION"
cd "$PLATFORM$EXTENSION"
65 changes: 65 additions & 0 deletions pytorch/include_list.pl
@@ -0,0 +1,65 @@
#!/bin/perl

# Must be run from javacpp-presets/pytorch after cppbuild.sh has been run
# for linux-x86_64-gpu

# Generate the lists of includes to parse, in order, from the output
# of g++ -H
# Used to update src/main/resources/org/bytedeco/pytorch/presets/*

use strict;
use warnings;

my %incs;
my @inc_per_depth;

sub flush($) {
    my $min_depth = shift;
    for (my $d = @inc_per_depth - 1; $d >= $min_depth; $d--) {
        if ($inc_per_depth[$d]) {
            foreach my $i (@{$inc_per_depth[$d]}) {
                print "#include \"$i\"\n";
                $incs{$i} = 1;
            }
            undef $inc_per_depth[$d];
        }
    }
}

sub go {
    my $path = join ' ', @_;

    my @inc = `g++ -I. -I torch/csrc/api/include/ -H $path -E 2>&1 > /dev/null`;
    foreach my $i (@inc) {
        chomp $i;
        my ($depth, $f) = $i =~ /^(\.+)\s(.*\.h)$/;
        next unless $depth;
        $depth = length($depth);
        $f =~ s#^\./##;
        next if $f =~ m#^/
                       |^ATen/ops/\w+_native\.h$
                       |^ATen/ops/\w+_meta\.h$
                       |^ATen/ops/\w+_ops\.h$
                       |^ATen/ops/_\w+\.h$#x
                or $incs{$f};
        flush($depth);
        my $incs = $inc_per_depth[$depth];
        $incs = $inc_per_depth[$depth] = [] unless $incs;
        push @$incs, $f;
    }
    flush(0);
}

chdir "cppbuild/linux-x86_64-gpu/pytorch/torch/include";

go('torch/csrc/api/include/torch/torch.h', 'torch/script.h');

print <<EOF;

// Included by
// ATen/cudnn/Descriptors.h
// ATen/cudnn/Types.h
// c10/cuda/CUDAGuard.h
EOF

go('ATen/cudnn/Descriptors.h', 'ATen/cudnn/Types.h', 'c10/cuda/CUDAGuard.h', '-I/opt/cuda/targets/x86_64-linux/include/');
2 changes: 1 addition & 1 deletion pytorch/platform/gpu/pom.xml
@@ -12,7 +12,7 @@

<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform-gpu</artifactId>
<version>2.0.1-${project.parent.version}</version>
<version>2.1.0-${project.parent.version}</version>
<name>JavaCPP Presets Platform GPU for PyTorch</name>

<properties>
2 changes: 1 addition & 1 deletion pytorch/platform/pom.xml
@@ -12,7 +12,7 @@

<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform</artifactId>
<version>2.0.1-${project.parent.version}</version>
<version>2.1.0-${project.parent.version}</version>
<name>JavaCPP Presets Platform for PyTorch</name>

<properties>
2 changes: 1 addition & 1 deletion pytorch/pom.xml
@@ -11,7 +11,7 @@

<groupId>org.bytedeco</groupId>
<artifactId>pytorch</artifactId>
<version>2.0.1-${project.parent.version}</version>
<version>2.1.0-${project.parent.version}</version>
<name>JavaCPP Presets for PyTorch</name>

<dependencies>
6 changes: 3 additions & 3 deletions pytorch/samples/SimpleMNIST.java
@@ -20,14 +20,14 @@ static class Net extends Module {
Tensor forward(Tensor x) {
// Use one of many tensor manipulation functions.
x = relu(fc1.forward(x.reshape(x.size(0), 784)));
x = dropout(x, /*p=*/0.5, /*train=*/is_training(), false);
x = dropout(x, /*p=*/0.5, /*train=*/is_training());
x = relu(fc2.forward(x));
x = log_softmax(fc3.forward(x), new LogSoftmaxFuncOptions(/*dim=*/1));
x = log_softmax(fc3.forward(x), /*dim=*/1);
return x;
}

// Use one of many "standard library" modules.
LinearImpl fc1 = null, fc2 = null, fc3 = null;
final LinearImpl fc1, fc2, fc3;
}

public static void main(String[] args) throws Exception {
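Since fc1, fc2, and fc3 are now final, the sample's constructor has to assign them directly. A minimal sketch of that constructor, assuming the layer sizes from the original sample and the generic register_module(name, module) introduced by this pull request:

    import org.bytedeco.pytorch.LinearImpl;
    import org.bytedeco.pytorch.Module;

    class Net extends Module {
        final LinearImpl fc1, fc2, fc3;

        Net() {
            // register_module registers the child module under the given name and
            // returns it, so the final fields can be initialized in one step.
            fc1 = register_module("fc1", new LinearImpl(784, 64));
            fc2 = register_module("fc2", new LinearImpl(64, 32));
            fc3 = register_module("fc3", new LinearImpl(32, 10));
        }
    }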
4 changes: 2 additions & 2 deletions pytorch/samples/pom.xml
@@ -12,14 +12,14 @@
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform</artifactId>
<version>2.0.1-1.5.10-SNAPSHOT</version>
<version>2.1.0-1.5.10-SNAPSHOT</version>
</dependency>

<!-- Additional dependencies required to use CUDA, cuDNN, and NCCL -->
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>pytorch-platform-gpu</artifactId>
<version>2.0.1-1.5.10-SNAPSHOT</version>
<version>2.1.0-1.5.10-SNAPSHOT</version>
</dependency>

<!-- Additional dependencies to use bundled CUDA, cuDNN, and NCCL -->

This file was deleted.

2 changes: 1 addition & 1 deletion pytorch/src/gen/java/org/bytedeco/pytorch/Allocator.java
@@ -52,7 +52,7 @@ public class Allocator extends Pointer {
// is guaranteed to return a unique_ptr with this deleter attached;
// it means the rawAllocate and rawDeallocate APIs are safe to use.
// This function MUST always return the same BoundDeleter.
public native @Cast("c10::DeleterFnPtr") PointerConsumer raw_deleter();
public native PointerConsumer raw_deleter();
public native Pointer raw_allocate(@Cast("size_t") long n);
public native void raw_deallocate(Pointer ptr);
}
4 changes: 4 additions & 0 deletions pytorch/src/gen/java/org/bytedeco/pytorch/AnyModule.java
@@ -227,6 +227,8 @@ public class AnyModule extends Pointer {
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ReplicationPad1dImpl>"}) ReplicationPad1dImpl module);
public AnyModule(ConstantPad1dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ConstantPad1dImpl>"}) ConstantPad1dImpl module);
public AnyModule(ZeroPad1dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ZeroPad1dImpl>"}) ZeroPad1dImpl module);
public AnyModule(AvgPool1dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::AvgPool1dImpl>"}) AvgPool1dImpl module);
public AnyModule(MaxPool1dImpl module) { super((Pointer)null); allocate(module); }
@@ -267,6 +269,8 @@ public class AnyModule extends Pointer {
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ReplicationPad3dImpl>"}) ReplicationPad3dImpl module);
public AnyModule(ConstantPad3dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ConstantPad3dImpl>"}) ConstantPad3dImpl module);
public AnyModule(ZeroPad3dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::ZeroPad3dImpl>"}) ZeroPad3dImpl module);
public AnyModule(AvgPool3dImpl module) { super((Pointer)null); allocate(module); }
private native void allocate(@SharedPtr @Cast({"", "std::shared_ptr<torch::nn::AvgPool3dImpl>"}) AvgPool3dImpl module);
public AnyModule(MaxPool3dImpl module) { super((Pointer)null); allocate(module); }
16 changes: 4 additions & 12 deletions pytorch/src/gen/java/org/bytedeco/pytorch/ArgumentDef.java
@@ -38,18 +38,10 @@ public class ArgumentDef extends Pointer {
return new ArgumentDef((Pointer)this).offsetAddress(i);
}

public static class GetTypeFn extends FunctionPointer {
static { Loader.load(); }
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
public GetTypeFn(Pointer p) { super(p); }
protected GetTypeFn() { allocate(); }
private native void allocate();
public native @ByVal Type.TypePtr call();
}
public native GetTypeFn getTypeFn(); public native ArgumentDef getTypeFn(GetTypeFn setter);
public native GetTypeFn getFakeTypeFn(); public native ArgumentDef getFakeTypeFn(GetTypeFn setter);
public native TypeSupplier getTypeFn(); public native ArgumentDef getTypeFn(TypeSupplier setter);
public native TypeSupplier getFakeTypeFn(); public native ArgumentDef getFakeTypeFn(TypeSupplier setter);
public ArgumentDef() { super((Pointer)null); allocate(); }
private native void allocate();
public ArgumentDef(GetTypeFn getTypeFn, GetTypeFn getFakeTypeFn) { super((Pointer)null); allocate(getTypeFn, getFakeTypeFn); }
private native void allocate(GetTypeFn getTypeFn, GetTypeFn getFakeTypeFn);
public ArgumentDef(TypeSupplier getTypeFn, TypeSupplier getFakeTypeFn) { super((Pointer)null); allocate(getTypeFn, getFakeTypeFn); }
private native void allocate(TypeSupplier getTypeFn, TypeSupplier getFakeTypeFn);
}
6 changes: 6 additions & 0 deletions pytorch/src/gen/java/org/bytedeco/pytorch/AutogradMeta.java
@@ -71,6 +71,12 @@ public class AutogradMeta extends AutogradMetaInterface {



// The post_acc_grad_hooks_ field stores only Python hooks
// (PyFunctionTensorPostAccGradHooks) that are called after the
// .grad field has been accumulated into. This is less complicated
// than the hooks_ field, which encapsulates a lot more.
public native @UniquePtr @Cast({"", "", "std::unique_ptr<torch::autograd::PostAccumulateGradHook>&&"}) PostAccumulateGradHook post_acc_grad_hooks_(); public native AutogradMeta post_acc_grad_hooks_(PostAccumulateGradHook setter);

// Only meaningful on leaf variables (must be false otherwise)
public native @Cast("bool") boolean requires_grad_(); public native AutogradMeta requires_grad_(boolean setter);

@@ -46,7 +46,7 @@ private native void allocate(

public native void set_inference_mode(@Cast("bool") boolean enabled);

public native void set_multithreading_enabled(@Cast("bool") boolean mulithreading_enabled);
public native void set_multithreading_enabled(@Cast("bool") boolean multithreading_enabled);

public native void set_view_replay_enabled(@Cast("bool") boolean view_replay_enabled);

49 changes: 49 additions & 0 deletions pytorch/src/gen/java/org/bytedeco/pytorch/BackendMeta.java
@@ -0,0 +1,49 @@
// Targeted by JavaCPP version 1.5.10-SNAPSHOT: DO NOT EDIT THIS FILE

package org.bytedeco.pytorch;

import org.bytedeco.pytorch.Allocator;
import org.bytedeco.pytorch.Function;
import org.bytedeco.pytorch.functions.*;
import org.bytedeco.pytorch.Module;
import org.bytedeco.javacpp.annotation.Cast;
import java.nio.*;
import org.bytedeco.javacpp.*;
import org.bytedeco.javacpp.annotation.*;

import static org.bytedeco.javacpp.presets.javacpp.*;
import static org.bytedeco.openblas.global.openblas_nolapack.*;
import static org.bytedeco.openblas.global.openblas.*;

import static org.bytedeco.pytorch.global.torch.*;


// For ease of copy pasting
// #if 0
// #endif

/**
* This structure is intended to hold additional metadata of the specific device
* backend.
**/
@Namespace("c10") @Properties(inherit = org.bytedeco.pytorch.presets.torch.class)
public class BackendMeta extends Pointer {
static { Loader.load(); }
/** Default native constructor. */
public BackendMeta() { super((Pointer)null); allocate(); }
/** Native array allocator. Access with {@link Pointer#position(long)}. */
public BackendMeta(long size) { super((Pointer)null); allocateArray(size); }
/** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
public BackendMeta(Pointer p) { super(p); }
private native void allocate();
private native void allocateArray(long size);
@Override public BackendMeta position(long position) {
return (BackendMeta)super.position(position);
}
@Override public BackendMeta getPointer(long i) {
return new BackendMeta((Pointer)this).offsetAddress(i);
}

public native @ByVal BackendMetaRef clone(
@Const @ByRef BackendMetaRef ptr);
}