Skip to content

Commit

Permalink
[NCCL] Upgrade TF NCCL version to 2.21.5
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 646800346
  • Loading branch information
tensorflower-gardener authored and copybara-github committed Jun 26, 2024
1 parent e13f157 commit 9178927
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 8 deletions.
4 changes: 2 additions & 2 deletions third_party/nccl/archive.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ exports_files(["LICENSE.txt"])

NCCL_MAJOR = 2

- NCCL_MINOR = 19
+ NCCL_MINOR = 21

- NCCL_PATCH = 3
+ NCCL_PATCH = 5

NCCL_VERSION = NCCL_MAJOR * 10000 + NCCL_MINOR * 100 + NCCL_PATCH # e.g., 21605

Expand Down
50 changes: 47 additions & 3 deletions third_party/nccl/archive.patch
Original file line number Diff line number Diff line change
@@ -1,9 +1,31 @@
diff --git a/src/device/all_gather.h b/src/device/all_gather.h
index 809e8ae..57eab81 100644
--- a/src/device/all_gather.h
+++ b/src/device/all_gather.h
@@ -296,7 +296,7 @@ struct RunWorkElement<ncclFuncAllGather, T, RedOp, NCCL_ALGO_COLLNET_DIRECT, NCC
scat.args = args;
scat.chunkSize = chunkSize;
scat.railGridOffset = railGridOffset;
- prims.process</*Recv=*/1, /*Send=*/1>(scat);
+ prims.template process</*Recv=*/1, /*Send=*/1>(scat);
}
}
return;
@@ -314,7 +314,7 @@ struct RunWorkElement<ncclFuncAllGather, T, RedOp, NCCL_ALGO_COLLNET_DIRECT, NCC
scat.args = args;
scat.chunkSize = chunkSize;
scat.railGridOffset = railGridOffset;
- prims.process</*Recv=*/1, /*Send=*/0>(scat);
+ prims.template process</*Recv=*/1, /*Send=*/0>(scat);
}
return;
}
diff --git a/src/device/common.cu b/src/device/common.cu.cc
similarity index 100%
rename from src/device/common.cu
rename to src/device/common.cu.cc
diff --git a/src/device/common.h b/src/device/common.h
index 97581f7..134fdb8 100644
index d8581d3..09ac3b6 100644
--- a/src/device/common.h
+++ b/src/device/common.h
@@ -15,7 +15,7 @@
Expand All @@ -14,9 +36,9 @@ index 97581f7..134fdb8 100644
+extern __device__ ncclDevFuncPtr_t ncclDevFuncTable[];

struct ncclShmemGroup {
ncclConnInfo *recvConns[NCCL_MAX_NVLS_ARITY];
ncclConnInfo *recvConns[NCCL_MAX_ARITY];
diff --git a/src/device/generate.py b/src/device/generate.py
index 0b053de..87bf6cb 100755
index 43de85d..87cd677 100755
--- a/src/device/generate.py
+++ b/src/device/generate.py
@@ -195,7 +195,7 @@ kernel_funcs = sorted(set(best_kernel(*fn) for fn in primary_funcs))
Expand Down Expand Up @@ -111,3 +133,25 @@ diff --git a/src/device/onerank.cu b/src/device/onerank.cu.cc
similarity index 100%
rename from src/device/onerank.cu
rename to src/device/onerank.cu.cc
diff --git a/src/device/reduce_scatter.h b/src/device/reduce_scatter.h
index d0b5249..2dacd60 100644
--- a/src/device/reduce_scatter.h
+++ b/src/device/reduce_scatter.h
@@ -254,7 +254,7 @@ struct RunWorkElement<ncclFuncReduceScatter, T, RedOp, NCCL_ALGO_COLLNET_DIRECT,
scat.args = args;
scat.chunkSize = chunkSize;
scat.railGridOffset = railGridOffset;
- prims.process</*Recv=*/0, /*Send=*/1>(scat);
+ prims.template process</*Recv=*/0, /*Send=*/1>(scat);
}
return;
}
@@ -278,7 +278,7 @@ struct RunWorkElement<ncclFuncReduceScatter, T, RedOp, NCCL_ALGO_COLLNET_DIRECT,
scat.args = args;
scat.chunkSize = chunkSize;
scat.railGridOffset = railGridOffset;
- prims.process</*Recv=*/1, /*Send=*/1>(scat);
+ prims.template process</*Recv=*/1, /*Send=*/1>(scat);
}
}
return;
6 changes: 3 additions & 3 deletions workspace2.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,9 @@ def _tf_repositories():
name = "nccl_archive",
build_file = "//third_party:nccl/archive.BUILD",
patch_file = ["//third_party/nccl:archive.patch"],
- sha256 = "1c5474553afedb88e878c772f13d6f90b9226b3f2971dfa6f873adb9443100c2",
- strip_prefix = "nccl-2.19.3-1",
- urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.19.3-1.tar.gz"),
+ sha256 = "1923596984d85e310b5b6c52b2c72a1b93da57218f2bc5a5c7ac3d59297a3303",
+ strip_prefix = "nccl-2.21.5-1",
+ urls = tf_mirror_urls("https://github.com/nvidia/nccl/archive/v2.21.5-1.tar.gz"),
)

# Note that we are currently taking NVTX headers from a NCCL release to get nvToolsExtPayload.h
Expand Down

0 comments on commit 9178927

Please sign in to comment.