Skip to content

Commit

Permalink
feat: download cuda toolkit
Browse files Browse the repository at this point in the history
Signed-off-by: James <namnh0122@gmail.com>
  • Loading branch information
namchuai committed Aug 29, 2024
1 parent 8fdff72 commit a0ec614
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 54 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# cortex-js
.DS_Store
cortex-js/cortex.db
dist
*.lock
Expand All @@ -18,4 +19,4 @@ cortex-js/package-lock.json
.vscode
cortex-js/command
cortex-js/src/infrastructure/commanders/test/test_data
**/vcpkg_installed
**/vcpkg_installed
60 changes: 57 additions & 3 deletions engine/commands/engine_init_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "utils/archive_utils.h"
#include "utils/system_info_utils.h"
// clang-format on
#include "utils/cuda_toolkit_utils.h"
#include "utils/engine_matcher_utils.h"

namespace commands {
Expand Down Expand Up @@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const {
.path = path,
}}};

DownloadService().AddDownloadTask(downloadTask, [](const std::string&
absolute_path,
bool unused) {
DownloadService download_service;
download_service.AddDownloadTask(downloadTask, [](const std::string&
absolute_path,
bool unused) {
// try to unzip the downloaded file
std::filesystem::path downloadedEnginePath{absolute_path};
LOG_INFO << "Downloaded engine path: "
Expand All @@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const {
}
LOG_INFO << "Finished!";
});
if (system_info.os == "mac" || engineName_ == "cortex.onnx") {
return;
}
// download cuda toolkit
const std::string jan_host = "https://catalog.jan.ai";
const std::string cuda_toolkit_file_name = "cuda.tar.gz";
const std::string download_id = "cuda";

auto gpu_driver_version = system_info_utils::GetDriverVersion();

auto cuda_runtime_version =
cuda_toolkit_utils::GetCompatibleCudaToolkitVersion(
gpu_driver_version, system_info.os, engineName_);

std::ostringstream cuda_toolkit_path;
cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/"
<< system_info.os << "/"
<< cuda_toolkit_file_name;

LOG_DEBUG << "Cuda toolkit download url: " << jan_host
<< cuda_toolkit_path.str();

auto downloadCudaToolkitTask = DownloadTask{
.id = download_id,
.type = DownloadType::CudaToolkit,
.error = std::nullopt,
.items = {DownloadItem{
.id = download_id,
.host = jan_host,
.fileName = cuda_toolkit_file_name,
.type = DownloadType::CudaToolkit,
.path = cuda_toolkit_path.str(),
}},
};

download_service.AddDownloadTask(
downloadCudaToolkitTask,
[](const std::string& absolute_path, bool unused) {
LOG_DEBUG << "Downloaded cuda path: " << absolute_path;
// try to unzip the downloaded file
std::filesystem::path downloaded_path{absolute_path};

archive_utils::ExtractArchive(
absolute_path,
downloaded_path.parent_path().parent_path().string());

try {
std::filesystem::remove(absolute_path);
} catch (std::exception& e) {
LOG_ERROR << "Error removing downloaded file: " << e.what();
}
});

return true;
}
Expand Down
98 changes: 50 additions & 48 deletions engine/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,53 +22,53 @@
#error "Unsupported platform!"
#endif

void RunServer(){
void RunServer() {
// Create logs/ folder and setup log to file
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
// Number of cortex.cpp threads
// if (argc > 1) {
// thread_num = std::atoi(argv[1]);
// }
std::filesystem::create_directory(cortex_utils::logs_folder);
trantor::AsyncFileLogger asyncFileLogger;
asyncFileLogger.setFileName(cortex_utils::logs_base_name);
asyncFileLogger.startLogging();
trantor::Logger::setOutputFunction(
[&](const char* msg, const uint64_t len) {
asyncFileLogger.output(msg, len);
},
[&]() { asyncFileLogger.flush(); });
asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
// Number of cortex.cpp threads
// if (argc > 1) {
// thread_num = std::atoi(argv[1]);
// }

// // Check for host argument
// if (argc > 2) {
// host = argv[2];
// }
// // Check for host argument
// if (argc > 2) {
// host = argv[2];
// }

// // Check for port argument
// if (argc > 3) {
// port = std::atoi(argv[3]); // Convert string argument to int
// }
int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;
// // Check for port argument
// if (argc > 3) {
// port = std::atoi(argv[3]); // Convert string argument to int
// }
int thread_num = 1;
std::string host = "127.0.0.1";
int port = 3928;

int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
int logical_cores = std::thread::hardware_concurrency();
int drogon_thread_num = std::max(thread_num, logical_cores);
// cortex_utils::nitro_logo();
#ifdef CORTEX_CPP_VERSION
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
#else
LOG_INFO << "cortex.cpp version: undefined";
LOG_INFO << "cortex.cpp version: undefined";
#endif

LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
LOG_INFO << "Server started, listening at: " << host << ":" << port;
LOG_INFO << "Please load your model";
drogon::app().addListener(host, port);
drogon::app().setThreadNum(drogon_thread_num);
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();

drogon::app().run();
// return 0;
drogon::app().run();
// return 0;
}

void ForkProcess() {
Expand All @@ -80,19 +80,21 @@ void ForkProcess() {
ZeroMemory(&si, sizeof(si));
si.cb = sizeof(si);
ZeroMemory(&pi, sizeof(pi));
std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
std::string cmds =
cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
// Create child process
if (!CreateProcess(
NULL, // No module name (use command line)
const_cast<char*>(cmds.c_str()), // Command line (replace with your actual executable)
NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable
FALSE, // Set handle inheritance to FALSE
0, // No creation flags
NULL, // Use parent's environment block
NULL, // Use parent's starting directory
&si, // Pointer to STARTUPINFO structure
&pi)) // Pointer to PROCESS_INFORMATION structure
const_cast<char*>(
cmds.c_str()), // Command line (replace with your actual executable)
NULL, // Process handle not inheritable
NULL, // Thread handle not inheritable
FALSE, // Set handle inheritance to FALSE
0, // No creation flags
NULL, // Use parent's environment block
NULL, // Use parent's starting directory
&si, // Pointer to STARTUPINFO structure
&pi)) // Pointer to PROCESS_INFORMATION structure
{
std::cout << "Could not start server: " << GetLastError() << std::endl;
} else {
Expand Down
2 changes: 1 addition & 1 deletion engine/services/download_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <optional>
#include <vector>

enum class DownloadType { Model, Engine, Miscellaneous };
// Category of a downloadable artifact. Used to select the destination folder
// (see file_manager_utils::GetContainerFolderPath) and for display/reporting
// (see file_manager_utils::downloadTypeToString).
enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit };

enum class DownloadStatus {
Pending,
Expand Down
63 changes: 63 additions & 0 deletions engine/utils/cuda_toolkit_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#pragma once

#include <stdexcept>
#include <string>

#include "utils/semantic_version_utils.h"

namespace cuda_toolkit_utils {
// Picks the newest CUDA toolkit runtime shipped by the project that the
// installed NVIDIA GPU driver can load.
// The driver-version cutoffs below are based on:
// https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1
//
// driver_semantic_version: driver version string, e.g. "527.41" on windows
//                          or "525.60.13" on linux.
// os:                      "windows" or "linux"; any other value throws.
// engine:                  engine identifier; "cortex.tensorrt-llm" requires
//                          CUDA 12.4, so drivers too old for 12.4 are
//                          rejected instead of falling back to 11.7.
// Returns "12.4" or "11.7".
// Throws std::runtime_error on an unsupported OS or a driver older than the
// minimum supported version.
inline std::string GetCompatibleCudaToolkitVersion(
    const std::string& driver_semantic_version, const std::string& os,
    const std::string& engine) {

  if (engine == "cortex.tensorrt-llm") {
    // if the engine is cortex.tensorrt-llm, the minimum required CUDA version is 12.4
    if (os == "windows") {
      if (semantic_version_utils::CompareSemanticVersion(
              driver_semantic_version, "527.41") >= 0) {
        return "12.4";
      } else {
        throw std::runtime_error(
            "GPU driver version not supported. Minimum "
            "required driver version is 527.41");
      }
    } else if (os == "linux") {
      if (semantic_version_utils::CompareSemanticVersion(
              driver_semantic_version, "525.60.13") >= 0) {
        return "12.4";
      } else {
        throw std::runtime_error(
            "GPU driver version not supported. Minimum required driver version "
            "is 525.60.13");
      }
    } else {
      throw std::runtime_error("Unsupported OS");
    }
  }

  if (os == "windows") {
    if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version,
                                                       "527.41") >= 0) {
      return "12.4";
    } else if (semantic_version_utils::CompareSemanticVersion(
                   driver_semantic_version, "452.39") >= 0) {
      return "11.7";
    } else {
      throw std::runtime_error(
          "GPU driver version not supported. Minimum "
          "required driver version is 452.39");
    }
  } else if (os == "linux") {
    if (semantic_version_utils::CompareSemanticVersion(driver_semantic_version,
                                                       "525.60.13") >= 0) {
      return "12.4";
    } else if (semantic_version_utils::CompareSemanticVersion(
                   driver_semantic_version, "450.80.02") >= 0) {
      return "11.7";
    } else {
      throw std::runtime_error(
          "GPU driver version not supported. Minimum "
          "required driver version is 450.80.02");
    }
  } else {
    throw std::runtime_error("Unsupported OS");
  }
}
}  // namespace cuda_toolkit_utils
50 changes: 49 additions & 1 deletion engine/utils/file_manager_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,63 @@
#include <string>
#include <string_view>

#if defined(__APPLE__) && defined(__MACH__)
#include <mach-o/dyld.h>
#elif defined(__linux__)
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
#endif

namespace file_manager_utils {

// Returns the directory that contains the running executable; falls back to
// the current working directory when the platform query fails.
inline std::filesystem::path GetExecutableFolderContainerPath() {
#if defined(__APPLE__) && defined(__MACH__)
  char buffer[1024];
  uint32_t size = sizeof(buffer);

  if (_NSGetExecutablePath(buffer, &size) == 0) {
    LOG_INFO << "Executable path: " << buffer;
    return std::filesystem::path{buffer}.parent_path();
  }
  // On failure, _NSGetExecutablePath writes the required size into `size`;
  // retry once with a correctly sized buffer instead of giving up.
  std::string resized(size, '\0');
  if (_NSGetExecutablePath(&resized[0], &size) == 0) {
    LOG_INFO << "Executable path: " << resized.c_str();
    return std::filesystem::path{resized.c_str()}.parent_path();
  }
  LOG_ERROR << "Failed to get executable path";
  return std::filesystem::current_path();
#elif defined(__linux__)
  // TODO: haven't tested
  char buffer[1024];
  ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
  if (len != -1) {
    buffer[len] = '\0';  // readlink does not null-terminate
    LOG_INFO << "Executable path: " << buffer;
    return std::filesystem::path{buffer}.parent_path();
  } else {
    LOG_ERROR << "Failed to get executable path";
    return std::filesystem::current_path();
  }
#elif defined(_WIN32)
  // TODO: haven't tested
  char buffer[MAX_PATH];
  DWORD len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
  // 0 means the call failed; MAX_PATH means the path was truncated. Either
  // way the buffer contents are unusable, so fall back.
  if (len == 0 || len == MAX_PATH) {
    LOG_ERROR << "Failed to get executable path";
    return std::filesystem::current_path();
  }
  LOG_INFO << "Executable path: " << buffer;
  return std::filesystem::path{buffer}.parent_path();
#else
  LOG_ERROR << "Unsupported platform!";
  return std::filesystem::current_path();
#endif
}

inline std::filesystem::path GetContainerFolderPath(
const std::string_view type) {
const auto current_path{std::filesystem::current_path()};
const auto current_path{GetExecutableFolderContainerPath()};
auto container_folder_path = std::filesystem::path{};

if (type == "Model") {
container_folder_path = current_path / "models";
} else if (type == "Engine") {
container_folder_path = current_path / "engines";
} else if (type == "CudaToolkit") {
container_folder_path = current_path;
} else {
container_folder_path = current_path / "misc";
}
Expand All @@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) {
return "Engine";
case DownloadType::Miscellaneous:
return "Misc";
case DownloadType::CudaToolkit:
return "CudaToolkit";
default:
return "UNKNOWN";
}
Expand Down
Loading

0 comments on commit a0ec614

Please sign in to comment.