example: Add torch profiler example (#1026)

* add profiler example
  Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>
* lint
  Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>
* fix
  Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>
* fix markdown lint
  Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>

Signed-off-by: Jinjing.Zhou <allenzhou@tensorchord.ai>
tensorchord · Oct 18, 2022 · bc6255b · bc6255b
1 parent ded3fce
commit bc6255b
Show file tree

Hide file tree

Showing 7 changed files with 190 additions and 11 deletions.
diff --git a/.github/workflows/link-check.yml b/.github/workflows/link-check.yml
@@ -5,21 +5,22 @@ on:
     branches:
       - main
     paths:
-      - '.github/workflows/**'
-      - '**.md'
+      - ".github/workflows/**"
+      - "**.md"
   pull_request:
     paths:
-      - '.github/workflows/**'
-      - '**.md'
+      - ".github/workflows/**"
+      - "**.md"
 
 jobs:
   markdown-link-check:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
-    - uses: gaurav-nelson/github-action-markdown-link-check@v1
-      with:
-        file-path: 'README.md'
-        folder-path: 'docs'
-        check-modified-files-only: yes
-        base-branch	: main
+      - uses: actions/checkout@v3
+      - uses: gaurav-nelson/github-action-markdown-link-check@v1
+        with:
+          file-path: "README.md"
+          folder-path: "docs"
+          check-modified-files-only: yes
+          base-branch: main
+          config-file: .markdown-lint.json
diff --git a/.markdown-lint.json b/.markdown-lint.json
@@ -0,0 +1,7 @@
+{
+    "ignorePatterns": [
+        {
+            "pattern": "^http://localhost.*"
+        }
+    ]
+}
diff --git a/examples/pytorch-profiler/README.md b/examples/pytorch-profiler/README.md
@@ -0,0 +1,7 @@
+# PyTorch profiler example
+
+This example is adapted from PyTorch's [official tutorial](https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html). It shows how to use the PyTorch profiler to analyze performance bottlenecks in a model.
+
+## Usage
+
+Run `envd up` in the current folder, then execute `python main.py` inside the envd container. You can then view the profiling results in TensorBoard at http://localhost:8888
diff --git a/examples/pytorch-profiler/build.envd b/examples/pytorch-profiler/build.envd
@@ -0,0 +1,16 @@
+envdlib = include("https://github.com/tensorchord/envdlib")
+
+
+def build():
+    base(os="ubuntu20.04", language="python3")
+    shell("zsh")
+    install.cuda(version="11.2.0", cudnn="8")
+    install.python_packages(
+        [
+            "torch",
+            "torchvision",
+            "torch_tb_profiler",
+            "--extra-index-url https://download.pytorch.org/whl/cu113",
+        ]
+    )
+    envdlib.tensorboard(envd_port=8888, envd_dir="/home/envd/log", host_port=8888)
diff --git a/examples/pytorch-profiler/main.py b/examples/pytorch-profiler/main.py
@@ -0,0 +1,66 @@
+# Adapted from https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html
+import torch
+import torch.nn
+import torch.optim
+import torch.profiler
+import torch.utils.data
+import torchvision.datasets
+import torchvision.models
+import torchvision.transforms as T
+from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
+
+transform = T.Compose(
+    [T.Resize(224), T.ToTensor(), T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+)
+train_set = torchvision.datasets.CIFAR10(
+    root="./data", train=True, download=True, transform=transform
+)
+train_loader = torch.utils.data.DataLoader(train_set, batch_size=2, shuffle=True)
+
+device = torch.device("cuda:0")
+model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT).to(device)
+criterion = torch.nn.CrossEntropyLoss().cuda(device)
+optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
+model.train()
+
+
+def train(data):
+    inputs, labels = data[0].to(device=device), data[1].to(device=device)
+    outputs = model(inputs)
+    loss = criterion(outputs, labels)
+    optimizer.zero_grad()
+    loss.backward()
+    optimizer.step()
+
+
+with torch.profiler.profile(
+    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
+    on_trace_ready=torch.profiler.tensorboard_trace_handler(
+        "/home/envd/log/efficientnet"
+    ),
+    record_shapes=True,
+    profile_memory=True,
+    with_stack=True,
+) as prof:
+    for step, batch_data in enumerate(train_loader):
+        if step >= (1 + 1 + 3) * 2:
+            break
+        train(batch_data)
+        prof.step()  # Need to call this at the end of each step to notify profiler of steps' boundary.
+
+
+prof = torch.profiler.profile(
+    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
+    on_trace_ready=torch.profiler.tensorboard_trace_handler(
+        "/home/envd/log/efficientnet"
+    ),
+    record_shapes=True,
+    with_stack=True,
+)
+prof.start()
+for step, batch_data in enumerate(train_loader):
+    if step >= (1 + 1 + 3) * 2:
+        break
+    train(batch_data)
+    prof.step()
+prof.stop()
diff --git a/examples/pytorch_profiler/build.envd b/examples/pytorch_profiler/build.envd
@@ -0,0 +1,16 @@
+envdlib = include("https://github.com/tensorchord/envdlib")
+
+
+def build():
+    base(os="ubuntu20.04", language="python3")
+    shell("zsh")
+    install.cuda(version="11.2.0", cudnn="8")
+    install.python_packages(
+        [
+            "torch",
+            "torchvision",
+            "torch_tb_profiler",
+            "--extra-index-url https://download.pytorch.org/whl/cu113",
+        ]
+    )
+    envdlib.tensorboard(8888, envd_dir="/home/envd/log/efficientnet")
diff --git a/examples/pytorch_profiler/main.py b/examples/pytorch_profiler/main.py
@@ -0,0 +1,66 @@
+# Adapted from https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html
+import torch
+import torch.nn
+import torch.optim
+import torch.profiler
+import torch.utils.data
+import torchvision.datasets
+import torchvision.models
+import torchvision.transforms as T
+from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
+
+transform = T.Compose(
+    [T.Resize(224), T.ToTensor(), T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+)
+train_set = torchvision.datasets.CIFAR10(
+    root="./data", train=True, download=True, transform=transform
+)
+train_loader = torch.utils.data.DataLoader(train_set, batch_size=2, shuffle=True)
+
+device = torch.device("cuda:0")
+model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT).to(device)
+criterion = torch.nn.CrossEntropyLoss().cuda(device)
+optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
+model.train()
+
+
+def train(data):
+    inputs, labels = data[0].to(device=device), data[1].to(device=device)
+    outputs = model(inputs)
+    loss = criterion(outputs, labels)
+    optimizer.zero_grad()
+    loss.backward()
+    optimizer.step()
+
+
+with torch.profiler.profile(
+    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
+    on_trace_ready=torch.profiler.tensorboard_trace_handler(
+        "/home/envd/log/efficientnet"
+    ),
+    record_shapes=True,
+    profile_memory=True,
+    with_stack=True,
+) as prof:
+    for step, batch_data in enumerate(train_loader):
+        if step >= (1 + 1 + 3) * 2:
+            break
+        train(batch_data)
+        prof.step()  # Need to call this at the end of each step to notify profiler of steps' boundary.
+
+
+prof = torch.profiler.profile(
+    schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2),
+    on_trace_ready=torch.profiler.tensorboard_trace_handler(
+        "/home/envd/log/efficientnet"
+    ),
+    record_shapes=True,
+    with_stack=True,
+)
+prof.start()
+for step, batch_data in enumerate(train_loader):
+    if step >= (1 + 1 + 3) * 2:
+        break
+    train(batch_data)
+    prof.step()
+prof.stop()