Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump rust to 1.76 and disable sleeping between kernels on Win #210

Merged
merged 4 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- uses: dtolnay/rust-toolchain@1.74.1
- uses: dtolnay/rust-toolchain@1.76
- uses: Swatinem/rust-cache@v2
- run: cargo check --workspace --all-features

Expand Down Expand Up @@ -49,7 +49,7 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- uses: dtolnay/rust-toolchain@1.74.1
- uses: dtolnay/rust-toolchain@1.76
- if: matrix.runner == 'self-hosted-linux-intel'
run: sudo apt-get update && sudo apt-get install -y libpocl2 pocl-opencl-icd ocl-icd-opencl-dev
name: Install dependencies for testing openCL on Linux
Expand Down Expand Up @@ -79,7 +79,7 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- uses: dtolnay/rust-toolchain@1.74.1
- uses: dtolnay/rust-toolchain@1.76
with:
components: rustfmt
- uses: Swatinem/rust-cache@v2
Expand Down Expand Up @@ -117,7 +117,7 @@ jobs:
with:
submodules: true
- name: Install stable
uses: dtolnay/rust-toolchain@1.74.1
uses: dtolnay/rust-toolchain@1.76
with:
components: llvm-tools-preview
- name: cargo install cargo-llvm-cov
Expand Down Expand Up @@ -177,7 +177,7 @@ jobs:
- uses: actions/checkout@v3
with:
submodules: true
- uses: dtolnay/rust-toolchain@1.74.1
- uses: dtolnay/rust-toolchain@1.76
- uses: Swatinem/rust-cache@v2
with:
key: ${{ join( matrix.os, '-' ) }}
Expand Down
47 changes: 36 additions & 11 deletions scrypt-ocl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
SpatialDims,
};
use post::initialize::{Initialize, VrfNonce, ENTIRE_LABEL_SIZE, LABEL_SIZE};
use std::{cmp::min, fmt::Display, io::Write, ops::Range};
use std::{
cmp::min,
fmt::Display,
io::Write,
ops::Range,
time::{Duration, Instant},
};
use thiserror::Error;

pub use ocl;
Expand Down Expand Up @@ -267,8 +273,8 @@
let mut best_nonce = None;
let labels_end = labels.end;

let mut total_kernel_duration = std::time::Duration::ZERO;
let mut last_kernel_duration = std::time::Duration::ZERO;
let mut total_kernel_duration = Duration::ZERO;
let mut last_kernel_duration = Duration::ZERO;

for (iter, index) in labels.step_by(self.global_work_size).enumerate() {
self.kernel.set_arg(1, index)?;
Expand All @@ -291,15 +297,32 @@
self.kernel.cmd().enew(&mut kernel_event).enq()?;
}

let read_start = std::time::Instant::now();
let read_start = Instant::now();
// On some platforms (e.g. Nvidia), the read command will spin the CPU at 100% until the kernel finishes.
// Hence we wait a bit before reading the buffer.
// The wait time is based on the average kernel duration, with some margin.
if iter > 0 {
let average = total_kernel_duration.div_f32(iter as f32);
let wait = (last_kernel_duration + average).div_f32(2.0).mul_f32(0.9);
log::trace!("waiting for kernel to finish for {wait:?}");
std::thread::sleep(wait);
// The wait time is weighted 50% by the last kernel duration and 50% by the average kernel duration
// to speed up convergence to the optimal wait time.
//
// We skip a few 'warmup iterations', as the average kernel duration is not yet reliable.
let warmup_iters = 10;
if iter > warmup_iters {
let average = total_kernel_duration.div_f32((iter - warmup_iters) as f32);
log::trace!("last execution time: {last_kernel_duration:?}, average: {average:?})");

Check warning on line 311 in scrypt-ocl/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

scrypt-ocl/src/lib.rs#L310-L311

Added lines #L310 - L311 were not covered by tests

#[cfg(not(target_os = "windows"))]
{
let wait = (last_kernel_duration + average).div_f32(2.0).mul_f32(0.9);
// Don't wait longer than `average - 5ms` to give the scheduler time to switch back to this thread.
let wait = min(
average
.checked_sub(Duration::from_millis(5))
.unwrap_or_default(),
wait,
);
log::trace!("waiting for kernel to finish for {wait:?}");
std::thread::sleep(wait);

Check warning on line 324 in scrypt-ocl/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

scrypt-ocl/src/lib.rs#L315-L324

Added lines #L315 - L324 were not covered by tests
}
}

let labels_buffer =
Expand All @@ -310,8 +333,10 @@
.read(labels_buffer.as_mut())
.enq()?;

last_kernel_duration = read_start.elapsed();
total_kernel_duration += last_kernel_duration;
if iter >= warmup_iters {
last_kernel_duration = read_start.elapsed();
total_kernel_duration += last_kernel_duration;

Check warning on line 338 in scrypt-ocl/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

scrypt-ocl/src/lib.rs#L337-L338

Added lines #L337 - L338 were not covered by tests
}

// Look for VRF nonce if enabled
// TODO: run in background / in parallel to GPU
Expand Down
Loading