Skip to content

Commit

Permalink
refactoring DecodeBuffer::repeat with a #[cold] annotation improves p…
Browse files Browse the repository at this point in the history
…erformance by a few percent
  • Loading branch information
KillingSpark committed Apr 1, 2024
1 parent 9f48937 commit 28a309a
Showing 1 changed file with 70 additions and 58 deletions.
128 changes: 70 additions & 58 deletions src/decoding/decodebuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,36 +79,7 @@ impl Decodebuffer {

pub fn repeat(&mut self, offset: usize, match_length: usize) -> Result<(), DecodebufferError> {
if offset > self.buffer.len() {
if self.total_output_counter <= self.window_size as u64 {
// at least part of that repeat is from the dictionary content
let bytes_from_dict = offset - self.buffer.len();

if bytes_from_dict > self.dict_content.len() {
return Err(DecodebufferError::NotEnoughBytesInDictionary {
got: self.dict_content.len(),
need: bytes_from_dict,
});
}

if bytes_from_dict < match_length {
let dict_slice =
&self.dict_content[self.dict_content.len() - bytes_from_dict..];
self.buffer.extend(dict_slice);

self.total_output_counter += bytes_from_dict as u64;
return self.repeat(self.buffer.len(), match_length - bytes_from_dict);
} else {
let low = self.dict_content.len() - bytes_from_dict;
let high = low + match_length;
let dict_slice = &self.dict_content[low..high];
self.buffer.extend(dict_slice);
}
} else {
return Err(DecodebufferError::OffsetTooBig {
offset,
buf_len: self.buffer.len(),
});
}
self.repeat_from_dict(offset, match_length)
} else {
let buf_len = self.buffer.len();
let start_idx = buf_len - offset;
Expand All @@ -117,33 +88,7 @@ impl Decodebuffer {
self.buffer.reserve(match_length);
if end_idx > buf_len {
// We need to copy in chunks.
// We have at max offset bytes in one chunk, the last one can be smaller
let mut start_idx = start_idx;
let mut copied_counter_left = match_length;
// TODO this can be optimized further I think.
// Each time we copy a chunk we have a repetiton of length 'offset', so we can copy offset * iteration many bytes from start_idx
while copied_counter_left > 0 {
let chunksize = usize::min(offset, copied_counter_left);

// SAFETY: Requirements checked:
// 1. start_idx + chunksize must be <= self.buffer.len()
// We know that:
// 1. start_idx starts at buffer.len() - offset
// 2. chunksize <= offset (== offset for each iteration but the last, and match_length modulo offset in the last iteration)
// 3. the buffer grows by offset many bytes each iteration but the last
// 4. start_idx is increased by the same amount as the buffer grows each iteration
//
// Thus follows: start_idx + chunksize == self.buffer.len() in each iteration but the last, where match_length modulo offset == chunksize < offset
// Meaning: start_idx + chunksize <= self.buffer.len()
//
// 2. explicitly reserved enough memory for the whole match_length
unsafe {
self.buffer
.extend_from_within_unchecked(start_idx, chunksize)
};
copied_counter_left -= chunksize;
start_idx += chunksize;
}
self.repeat_in_chunks(offset, match_length, start_idx);
} else {
// can just copy parts of the existing buffer
// SAFETY: Requirements checked:
Expand All @@ -162,9 +107,76 @@ impl Decodebuffer {
}

self.total_output_counter += match_length as u64;
Ok(())
}
}

Ok(())
fn repeat_in_chunks(&mut self, offset: usize, match_length: usize, start_idx: usize) {
// We have at max offset bytes in one chunk, the last one can be smaller
let mut start_idx = start_idx;
let mut copied_counter_left = match_length;
// TODO this can be optimized further I think.
// Each time we copy a chunk we have a repetiton of length 'offset', so we can copy offset * iteration many bytes from start_idx
while copied_counter_left > 0 {
let chunksize = usize::min(offset, copied_counter_left);

// SAFETY: Requirements checked:
// 1. start_idx + chunksize must be <= self.buffer.len()
// We know that:
// 1. start_idx starts at buffer.len() - offset
// 2. chunksize <= offset (== offset for each iteration but the last, and match_length modulo offset in the last iteration)
// 3. the buffer grows by offset many bytes each iteration but the last
// 4. start_idx is increased by the same amount as the buffer grows each iteration
//
// Thus follows: start_idx + chunksize == self.buffer.len() in each iteration but the last, where match_length modulo offset == chunksize < offset
// Meaning: start_idx + chunksize <= self.buffer.len()
//
// 2. explicitly reserved enough memory for the whole match_length
unsafe {
self.buffer
.extend_from_within_unchecked(start_idx, chunksize)
};
copied_counter_left -= chunksize;
start_idx += chunksize;
}
}

#[cold]
fn repeat_from_dict(
&mut self,
offset: usize,
match_length: usize,
) -> Result<(), DecodebufferError> {
if self.total_output_counter <= self.window_size as u64 {
// at least part of that repeat is from the dictionary content
let bytes_from_dict = offset - self.buffer.len();

if bytes_from_dict > self.dict_content.len() {
return Err(DecodebufferError::NotEnoughBytesInDictionary {
got: self.dict_content.len(),
need: bytes_from_dict,
});
}

if bytes_from_dict < match_length {
let dict_slice = &self.dict_content[self.dict_content.len() - bytes_from_dict..];
self.buffer.extend(dict_slice);

self.total_output_counter += bytes_from_dict as u64;
return self.repeat(self.buffer.len(), match_length - bytes_from_dict);
} else {
let low = self.dict_content.len() - bytes_from_dict;
let high = low + match_length;
let dict_slice = &self.dict_content[low..high];
self.buffer.extend(dict_slice);
}
Ok(())
} else {
return Err(DecodebufferError::OffsetTooBig {
offset,
buf_len: self.buffer.len(),
});
}
}

// Check if and how many bytes can currently be drawn from the buffer
Expand Down

0 comments on commit 28a309a

Please sign in to comment.