Skip to content

Commit

Permalink
proxy: bump parquet (#7782)
Browse files Browse the repository at this point in the history
## Summary of changes

Updates the parquet lib. one change left that we need is in an open PR
against upstream, hopefully we can remove the git dependency by 52.0.0
apache/arrow-rs#5773

I'm not sure why the parquet files got a little bit bigger. I tested
them and they still open fine. 🤷

side effect of the update, chrono updated and added yet another
deprecation warning (hence why the safekeepers change)
  • Loading branch information
conradludgate committed May 19, 2024
1 parent 5caee4c commit a5ecca9
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 49 deletions.
29 changes: 21 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12"
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "49.0.0"
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "51.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
procfs = "0.14"
Expand Down Expand Up @@ -244,8 +244,8 @@ tonic-build = "0.9"
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }

# bug fixes for UUID
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }

################# Binary contents sections

Expand Down
73 changes: 39 additions & 34 deletions proxy/src/context/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ where
}

async fn upload_parquet(
w: SerializedFileWriter<Writer<BytesMut>>,
mut w: SerializedFileWriter<Writer<BytesMut>>,
len: i64,
storage: &GenericRemoteStorage,
) -> anyhow::Result<Writer<BytesMut>> {
Expand All @@ -319,11 +319,15 @@ async fn upload_parquet(

// I don't know how compute intensive this is, although it probably isn't much... better be safe than sorry.
// finish method only available on the fork: https://github.com/apache/arrow-rs/issues/5253
let (writer, metadata) = tokio::task::spawn_blocking(move || w.finish())
let (mut buffer, metadata) =
tokio::task::spawn_blocking(move || -> parquet::errors::Result<_> {
let metadata = w.finish()?;
let buffer = std::mem::take(w.inner_mut().get_mut());
Ok((buffer, metadata))
})
.await
.unwrap()?;

let mut buffer = writer.into_inner();
let data = buffer.split().freeze();

let compression = len as f64 / len_uncompressed as f64;
Expand Down Expand Up @@ -474,10 +478,11 @@ mod tests {
RequestData {
session_id: uuid::Builder::from_random_bytes(rng.gen()).into_uuid(),
peer_addr: Ipv4Addr::from(rng.gen::<[u8; 4]>()).to_string(),
timestamp: chrono::NaiveDateTime::from_timestamp_millis(
timestamp: chrono::DateTime::from_timestamp_millis(
rng.gen_range(1703862754..1803862754),
)
.unwrap(),
.unwrap()
.naive_utc(),
application_name: Some("test".to_owned()),
username: Some(hex::encode(rng.gen::<[u8; 4]>())),
endpoint_id: Some(hex::encode(rng.gen::<[u8; 16]>())),
Expand Down Expand Up @@ -560,15 +565,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1315008, 3, 6000),
(1315001, 3, 6000),
(1315061, 3, 6000),
(1315018, 3, 6000),
(1315148, 3, 6000),
(1314990, 3, 6000),
(1314782, 3, 6000),
(1315018, 3, 6000),
(438575, 1, 2000)
(1315314, 3, 6000),
(1315307, 3, 6000),
(1315367, 3, 6000),
(1315324, 3, 6000),
(1315454, 3, 6000),
(1315296, 3, 6000),
(1315088, 3, 6000),
(1315324, 3, 6000),
(438713, 1, 2000)
]
);

Expand Down Expand Up @@ -598,11 +603,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1221738, 5, 10000),
(1227888, 5, 10000),
(1229682, 5, 10000),
(1229044, 5, 10000),
(1220322, 5, 10000)
(1222212, 5, 10000),
(1228362, 5, 10000),
(1230156, 5, 10000),
(1229518, 5, 10000),
(1220796, 5, 10000)
]
);

Expand Down Expand Up @@ -634,11 +639,11 @@ mod tests {
assert_eq!(
file_stats,
[
(1207385, 5, 10000),
(1207116, 5, 10000),
(1207409, 5, 10000),
(1207397, 5, 10000),
(1207652, 5, 10000)
(1207859, 5, 10000),
(1207590, 5, 10000),
(1207883, 5, 10000),
(1207871, 5, 10000),
(1208126, 5, 10000)
]
);

Expand All @@ -663,15 +668,15 @@ mod tests {
assert_eq!(
file_stats,
[
(1315008, 3, 6000),
(1315001, 3, 6000),
(1315061, 3, 6000),
(1315018, 3, 6000),
(1315148, 3, 6000),
(1314990, 3, 6000),
(1314782, 3, 6000),
(1315018, 3, 6000),
(438575, 1, 2000)
(1315314, 3, 6000),
(1315307, 3, 6000),
(1315367, 3, 6000),
(1315324, 3, 6000),
(1315454, 3, 6000),
(1315296, 3, 6000),
(1315088, 3, 6000),
(1315324, 3, 6000),
(438713, 1, 2000)
]
);

Expand Down Expand Up @@ -708,7 +713,7 @@ mod tests {
// files are smaller than the size threshold, but they took too long to fill so were flushed early
assert_eq!(
file_stats,
[(659240, 2, 3001), (658954, 2, 3000), (658750, 2, 2999)]
[(659462, 2, 3001), (659176, 2, 3000), (658972, 2, 2999)]
);

tmpdir.close().unwrap();
Expand Down
2 changes: 1 addition & 1 deletion safekeeper/src/broker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ async fn task_stats(stats: Arc<BrokerStats>) {

let now = BrokerStats::now_millis();
if now > last_pulled && now - last_pulled > warn_duration.as_millis() as u64 {
let ts = chrono::NaiveDateTime::from_timestamp_millis(last_pulled as i64).expect("invalid timestamp");
let ts = chrono::DateTime::from_timestamp_millis(last_pulled as i64).expect("invalid timestamp");
info!("no broker updates for some time, last update: {:?}", ts);
}
}
Expand Down
4 changes: 2 additions & 2 deletions workspace_hack/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ num-bigint = { version = "0.4" }
num-integer = { version = "0.1", features = ["i128"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs", default-features = false, features = ["zstd"] }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
prost = { version = "0.11" }
rand = { version = "0.8", features = ["small_rng"] }
regex = { version = "1" }
Expand Down Expand Up @@ -102,7 +102,7 @@ num-bigint = { version = "0.4" }
num-integer = { version = "0.1", features = ["i128"] }
num-traits = { version = "0.2", features = ["i128", "libm"] }
once_cell = { version = "1" }
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs", default-features = false, features = ["zstd"] }
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["zstd"] }
prost = { version = "0.11" }
regex = { version = "1" }
regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
Expand Down

1 comment on commit a5ecca9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3166 tests run: 3025 passed, 3 failed, 138 skipped (full report)


Failures on Postgres 14

  • test_sharding_autosplit[github-actions-selfhosted]: release
  • test_storage_controller_many_tenants[github-actions-selfhosted]: release
  • test_basebackup_with_high_slru_count[github-actions-selfhosted-vectored-10-13-30]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_sharding_autosplit[release-pg14-github-actions-selfhosted] or test_storage_controller_many_tenants[release-pg14-github-actions-selfhosted] or test_basebackup_with_high_slru_count[release-pg14-github-actions-selfhosted-vectored-10-13-30]"
Flaky tests (1)

Postgres 14

  • test_timeline_deletion_with_files_stuck_in_upload_queue: debug

Code coverage* (full report)

  • functions: 31.3% (6383 of 20401 functions)
  • lines: 47.7% (48646 of 101902 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
a5ecca9 at 2024-05-19T21:03:58.496Z :recycle:

Please sign in to comment.