Skip to content

Commit

Permalink
Use Path instead of String for output dir.
Browse files Browse the repository at this point in the history
  • Loading branch information
kskalski committed Aug 18, 2023
1 parent a3371c2 commit c50bfe6
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 41 deletions.
9 changes: 3 additions & 6 deletions crates/cli/src/parse/file_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,9 @@ use crate::args::Args;

pub(crate) fn parse_file_output(args: &Args, source: &Source) -> Result<FileOutput, ParseError> {
// process output directory
let output_dir = std::fs::canonicalize(args.output_dir.clone())
.map_err(|_e| {
ParseError::ParseError("Failed to canonicalize output directory".to_string())
})?
.to_string_lossy()
.into_owned();
let output_dir = std::fs::canonicalize(args.output_dir.clone()).map_err(|_e| {
ParseError::ParseError("Failed to canonicalize output directory".to_string())
})?;
match fs::create_dir_all(&output_dir) {
Ok(_) => {}
Err(e) => return Err(ParseError::ParseError(format!("Error creating directory: {}", e))),
Expand Down
2 changes: 1 addition & 1 deletion crates/cli/src/summaries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ pub(crate) fn print_cryo_summary(
print_bullet("inner request size", source.inner_request_size.to_string());
};
print_bullet("output format", sink.format.as_str());
print_bullet("output dir", &sink.output_dir);
print_bullet("output dir", sink.output_dir.to_string_lossy());
match report_path {
None => print_bullet("report file", "None"),
Some(path) => print_bullet("report file", path),
Expand Down
6 changes: 3 additions & 3 deletions crates/freeze/src/types/chunks/chunk.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::types::{Datatype, FileError, FileOutput};
use std::collections::HashMap;
use std::{collections::HashMap, path::PathBuf};

use super::{binary_chunk::BinaryChunk, chunk_ops::ChunkData, number_chunk::NumberChunk};

Expand Down Expand Up @@ -33,7 +33,7 @@ impl Chunk {
datatype: &Datatype,
file_output: &FileOutput,
chunk_label: &Option<String>,
) -> Result<String, FileError> {
) -> Result<PathBuf, FileError> {
match self {
Chunk::Block(chunk) => chunk.filepath(datatype, file_output, chunk_label),
Chunk::Transaction(chunk) => chunk.filepath(datatype, file_output, chunk_label),
Expand All @@ -47,7 +47,7 @@ impl Chunk {
datatypes: Vec<&Datatype>,
file_output: &FileOutput,
chunk_label: &Option<String>,
) -> Result<HashMap<Datatype, String>, FileError> {
) -> Result<HashMap<Datatype, PathBuf>, FileError> {
let mut paths = HashMap::new();
for datatype in datatypes {
let path = self.filepath(datatype, file_output, chunk_label)?;
Expand Down
7 changes: 2 additions & 5 deletions crates/freeze/src/types/chunks/chunk_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub trait ChunkData: Sized {
datatype: &Datatype,
file_output: &FileOutput,
chunk_label: &Option<String>,
) -> Result<String, FileError> {
) -> Result<std::path::PathBuf, FileError> {
let network_name = file_output.prefix.clone();
let stub = match chunk_label {
Some(chunk_label) => chunk_label.clone(),
Expand All @@ -46,10 +46,7 @@ pub trait ChunkData: Sized {
None => vec![network_name, datatype.dataset().name().to_string(), stub],
};
let filename = format!("{}.{}", pieces.join("__"), file_output.format.as_str());
match file_output.output_dir.as_str() {
"." => Ok(filename),
output_dir => Ok(output_dir.to_string() + "/" + filename.as_str()),
}
Ok(file_output.output_dir.join(filename))
}
}

Expand Down
26 changes: 14 additions & 12 deletions crates/freeze/src/types/dataframes/export.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use std::collections::HashMap;
use std::{
collections::HashMap,
path::{Path, PathBuf},
};

use polars::prelude::*;

use crate::types::{FileError, FileOutput};

pub(crate) fn dfs_to_files<T>(
dfs: &mut HashMap<T, DataFrame>,
filenames: &HashMap<T, String>,
filenames: &HashMap<T, PathBuf>,
file_output: &FileOutput,
) -> Result<(), FileError>
where
Expand All @@ -28,15 +31,14 @@ where
/// write polars dataframe to file
pub(crate) fn df_to_file(
df: &mut DataFrame,
filename: &str,
filename: &Path,
file_output: &FileOutput,
) -> Result<(), FileError> {
let binding = filename.to_string() + "_tmp";
let tmp_filename = binding.as_str();
let result = match filename {
_ if filename.ends_with(".parquet") => df_to_parquet(df, tmp_filename, file_output),
_ if filename.ends_with(".csv") => df_to_csv(df, tmp_filename),
_ if filename.ends_with(".json") => df_to_json(df, tmp_filename),
let tmp_filename = filename.with_extension("_tmp");
let result = match filename.extension().and_then(|ex| ex.to_str()) {
Some("parquet") => df_to_parquet(df, &tmp_filename, file_output),
Some("csv") => df_to_csv(df, &tmp_filename),
Some("json") => df_to_json(df, &tmp_filename),
_ => return Err(FileError::FileWriteError),
};
match result {
Expand All @@ -48,7 +50,7 @@ pub(crate) fn df_to_file(
/// write polars dataframe to parquet file
fn df_to_parquet(
df: &mut DataFrame,
filename: &str,
filename: &Path,
file_output: &FileOutput,
) -> Result<(), FileError> {
let file = std::fs::File::create(filename).map_err(|_e| FileError::FileWriteError)?;
Expand All @@ -64,7 +66,7 @@ fn df_to_parquet(
}

/// write polars dataframe to csv file
fn df_to_csv(df: &mut DataFrame, filename: &str) -> Result<(), FileError> {
fn df_to_csv(df: &mut DataFrame, filename: &Path) -> Result<(), FileError> {
let file = std::fs::File::create(filename).map_err(|_e| FileError::FileWriteError)?;
let result = CsvWriter::new(file).finish(df);
match result {
Expand All @@ -74,7 +76,7 @@ fn df_to_csv(df: &mut DataFrame, filename: &str) -> Result<(), FileError> {
}

/// write polars dataframe to json file
fn df_to_json(df: &mut DataFrame, filename: &str) -> Result<(), FileError> {
fn df_to_json(df: &mut DataFrame, filename: &Path) -> Result<(), FileError> {
let file = std::fs::File::create(filename).map_err(|_e| FileError::FileWriteError)?;
let result = JsonWriter::new(file).with_json_format(JsonFormat::Json).finish(df);
match result {
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/types/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use polars::prelude::*;
#[derive(Clone)]
pub struct FileOutput {
/// Path of directory where to save files
pub output_dir: String,
pub output_dir: std::path::PathBuf,
/// Prefix of file name
pub prefix: String,
/// Suffix to use at the end of file names
Expand Down
10 changes: 4 additions & 6 deletions crates/freeze/src/types/queries.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::{HashMap, HashSet};
use std::{collections::{HashMap, HashSet}, path::{Path, PathBuf}};

use ethers::prelude::*;

Expand Down Expand Up @@ -34,7 +34,7 @@ pub struct MultiQuery {
impl MultiQuery {
/// get number of chunks that have not yet been collected
pub fn get_n_chunks_remaining(&self, sink: &FileOutput) -> Result<u64, FreezeError> {
let actual_files: HashSet<String> = list_files(&sink.output_dir)
let actual_files: HashSet<PathBuf> = list_files(&sink.output_dir)
.map_err(|_e| {
FreezeError::CollectError(CollectError::CollectError(
"could not list files in output dir".to_string(),
Expand All @@ -53,13 +53,11 @@ impl MultiQuery {
}
}

fn list_files(dir: &str) -> Result<Vec<String>, std::io::Error> {
fn list_files(dir: &Path) -> Result<Vec<PathBuf>, std::io::Error> {
let mut file_list = Vec::new();
for entry in std::fs::read_dir(dir)? {
let entry = entry?;
if let Some(filename) = entry.path().to_str() {
file_list.push(filename.to_string());
}
file_list.push(entry.path());
}
Ok(file_list)
}
Expand Down
12 changes: 6 additions & 6 deletions crates/freeze/src/types/summaries.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::types::Datatype;
use std::collections::HashMap;
use std::{collections::HashMap, path::PathBuf};

/// Summary of freeze operation
#[derive(serde::Serialize, Debug)]
Expand All @@ -11,7 +11,7 @@ pub struct FreezeSummary {
/// number of chunks that encountered an error
pub n_errored: u64,
/// paths
pub paths: HashMap<Datatype, Vec<String>>,
pub paths: HashMap<Datatype, Vec<PathBuf>>,
}

pub(crate) trait FreezeSummaryAgg {
Expand Down Expand Up @@ -49,19 +49,19 @@ pub struct FreezeChunkSummary {
/// whether chunk encountered an error
pub errored: bool,
/// output paths
pub paths: HashMap<Datatype, String>,
pub paths: HashMap<Datatype, PathBuf>,
}

impl FreezeChunkSummary {
pub(crate) fn success(paths: HashMap<Datatype, String>) -> FreezeChunkSummary {
pub(crate) fn success(paths: HashMap<Datatype, PathBuf>) -> FreezeChunkSummary {
FreezeChunkSummary { skipped: false, errored: false, paths }
}

pub(crate) fn error(paths: HashMap<Datatype, String>) -> FreezeChunkSummary {
pub(crate) fn error(paths: HashMap<Datatype, PathBuf>) -> FreezeChunkSummary {
FreezeChunkSummary { skipped: false, errored: true, paths }
}

pub(crate) fn skip(paths: HashMap<Datatype, String>) -> FreezeChunkSummary {
pub(crate) fn skip(paths: HashMap<Datatype, PathBuf>) -> FreezeChunkSummary {
FreezeChunkSummary { skipped: true, errored: false, paths }
}
}
2 changes: 1 addition & 1 deletion crates/python/src/freeze_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ pub fn _freeze(
let paths = PyDict::new(py);
for (key, values) in &result.paths {
let key = key.dataset().name();
let values: Vec<&str> = values.iter().map(AsRef::as_ref).collect();
let values: Vec<&str> = values.iter().filter_map(|p| p.to_str()).collect();
paths.set_item(key, values).unwrap();
}
let paths = paths.to_object(py);
Expand Down

0 comments on commit c50bfe6

Please sign in to comment.