From 62d4376a530dd501a607808eed9244c3783989d5 Mon Sep 17 00:00:00 2001
From: Kevin Newton
Date: Fri, 1 Mar 2024 14:45:36 -0500
Subject: [PATCH] Parse files from Ruby API using fread, not mmap

---
 ext/prism/extension.c          |   2 +-
 include/prism/util/pm_string.h |  11 ++++
 src/util/pm_string.c           | 124 ++++++++++++++++++++++++++++++++-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/ext/prism/extension.c b/ext/prism/extension.c
index c14e5165db3..0ae4a30b26e 100644
--- a/ext/prism/extension.c
+++ b/ext/prism/extension.c
@@ -254,7 +254,7 @@ file_options(int argc, VALUE *argv, pm_string_t *input, pm_options_t *options) {
 
     const char * string_source = (const char *) pm_string_source(&options->filepath);
 
-    if (!pm_string_mapped_init(input, string_source)) {
+    if (!pm_string_file_init(input, string_source)) {
         pm_options_free(options);
 
 #ifdef _WIN32
diff --git a/include/prism/util/pm_string.h b/include/prism/util/pm_string.h
index d6442fc608d..a68e2a7c912 100644
--- a/include/prism/util/pm_string.h
+++ b/include/prism/util/pm_string.h
@@ -109,6 +109,17 @@ void pm_string_constant_init(pm_string_t *string, const char *source, size_t len
  */
 PRISM_EXPORTED_FUNCTION bool pm_string_mapped_init(pm_string_t *string, const char *filepath);
 
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * @param string The string to initialize.
+ * @param filepath The filepath to read.
+ * @return Whether or not the file was successfully read.
+ */
+PRISM_EXPORTED_FUNCTION bool pm_string_file_init(pm_string_t *string, const char *filepath);
+
 /**
  * Returns the memory size associated with the string.
  *
diff --git a/src/util/pm_string.c b/src/util/pm_string.c
index fea3f1d2cf3..753429a2336 100644
--- a/src/util/pm_string.c
+++ b/src/util/pm_string.c
@@ -58,7 +58,7 @@ pm_string_constant_init(pm_string_t *string, const char *source, size_t length)
  * `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
  * `mmap`, and on other POSIX systems we'll use `read`.
  */
-bool
+PRISM_EXPORTED_FUNCTION bool
 pm_string_mapped_init(pm_string_t *string, const char *filepath) {
 #ifdef _WIN32
     // Open the file for reading.
@@ -143,6 +143,128 @@ pm_string_mapped_init(pm_string_t *string, const char *filepath) {
 #endif
 }
 
+/**
+ * Read the file indicated by the filepath parameter into source and load its
+ * contents and size into the given `pm_string_t`. The given `pm_string_t`
+ * should be freed using `pm_string_free` when it is no longer used.
+ *
+ * An empty file yields a constant empty string; otherwise the contents are
+ * copied into a heap buffer owned by the string. On any failure the function
+ * returns false and leaves `string` untouched.
+ */
+PRISM_EXPORTED_FUNCTION bool
+pm_string_file_init(pm_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        return false;
+    }
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return. The backing array is
+    // static so that the stored pointer stays valid after we return.
+    if (file_size == 0) {
+        CloseHandle(file);
+        static const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    // Create a buffer to read the file into.
+    uint8_t *source = malloc(file_size);
+    if (source == NULL) {
+        CloseHandle(file);
+        return false;
+    }
+
+    // Read the contents of the file. The buffer has not been handed off to
+    // the string yet, so it must be released here on failure.
+    DWORD bytes_read;
+    if (!ReadFile(file, source, file_size, &bytes_read, NULL)) {
+        free(source);
+        CloseHandle(file);
+        return false;
+    }
+
+    // Check the number of bytes read
+    if (bytes_read != file_size) {
+        free(source);
+        CloseHandle(file);
+        return false;
+    }
+
+    CloseHandle(file);
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = (size_t) file_size };
+    return true;
+#elif defined(_POSIX_MAPPED_FILES)
+    FILE *file = fopen(filepath, "rb");
+    if (file == NULL) {
+        return false;
+    }
+
+    // Seek to the end of the file so that ftell reports its size.
+    if (fseek(file, 0, SEEK_END) != 0) {
+        fclose(file);
+        return false;
+    }
+
+    long file_size = ftell(file);
+    if (file_size < 0) {
+        fclose(file);
+        return false;
+    }
+
+    // An empty file needs no buffer; point at a static empty string so that
+    // the stored pointer stays valid after we return.
+    if (file_size == 0) {
+        fclose(file);
+        static const uint8_t source[] = "";
+        *string = (pm_string_t) { .type = PM_STRING_CONSTANT, .source = source, .length = 0 };
+        return true;
+    }
+
+    size_t length = (size_t) file_size;
+    uint8_t *source = malloc(length);
+    if (source == NULL) {
+        fclose(file);
+        return false;
+    }
+
+    // Rewind and read the whole file as a single item, so fread returns 1
+    // only when every byte was read.
+    if (fseek(file, 0, SEEK_SET) != 0) {
+        free(source);
+        fclose(file);
+        return false;
+    }
+
+    size_t bytes_read = fread(source, length, 1, file);
+    fclose(file);
+
+    if (bytes_read != 1) {
+        free(source);
+        return false;
+    }
+
+    *string = (pm_string_t) { .type = PM_STRING_OWNED, .source = source, .length = length };
+    return true;
+#else
+    (void) string;
+    (void) filepath;
+    perror("pm_string_file_init is not implemented for this platform");
+    return false;
+#endif
+}
+
 /**
  * Returns the memory size associated with the string.
  */