From 09c49da081586ba5048229044d77efd2d4a7125d Mon Sep 17 00:00:00 2001
From: Michael Graeb
Date: Thu, 12 Oct 2023 18:15:10 +0000
Subject: [PATCH] aws_byte_buf_init_from_special_file()

---
 include/aws/common/byte_buf.h |  12 ++++
 source/file.c                 | 131 +++++++++++++++++++++++++---------
 tests/CMakeLists.txt          |   1 +
 tests/file_test.c             | 101 ++++++++++++++++++++++++++
 4 files changed, 210 insertions(+), 35 deletions(-)

diff --git a/include/aws/common/byte_buf.h b/include/aws/common/byte_buf.h
index 17b0ae59b..dc41fc1c4 100644
--- a/include/aws/common/byte_buf.h
+++ b/include/aws/common/byte_buf.h
@@ -135,6 +135,18 @@ AWS_COMMON_API int aws_byte_buf_init_copy(
 AWS_COMMON_API
 int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename);
 
+/**
+ * Same as aws_byte_buf_init_from_file(), but for reading "special files" like /proc/cpuinfo.
+ * These files don't accurately report their size, so size_hint is used as the initial buffer size,
+ * and the buffer grows until the whole file is read.
+ */
+AWS_COMMON_API
+int aws_byte_buf_init_from_special_file(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    size_t size_hint);
+
 /**
  * Evaluates the set of properties that define the shape of all valid aws_byte_buf structures.
  * It is also a cheap check, in the sense it run in constant time (i.e., no loops or recursion).
diff --git a/source/file.c b/source/file.c
index 00723555f..35b90fceb 100644
--- a/source/file.c
+++ b/source/file.c
@@ -11,6 +11,17 @@
 
 #include <errno.h>
 
+/* For "special files", there's no point querying file size before reading.
+ * For example, on Amazon Linux 2:
+ * /proc/cpuinfo: size is 0, but contents are several KB of data.
+ * /sys/devices/virtual/dmi/id/product_name: size is 4096, but contents are "c5.2xlarge"
+ *
+ * Therefore, let users pass a hint for the buffer's initial size,
+ * and grow the buffer as necessary as we read until EOF.
+ * This is the min/max step size for growth. */
+#define MIN_BUFFER_GROWTH_READING_SPECIAL_FILES 32
+#define MAX_BUFFER_GROWTH_READING_SPECIAL_FILES 4096
+
 FILE *aws_fopen(const char *file_path, const char *mode) {
     if (!file_path || strlen(file_path) == 0) {
         AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to open file. path is empty");
@@ -34,7 +45,12 @@ FILE *aws_fopen(const char *file_path, const char *mode) {
     return file;
 }
 
-int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
+static int s_byte_buf_init_from_file(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    bool use_file_size_as_hint,
+    size_t size_hint) {
     AWS_ZERO_STRUCT(*out_buf);
 
     FILE *fp = aws_fopen(filename, "rb");
@@ -42,46 +58,78 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
         goto error;
     }
 
-    int64_t len64 = 0;
-    if (aws_file_get_length(fp, &len64)) {
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: Failed to get file length. file:'%s' error:%s",
-            filename,
-            aws_error_name(aws_last_error()));
-        goto error;
-    }
+    if (use_file_size_as_hint) {
+        int64_t len64 = 0;
+        if (aws_file_get_length(fp, &len64)) {
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: Failed to get file length. file:'%s' error:%s",
+                filename,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
 
-    if (len64 >= SIZE_MAX) {
-        aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: File too large to read into memory. file:'%s' error:%s",
-            filename,
-            aws_error_name(aws_last_error()));
-        goto error;
+        if (len64 >= SIZE_MAX) {
+            aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: File too large to read into memory. file:'%s' error:%s",
+                filename,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
+
+        /* Leave space for null terminator at end of buffer */
+        size_hint = (size_t)len64 + 1;
     }
 
-    size_t allocation_size = (size_t)len64 + 1;
-    aws_byte_buf_init(out_buf, alloc, allocation_size);
+    aws_byte_buf_init(out_buf, alloc, size_hint);
+
+    /* Read in a loop until we hit EOF */
+    while (true) {
+        /* Expand buffer if necessary (at a reasonable rate) */
+        if (out_buf->len == out_buf->capacity) {
+            size_t additional_capacity = out_buf->capacity;
+            additional_capacity = aws_max_size(MIN_BUFFER_GROWTH_READING_SPECIAL_FILES, additional_capacity);
+            additional_capacity = aws_min_size(MAX_BUFFER_GROWTH_READING_SPECIAL_FILES, additional_capacity);
+            if (aws_byte_buf_reserve_relative(out_buf, additional_capacity)) {
+                AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
+                goto error;
+            }
+        }
 
-    /* Ensure compatibility with null-terminated APIs, but don't consider
-     * the null terminator part of the length of the payload */
-    out_buf->len = out_buf->capacity - 1;
-    out_buf->buffer[out_buf->len] = 0;
+        size_t space_available = out_buf->capacity - out_buf->len;
+        size_t bytes_read = fread(out_buf->buffer + out_buf->len, 1, space_available, fp);
+        out_buf->len += bytes_read;
 
-    size_t read = fread(out_buf->buffer, 1, out_buf->len, fp);
-    if (read < out_buf->len) {
-        int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
-        aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
-        AWS_LOGF_ERROR(
-            AWS_LS_COMMON_IO,
-            "static: Failed reading file:'%s' errno:%d aws-error:%s",
-            filename,
-            errno_value,
-            aws_error_name(aws_last_error()));
-        goto error;
+        /* If EOF, we're done! */
+        if (feof(fp)) {
+            break;
+        }
+
+        /* If no EOF but we read 0 bytes, there's been an error, or at least we need
+         * to treat it like one, because we can't just loop infinitely. */
+        if (bytes_read == 0) {
+            int errno_value = ferror(fp) ? errno : 0; /* Always cache errno before potential side-effect */
+            aws_translate_and_raise_io_error_or(errno_value, AWS_ERROR_FILE_READ_FAILURE);
+            AWS_LOGF_ERROR(
+                AWS_LS_COMMON_IO,
+                "static: Failed reading file:'%s' errno:%d aws-error:%s",
+                filename,
+                errno_value,
+                aws_error_name(aws_last_error()));
+            goto error;
+        }
+    }
+
+    /* A null terminator is appended, but is not included as part of the length field. */
+    if (out_buf->len == out_buf->capacity) {
+        if (aws_byte_buf_reserve_relative(out_buf, 1)) {
+            AWS_LOGF_ERROR(AWS_LS_COMMON_IO, "static: Failed to grow buffer for file:'%s'", filename);
+            goto error;
+        }
     }
+    out_buf->buffer[out_buf->len] = 0;
 
     fclose(fp);
     return AWS_OP_SUCCESS;
@@ -94,6 +142,19 @@ int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocat
     return AWS_OP_ERR;
 }
 
+int aws_byte_buf_init_from_file(struct aws_byte_buf *out_buf, struct aws_allocator *alloc, const char *filename) {
+    return s_byte_buf_init_from_file(out_buf, alloc, filename, true /*use_file_size_as_hint*/, 0 /*size_hint*/);
+}
+
+int aws_byte_buf_init_from_special_file(
+    struct aws_byte_buf *out_buf,
+    struct aws_allocator *alloc,
+    const char *filename,
+    size_t size_hint) {
+
+    return s_byte_buf_init_from_file(out_buf, alloc, filename, false /*use_file_size_as_hint*/, size_hint);
+}
+
 bool aws_is_any_directory_separator(char value) {
     return value == '\\' || value == '/';
 }
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index f2b580dfa..185a83532 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -480,6 +480,7 @@ add_test_case(directory_move_src_non_existent_test)
 add_test_case(test_home_directory_not_null)
 add_test_case(test_normalize_posix_directory_separator)
 add_test_case(test_normalize_windows_directory_separator)
+add_test_case(test_byte_buf_init_from_file)
 
 add_test_case(promise_test_wait_forever)
 add_test_case(promise_test_wait_for_a_bit)
diff --git a/tests/file_test.c b/tests/file_test.c
index 6eedd264e..55e7b476a 100644
--- a/tests/file_test.c
+++ b/tests/file_test.c
@@ -2,6 +2,7 @@
  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
  * SPDX-License-Identifier: Apache-2.0.
  */
+#include <aws/common/device_random.h>
 #include <aws/common/file.h>
 
 #include <aws/testing/aws_test_harness.h>
@@ -439,3 +440,103 @@ static int s_test_normalize_windows_directory_separator(struct aws_allocator *al
 }
 
 AWS_TEST_CASE(test_normalize_windows_directory_separator, s_test_normalize_windows_directory_separator);
+
+static int s_check_byte_buf_from_file(const struct aws_byte_buf *buf, struct aws_byte_cursor expected_contents) {
+    ASSERT_TRUE(aws_byte_cursor_eq_byte_buf(&expected_contents, buf), "Contents should match");
+    ASSERT_TRUE(buf->capacity > buf->len, "Buffer should end with null-terminator");
+    ASSERT_UINT_EQUALS(0, buf->buffer[buf->len], "Buffer should end with null-terminator");
+    return AWS_OP_SUCCESS;
+}
+
+static int s_create_file_then_read_it(struct aws_allocator *allocator, struct aws_byte_cursor contents) {
+    /* create file */
+    const char *filename = "testy";
+    FILE *f = aws_fopen(filename, "wb");
+    ASSERT_UINT_EQUALS(contents.len, fwrite(contents.ptr, 1, contents.len, f));
+    ASSERT_INT_EQUALS(0, fclose(f));
+
+    struct aws_byte_buf buf;
+
+    /* check aws_byte_buf_init_from_file() */
+    ASSERT_SUCCESS(aws_byte_buf_init_from_file(&buf, allocator, filename));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* now we check aws_byte_buf_init_from_special_file() with different size_hints */
+
+    /* size_hint more than big enough */
+    size_t size_hint = contents.len * 2;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_special_file(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint not big enough for null-terminator */
+    size_hint = contents.len;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_special_file(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint 0 */
+    size_hint = 0;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_special_file(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    /* size_hint 1 */
+    size_hint = 1;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_special_file(&buf, allocator, filename, size_hint));
+    ASSERT_SUCCESS(s_check_byte_buf_from_file(&buf, contents));
+    aws_byte_buf_clean_up(&buf);
+
+    remove(filename);
+    return AWS_OP_SUCCESS;
+}
+
+/* Read an actual "special file" (if it exists on this machine) */
+static int s_read_special_file(struct aws_allocator *allocator, const char *filename) {
+    struct aws_string *filename_str = aws_string_new_from_c_str(allocator, filename);
+    bool exists = aws_path_exists(filename_str);
+    aws_string_destroy(filename_str);
+    if (!exists) {
+        return AWS_OP_SUCCESS;
+    }
+
+    struct aws_byte_buf buf;
+    ASSERT_SUCCESS(aws_byte_buf_init_from_special_file(&buf, allocator, filename, 128));
+    ASSERT_TRUE(buf.capacity > buf.len, "Buffer should end with null-terminator");
+    ASSERT_UINT_EQUALS(0, buf.buffer[buf.len], "Buffer should end with null-terminator");
+
+    if (strcmp("/dev/null", filename) != 0) {
+        ASSERT_TRUE(buf.len > 0, "expected special file to have data");
+    }
+
+    aws_byte_buf_clean_up(&buf);
+    return AWS_OP_SUCCESS;
+}
+
+static int s_test_byte_buf_init_from_file(struct aws_allocator *allocator, void *ctx) {
+    (void)ctx;
+
+    /* simple text file */
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("asdf")));
+
+    /* empty file */
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_c_str("")));
+
+    /* large 3MB+1byte binary file */
+    struct aws_byte_buf big_rando;
+    aws_byte_buf_init(&big_rando, allocator, (1024 * 1024 * 3) + 1);
+    ASSERT_SUCCESS(aws_device_random_buffer(&big_rando));
+    ASSERT_SUCCESS(s_create_file_then_read_it(allocator, aws_byte_cursor_from_buf(&big_rando)));
+    aws_byte_buf_clean_up(&big_rando);
+
+    /* test aws_byte_buf_init_from_special_file() on actual "special files" (if they exist) */
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/cpuinfo"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/proc/net/tcp"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/sys/devices/virtual/dmi/id/sys_vendor"));
+    ASSERT_SUCCESS(s_read_special_file(allocator, "/dev/null"));
+
+    return AWS_OP_SUCCESS;
+}
+
+AWS_TEST_CASE(test_byte_buf_init_from_file, s_test_byte_buf_init_from_file)
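
--
For reviewers, a minimal sketch of how a caller might use the new API (the
wrapper function name, the file path, and the 64-byte hint below are
illustrative assumptions, not part of this patch):

    #include <aws/common/byte_buf.h>
    #include <stdio.h>

    /* Print /proc/cpuinfo, whose reported size is 0, so the upfront size
     * query done by aws_byte_buf_init_from_file() would be useless. */
    static int s_print_cpuinfo(struct aws_allocator *alloc) {
        struct aws_byte_buf buf;
        /* 64 is only a starting size; the buffer grows until EOF is reached */
        if (aws_byte_buf_init_from_special_file(&buf, alloc, "/proc/cpuinfo", 64)) {
            return AWS_OP_ERR; /* aws_last_error() holds the failure code */
        }
        /* contents are null-terminated; the terminator is not counted in buf.len */
        printf("%s", (const char *)buf.buffer);
        aws_byte_buf_clean_up(&buf);
        return AWS_OP_SUCCESS;
    }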