Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JSON Escape group and dataset names #113

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/rest_vol.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#define BACKOFF_SCALE_FACTOR 1.5
#define BACKOFF_MAX_BEFORE_FAIL 3000000000 /* 30,000,000,000 ns -> 30 sec */

/* Number of unique characters which need to be escaped before being sent as JSON */
#define NUM_JSON_ESCAPE_CHARS 7
/*
* The VOL connector identification number.
*/
Expand Down Expand Up @@ -2233,6 +2235,7 @@ RV_find_object_by_path(RV_object_t *parent_obj, const char *obj_path, H5I_type_t
if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_HTTPGET, 1))
FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set up cURL to make HTTP GET request: %s",
curl_err_buf);

if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_URL, request_url))
FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set cURL request URL: %s", curl_err_buf);

Expand Down Expand Up @@ -3826,3 +3829,84 @@ RV_free_visited_link_hash_table_key(rv_hash_table_key_t value)
RV_free(value);
value = NULL;
} /* end RV_free_visited_link_hash_table_key() */

/*-------------------------------------------------------------------------
 * Function:    RV_JSON_escape_char
 *
 * Purpose:     Private helper for RV_JSON_escape_string. Writes the JSON
 *              string representation of the single byte 'c' into 'seq'
 *              (NOT NUL-terminated) and returns how many bytes were
 *              written:
 *                - 2 bytes for the short escapes \b \f \n \r \t \" \\
 *                - 6 bytes (\u00XX) for any other byte below 0x20
 *                - 1 byte (the byte itself) for everything else
 *
 * Return:      Number of bytes stored in 'seq' (1, 2 or 6)
 */
static size_t
RV_JSON_escape_char(char c, char seq[6])
{
    static const char   hex_digits[] = "0123456789abcdef";
    const unsigned char uc           = (unsigned char)c;
    char                short_code   = '\0';

    switch (c) {
        case '\b':
            short_code = 'b';
            break;
        case '\f':
            short_code = 'f';
            break;
        case '\n':
            short_code = 'n';
            break;
        case '\r':
            short_code = 'r';
            break;
        case '\t':
            short_code = 't';
            break;
        case '\"':
            short_code = '\"';
            break;
        case '\\':
            short_code = '\\';
            break;
        default:
            break;
    }

    if (short_code != '\0') {
        seq[0] = '\\';
        seq[1] = short_code;
        return 2;
    }

    /* Compare as unsigned char so that bytes >= 0x80 (e.g. UTF-8
     * continuation bytes) are not mistaken for control characters when
     * plain 'char' is signed. */
    if (uc < 0x20) {
        seq[0] = '\\';
        seq[1] = 'u';
        seq[2] = '0';
        seq[3] = '0';
        seq[4] = hex_digits[(uc >> 4) & 0x0F];
        seq[5] = hex_digits[uc & 0x0F];
        return 6;
    }

    seq[0] = c;
    return 1;
}

/*-------------------------------------------------------------------------
 * Function:    RV_JSON_escape_string
 *
 * Purpose:     Helper function to escape a NUL-terminated string so that
 *              it can be embedded between double quotes in a JSON
 *              document. Quotation mark and backslash are prefixed with
 *              a backslash; control characters are emitted as \b, \f,
 *              \n, \r, \t or, for any other byte below 0x20, \u00XX.
 *
 *              If 'out' is NULL, *out_size is set to the buffer size (in
 *              bytes, including the NUL terminator) needed to hold the
 *              escaped version of 'in'.
 *
 *              If 'out' is non-NULL, it must be a buffer of *out_size
 *              bytes; it is populated with the NUL-terminated escaped
 *              version of 'in' and *out_size is left unchanged. If the
 *              buffer is too small the call fails before anything is
 *              written to 'out'.
 *
 * Return:      Non-negative on success/Negative on failure (NULL 'in',
 *              NULL 'out_size', or 'out' buffer too small)
 *
 * Programmer:  Matthew Larson
 *              January, 2024
 */
herr_t
RV_JSON_escape_string(const char *in, char *out, size_t *out_size)
{
    size_t needed    = 1; /* Always need room for the NUL terminator */
    herr_t ret_value = SUCCEED;
    char   seq[6];

    if (in == NULL)
        FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "input string is NULL");
    if (out_size == NULL)
        FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "output size pointer is NULL");

    /* First pass: determine the exact size of the escaped string */
    for (const char *p = in; *p != '\0'; p++)
        needed += RV_JSON_escape_char(*p, seq);

    if (out == NULL) {
        /* Size query only */
        *out_size = needed;
    }
    else {
        char *out_ptr = out;

        /* Validate up front so a failed call never leaves a partially
         * written, unterminated buffer behind */
        if (*out_size < needed)
            FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "buffer too small for encoded string");

        /* Second pass: populate provided buffer */
        for (const char *p = in; *p != '\0'; p++) {
            size_t seq_len = RV_JSON_escape_char(*p, seq);

            for (size_t k = 0; k < seq_len; k++)
                *out_ptr++ = seq[k];
        }

        *out_ptr = '\0';
    }

done:

    return ret_value;
}
3 changes: 3 additions & 0 deletions src/rest_vol.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,9 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id
herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested,
server_api_version server_version);

/* Helper function to escape control characters for JSON strings */
herr_t RV_JSON_escape_string(const char *in, char *out, size_t *out_size);

#define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed) \
(version.major > major_needed) || (version.major == major_needed && version.minor > minor_needed) || \
(version.major == major_needed && version.minor == minor_needed && version.patch >= patch_needed)
Expand Down
28 changes: 21 additions & 7 deletions src/rest_vol_dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -3568,6 +3568,7 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
char *creation_properties_body = NULL;
char *link_body = NULL;
char *path_dirname = NULL;
char *escaped_link_name = NULL;
int create_request_len = 0;
int link_body_len = 0;
herr_t ret_value = SUCCEED;
Expand Down Expand Up @@ -3612,11 +3613,12 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
if (name) {
hbool_t empty_dirname;
char target_URI[URI_MAX_LENGTH];
const char *const link_basename = H5_rest_basename(name);
const char *const link_body_format = "\"link\": {"
"\"id\": \"%s\", "
"\"name\": \"%s\""
"}";
const char *const link_basename = H5_rest_basename(name);
const char *const link_body_format = "\"link\": {"
"\"id\": \"%s\", "
"\"name\": \"%s\""
"}";
size_t escaped_name_size = 0;

#ifdef RV_CONNECTOR_DEBUG
printf("-> Creating JSON link for dataset\n\n");
Expand All @@ -3643,15 +3645,25 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
FUNC_GOTO_ERROR(H5E_DATASET, H5E_PATH, FAIL, "can't locate target for dataset link");
} /* end if */

link_body_nalloc = strlen(link_body_format) + strlen(link_basename) +
/* JSON-escape link name */
if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't get length of JSON escaped link name");

if ((escaped_link_name = RV_malloc(escaped_name_size)) == NULL)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for escaped link name");

if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't JSON escape link name");

link_body_nalloc = strlen(link_body_format) + strlen(escaped_link_name) +
(empty_dirname ? strlen(pobj->URI) : strlen(target_URI)) + 1;
if (NULL == (link_body = (char *)RV_malloc(link_body_nalloc)))
FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for dataset link body");

/* Form the Dataset Creation Link portion of the Dataset create request using the above format
* specifier and the corresponding arguments */
if ((link_body_len = snprintf(link_body, link_body_nalloc, link_body_format,
empty_dirname ? pobj->URI : target_URI, link_basename)) < 0)
empty_dirname ? pobj->URI : target_URI, escaped_link_name)) < 0)
FUNC_GOTO_ERROR(H5E_DATASET, H5E_SYSERRSTR, FAIL, "snprintf error");

if ((size_t)link_body_len >= link_body_nalloc)
Expand Down Expand Up @@ -3721,6 +3733,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t
RV_free(shape_body);
if (datatype_body)
RV_free(datatype_body);
if (escaped_link_name)
RV_free(escaped_link_name);

return ret_value;
} /* end RV_setup_dataset_create_request_body() */
Expand Down
19 changes: 17 additions & 2 deletions src/rest_vol_group.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
char *base64_plist_buffer = NULL;
char target_URI[URI_MAX_LENGTH];
char request_url[URL_MAX_LENGTH];
char *escaped_group_name = NULL;
int create_request_body_len = 0;
int url_len = 0;
void *binary_plist_buffer = NULL;
Expand Down Expand Up @@ -128,6 +129,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
if (name) {
const char *path_basename = H5_rest_basename(name);
hbool_t empty_dirname;
size_t escaped_name_size = 0;

#ifdef RV_CONNECTOR_DEBUG
printf("-> Creating JSON link for group\n\n");
Expand Down Expand Up @@ -204,15 +206,25 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
if (RV_base64_encode(binary_plist_buffer, plist_nalloc, &base64_plist_buffer, &base64_buf_size) < 0)
FUNC_GOTO_ERROR(H5E_PLIST, H5E_CANTENCODE, NULL, "failed to base64 encode plist binary");

create_request_nalloc = strlen(fmt_string) + strlen(path_basename) +
/* Escape group name to be sent as JSON */
if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't get size of JSON escaped group name");

if ((escaped_group_name = RV_malloc(escaped_name_size)) == NULL)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTALLOC, NULL, "can't allocate space for escaped group name");

if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't JSON escape group name");

create_request_nalloc = strlen(fmt_string) + strlen(escaped_group_name) +
(empty_dirname ? strlen(parent->URI) : strlen(target_URI)) + base64_buf_size +
1;
if (NULL == (create_request_body = (char *)RV_malloc(create_request_nalloc)))
FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTALLOC, NULL,
"can't allocate space for group create request body");

if ((create_request_body_len = snprintf(create_request_body, create_request_nalloc, fmt_string,
empty_dirname ? parent->URI : target_URI, path_basename,
empty_dirname ? parent->URI : target_URI, escaped_group_name,
(char *)base64_plist_buffer)) < 0)
FUNC_GOTO_ERROR(H5E_SYM, H5E_SYSERRSTR, NULL, "snprintf error");

Expand Down Expand Up @@ -323,6 +335,9 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name
curl_headers = NULL;
} /* end if */

if (escaped_group_name)
RV_free(escaped_group_name);

PRINT_ERROR_STACK;

return ret_value;
Expand Down