From e7956e2a26709b4410ddfdc94e93950432e47d2d Mon Sep 17 00:00:00 2001
From: Matthew Larson
Date: Tue, 31 Oct 2023 20:42:21 -0500
Subject: [PATCH] Tests for binary transfer of fixed UTF8 string

---
 tests/integ/attr_test.py    |  92 ++++++++++++++++++--
 tests/integ/dataset_test.py | 162 ++++++++++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+), 6 deletions(-)

diff --git a/tests/integ/attr_test.py b/tests/integ/attr_test.py
index 2dbc1e57..370fabd2 100644
--- a/tests/integ/attr_test.py
+++ b/tests/integ/attr_test.py
@@ -564,9 +564,9 @@ def testPutVLenUTF8String(self):
         self.assertTrue("charSet" in type_json)
         self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")
 
-    def testPutFixedUTF8String(self):
+    def testPutFixedUTF8StringAttribute(self):
         # Test PUT value for 1d attribute with fixed length UTF-8 string
-        print("testPutFixedUTF8String", self.base_domain)
+        print("testPutFixedUTF8StringAttribute", self.base_domain)
 
         headers = helper.getRequestHeaders(domain=self.base_domain)
         req = self.endpoint + "/"
@@ -581,17 +581,19 @@ def testPutFixedUTF8String(self):
         # create attr
         text = "this is the chinese character for the number eight: \u516b"
 
-        text_length = len(text) + 1
+        # size of datatype is in bytes
+        byte_length = len(bytearray(text, "UTF-8"))
+
         fixed_str_type = {
             "charSet": "H5T_CSET_UTF8",
             "class": "H5T_STRING",
-            "length": text_length,
+            "length": byte_length + 1,
             "strPad": "H5T_STR_NULLTERM",
         }
 
         scalar_shape = {"class": "H5S_SCALAR"}
         data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
-        attr_name = "str_attr"
+        attr_name = "fixed_unicode_str_attr"
         req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
         rsp = self.session.put(req, data=json.dumps(data), headers=headers)
         self.assertEqual(rsp.status_code, 201)
@@ -608,7 +610,85 @@ def testPutFixedUTF8String(self):
         self.assertTrue("class" in type_json)
         self.assertEqual(type_json["class"], "H5T_STRING")
         self.assertTrue("length" in type_json)
-        self.assertEqual(type_json["length"], text_length)
+        self.assertEqual(type_json["length"], byte_length + 1)
+        self.assertTrue("strPad" in type_json)
+        self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
+        self.assertTrue("charSet" in type_json)
+        self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")
+
+        # write different utf8 string of same overall byte length
+        text = "this is the chinese character for the number eight: 888"
+        new_byte_length = len(bytearray(text, "UTF-8"))
+        self.assertEqual(byte_length, new_byte_length)
+
+        data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
+        req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + "/value"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+    def testPutFixedUTF8StringAttributeBinary(self):
+        # Test PUT value for 1d attribute with fixed length UTF-8 string in binary
+        print("testPutFixedUTF8StringAttributeBinary", self.base_domain)
+
+        headers = helper.getRequestHeaders(domain=self.base_domain)
+        req = self.endpoint + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        # create attr from a JSON body, null byte explicitly included in the value
+        text = "this is the chinese character for the number eight: \u516b\x00"
+        binary_text = bytearray(text, "UTF-8")
+        byte_length = len(binary_text)
+
+        fixed_str_type = {
+            "charSet": "H5T_CSET_UTF8",
+            "class": "H5T_STRING",
+            "length": byte_length,
+            "strPad": "H5T_STR_NULLTERM",
+        }
+
+        scalar_shape = {"class": "H5S_SCALAR"}
+        data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
+        attr_name = "fixed_unicode_str_attr_binary"
+        req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # write to attr in binary
+        text = "this is the chinese character for the number eight: 888\x00"
+        binary_text = bytearray(text, "UTF-8")
+        new_byte_length = len(binary_text)
+        self.assertEqual(byte_length, new_byte_length)
+
+        attr_name = "fixed_unicode_str_attr_binary"
+        req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + "/value"
+
+        # octet-stream body is the raw UTF-8 bytes, not a JSON or form-encoded payload
+        headers["Content-Type"] = "application/octet-stream"
+        rsp = self.session.put(req, data=bytes(binary_text), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+        # read from attr
+        headers["Content-Type"] = "application/json"
+        req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
+
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertTrue("value" in rspJson)
+        self.assertEqual(rspJson["value"], text)
+        self.assertTrue("type" in rspJson)
+        type_json = rspJson["type"]
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_STRING")
+        self.assertTrue("length" in type_json)
+        self.assertEqual(type_json["length"], byte_length)
         self.assertTrue("strPad" in type_json)
         self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
         self.assertTrue("charSet" in type_json)
diff --git a/tests/integ/dataset_test.py b/tests/integ/dataset_test.py
index c9e1c18d..cf40ff8b 100755
--- a/tests/integ/dataset_test.py
+++ b/tests/integ/dataset_test.py
@@ -2431,7 +2431,169 @@ def testDatasetEmptyChunkExtent(self):
         rsp = self.session.post(req, data=json.dumps(payload), headers=headers)
         # Should fail with Bad Request due to invalid layout value
         self.assertEqual(rsp.status_code, 400)  # create dataset
+
+    def testPutFixedUTF8StringDataset(self):
+        # Test PUT value for scalar dataset with fixed length UTF-8 string
+        print("testPutFixedUTF8StringDataset", self.base_domain)
+        domain = self.base_domain + "/testPutFixedUTF8StringDataset.h5"
+        headers = helper.getRequestHeaders(domain=domain)
+        req = helper.getEndpoint() + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("root" in rspJson)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        # create dataset
+        req = self.endpoint + "/datasets"
+
+        text = "this is the chinese character for the number eight: \u516b"
+
+        # size of datatype is in bytes
+        byte_length = len(bytearray(text, "UTF-8"))
+
+        fixed_str_type = {
+            "charSet": "H5T_CSET_UTF8",
+            "class": "H5T_STRING",
+            "length": byte_length + 1,
+            "strPad": "H5T_STR_NULLTERM",
+        }
+
+        scalar_shape = {"class": "H5S_SCALAR"}
+        data = {"type": fixed_str_type, "shape": scalar_shape}
+        req = self.endpoint + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+        dset_uuid = rspJson["id"]
+        self.assertTrue(helper.validateId(dset_uuid))
+
+        # link new dataset
+        name = "fixed_utf8_str_dset"
+        req = self.endpoint + "/groups/" + root_uuid + "/links/" + name
+        payload = {"id": dset_uuid}
+        rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # write fixed utf8 string to dset
+        data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
+        req = self.endpoint + "/datasets/" + dset_uuid + "/value"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+        # read value back from dset
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertTrue("value" in rspJson)
+        self.assertEqual(rspJson["value"], text)
+        self.assertTrue("type" in rspJson)
+        type_json = rspJson["type"]
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_STRING")
+        self.assertTrue("length" in type_json)
+        self.assertEqual(type_json["length"], byte_length + 1)
+        self.assertTrue("strPad" in type_json)
+        self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
+        self.assertTrue("charSet" in type_json)
+        self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")
+
+        # write different utf8 string of same overall byte length
+        text = "this is the chinese character for the number eight: 888"
+        new_byte_length = len(bytearray(text, "UTF-8"))
+        self.assertEqual(byte_length, new_byte_length)
+
+        data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
+        req = self.endpoint + "/datasets/" + dset_uuid + "/value"
+        rsp = self.session.put(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
 
+    def testPutFixedUTF8StringDatasetBinary(self):
+        # Test PUT value for scalar dataset with fixed length UTF-8 string in binary
+        print("testPutFixedUTF8StringDatasetBinary", self.base_domain)
+        domain = self.base_domain + "/testPutFixedUTF8StringDatasetBinary.h5"
+        headers = helper.getRequestHeaders(domain=domain)
+        req = helper.getEndpoint() + "/"
+
+        # Get root uuid
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("root" in rspJson)
+        root_uuid = rspJson["root"]
+        helper.validateId(root_uuid)
+
+        # create dataset
+        req = self.endpoint + "/datasets"
+
+        text = "this is the chinese character for the number eight: \u516b\x00"
+
+        # size of datatype is in bytes
+        binary_text = bytearray(text, "UTF-8")
+        byte_length = len(binary_text)
+
+        fixed_str_type = {
+            "charSet": "H5T_CSET_UTF8",
+            "class": "H5T_STRING",
+            "length": byte_length,
+            "strPad": "H5T_STR_NULLTERM",
+        }
+
+        scalar_shape = {"class": "H5S_SCALAR"}
+        data = {"type": fixed_str_type, "shape": scalar_shape}
+        req = self.endpoint + "/datasets"
+        rsp = self.session.post(req, data=json.dumps(data), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+        rspJson = json.loads(rsp.text)
+        dset_uuid = rspJson["id"]
+        self.assertTrue(helper.validateId(dset_uuid))
+
+        # link new dataset
+        name = "fixed_utf8_str_dset_binary"
+        req = self.endpoint + "/groups/" + root_uuid + "/links/" + name
+        payload = {"id": dset_uuid}
+        rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
+        self.assertEqual(rsp.status_code, 201)
+
+        # write fixed utf8 binary string to dset (request body is the raw UTF-8 bytes)
+        headers["Content-Type"] = "application/octet-stream"
+        req = self.endpoint + "/datasets/" + dset_uuid + "/value"
+        rsp = self.session.put(req, data=bytes(binary_text), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+
+        # read value back from dset
+        headers["Content-Type"] = "application/json"
+        rsp = self.session.get(req, headers=headers)
+        self.assertEqual(rsp.status_code, 200)
+        rspJson = json.loads(rsp.text)
+        self.assertTrue("hrefs" in rspJson)
+        self.assertTrue("value" in rspJson)
+        self.assertEqual(rspJson["value"], text)
+        self.assertTrue("type" in rspJson)
+        type_json = rspJson["type"]
+        self.assertTrue("class" in type_json)
+        self.assertEqual(type_json["class"], "H5T_STRING")
+        self.assertTrue("length" in type_json)
+        self.assertEqual(type_json["length"], byte_length)
+        self.assertTrue("strPad" in type_json)
+        self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
+        self.assertTrue("charSet" in type_json)
+        self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")
+
+        # write different utf8 binary string of same overall byte length
+        text = "this is the chinese character for the number eight: 888\x00"
+        binary_text = bytearray(text, "UTF-8")
+        new_byte_length = len(binary_text)
+        self.assertEqual(byte_length, new_byte_length)
+
+        req = self.endpoint + "/datasets/" + dset_uuid + "/value"
+        headers["Content-Type"] = "application/octet-stream"
+        rsp = self.session.put(req, data=bytes(binary_text), headers=headers)
+        self.assertEqual(rsp.status_code, 200)
 
 if __name__ == "__main__":
     # setup test files