Skip to content

Commit

Permalink
Tests for binary transfer of fixed UTF8 string
Browse files Browse the repository at this point in the history
  • Loading branch information
mattjala committed Nov 1, 2023
1 parent b8ccb83 commit 8e76273
Show file tree
Hide file tree
Showing 2 changed files with 247 additions and 6 deletions.
90 changes: 84 additions & 6 deletions tests/integ/attr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,9 +564,9 @@ def testPutVLenUTF8String(self):
self.assertTrue("charSet" in type_json)
self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")

def testPutFixedUTF8String(self):
def testPutFixedUTF8StringAttribute(self):
# Test PUT value for scalar attribute with fixed length UTF-8 string
print("testPutFixedUTF8String", self.base_domain)
print("testPutFixedUTF8StringAttribute", self.base_domain)

headers = helper.getRequestHeaders(domain=self.base_domain)
req = self.endpoint + "/"
Expand All @@ -581,17 +581,19 @@ def testPutFixedUTF8String(self):
# create attr
text = "this is the chinese character for the number eight: \u516b"

text_length = len(text) + 1
# size of datatype is in bytes
byte_length = len(bytearray(text, "UTF-8"))

fixed_str_type = {
"charSet": "H5T_CSET_UTF8",
"class": "H5T_STRING",
"length": text_length,
"length": byte_length + 1,
"strPad": "H5T_STR_NULLTERM",
}

scalar_shape = {"class": "H5S_SCALAR"}
data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
attr_name = "str_attr"
attr_name = "fixed_unicode_str_attr"
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
rsp = self.session.put(req, data=json.dumps(data), headers=headers)
self.assertEqual(rsp.status_code, 201)
Expand All @@ -608,7 +610,83 @@ def testPutFixedUTF8String(self):
self.assertTrue("class" in type_json)
self.assertEqual(type_json["class"], "H5T_STRING")
self.assertTrue("length" in type_json)
self.assertEqual(type_json["length"], text_length)
self.assertEqual(type_json["length"], byte_length + 1)
self.assertTrue("strPad" in type_json)
self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
self.assertTrue("charSet" in type_json)
self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")

# write different utf8 string of same overall byte length
text = "this is the chinese character for the number eight: 888"
new_byte_length = len(bytearray(text, "UTF-8"))
self.assertEqual(byte_length, new_byte_length)

data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + "/value"
rsp = self.session.put(req, data=json.dumps(data), headers=headers)
self.assertEqual(rsp.status_code, 200)

def testPutFixedUTF8StringAttributeBinary(self):
# Test PUT value for scalar attribute with fixed length UTF-8 string in binary
print("testPutFixedUTF8StringAttributeBinary", self.base_domain)

headers = helper.getRequestHeaders(domain=self.base_domain)
req = self.endpoint + "/"

# Get root uuid
rsp = self.session.get(req, headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)
root_uuid = rspJson["root"]
helper.validateId(root_uuid)

# create attr with binary, null byte explicitly included
text = "this is the chinese character for the number eight: \u516b\x00"
binary_text = bytearray(text, "UTF-8")
byte_length = len(binary_text)

fixed_str_type = {
"charSet": "H5T_CSET_UTF8",
"class": "H5T_STRING",
"length": byte_length,
"strPad": "H5T_STR_NULLTERM",
}

scalar_shape = {"class": "H5S_SCALAR"}
data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
attr_name = "fixed_unicode_str_attr_binary"
headers["Content-Type"] = "application/octet-stream"
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name
rsp = self.session.put(req, data=json.dumps(data), headers=headers)
self.assertEqual(rsp.status_code, 201)

# write to attr in binary
text = "this is the chinese character for the number eight: 888\x00"
new_byte_length = len(bytearray(text, "UTF-8"))
self.assertEqual(byte_length, new_byte_length)

attr_name = "fixed_unicode_str_attr_binary"
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name + "/value"

rsp = self.session.put(req, data={"value": text}, headers=headers)
self.assertEqual(rsp.status_code, 200)

# read from attr
headers["Content-Type"] = "application/json"
req = self.endpoint + "/groups/" + root_uuid + "/attributes/" + attr_name

rsp = self.session.get(req, headers=headers)
self.assertEqual(rsp.status_code, 200)
rspJson = json.loads(rsp.text)
self.assertTrue("hrefs" in rspJson)
self.assertTrue("value" in rspJson)
self.assertEqual(rspJson["value"], text)
self.assertTrue("type" in rspJson)
type_json = rspJson["type"]
self.assertTrue("class" in type_json)
self.assertEqual(type_json["class"], "H5T_STRING")
self.assertTrue("length" in type_json)
self.assertEqual(type_json["length"], byte_length)
self.assertTrue("strPad" in type_json)
self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
self.assertTrue("charSet" in type_json)
Expand Down
163 changes: 163 additions & 0 deletions tests/integ/dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2432,6 +2432,169 @@ def testDatasetEmptyChunkExtent(self):
# Should fail with Bad Request due to invalid layout value
self.assertEqual(rsp.status_code, 400) # create dataset

def testPutFixedUTF8StringDataset(self):
    """Round-trip a fixed-length UTF-8 string through a scalar dataset (JSON).

    Creates a scalar dataset typed as a null-terminated, fixed-length UTF-8
    string, links it under the root group, writes a value containing a
    multi-byte character, reads it back, and then overwrites it with an
    ASCII string that has the same encoded byte length.
    """
    print("testPutFixedUTF8StringDataset", self.base_domain)
    domain = self.base_domain + "/testPutFixedUTF8StringDataset.h5"
    headers = helper.getRequestHeaders(domain=domain)
    req = helper.getEndpoint() + "/"

    # Get root uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("root" in rspJson)
    root_uuid = rspJson["root"]
    helper.validateId(root_uuid)

    text = "this is the chinese character for the number eight: \u516b"

    # size of datatype is in bytes, not characters: the non-ASCII
    # character encodes to more than one byte in UTF-8
    byte_length = len(bytearray(text, "UTF-8"))

    fixed_str_type = {
        "charSet": "H5T_CSET_UTF8",
        "class": "H5T_STRING",
        "length": byte_length + 1,  # one extra byte for the null terminator
        "strPad": "H5T_STR_NULLTERM",
    }
    scalar_shape = {"class": "H5S_SCALAR"}

    # create dataset
    data = {"type": fixed_str_type, "shape": scalar_shape}
    req = self.endpoint + "/datasets"
    rsp = self.session.post(req, data=json.dumps(data), headers=headers)
    self.assertEqual(rsp.status_code, 201)
    # The dataset id must be read from the POST response; the previously
    # parsed rspJson still holds the root-domain response.
    rspJson = json.loads(rsp.text)
    self.assertTrue("id" in rspJson)
    dset_uuid = rspJson["id"]
    self.assertTrue(helper.validateId(dset_uuid))

    # link new dataset
    name = "fixed_utf8_str_dset"
    req = self.endpoint + "/groups/" + root_uuid + "/links/" + name
    payload = {"id": dset_uuid}
    rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 201)

    # write fixed utf8 string to dset
    data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
    req = self.endpoint + "/datasets/" + dset_uuid + "/value"
    rsp = self.session.put(req, data=json.dumps(data), headers=headers)
    self.assertEqual(rsp.status_code, 200)

    # read value back from dset
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("hrefs" in rspJson)
    self.assertTrue("value" in rspJson)
    self.assertEqual(rspJson["value"], text)
    # NOTE(review): assumes the value response also carries the dataset
    # "type" -- confirm against the server's GET value response schema.
    self.assertTrue("type" in rspJson)
    type_json = rspJson["type"]
    self.assertTrue("class" in type_json)
    self.assertEqual(type_json["class"], "H5T_STRING")
    self.assertTrue("length" in type_json)
    self.assertEqual(type_json["length"], byte_length + 1)
    self.assertTrue("strPad" in type_json)
    self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
    self.assertTrue("charSet" in type_json)
    self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")

    # write different utf8 string of same overall byte length
    text = "this is the chinese character for the number eight: 888"
    new_byte_length = len(bytearray(text, "UTF-8"))
    self.assertEqual(byte_length, new_byte_length)

    data = {"type": fixed_str_type, "shape": scalar_shape, "value": text}
    req = self.endpoint + "/datasets/" + dset_uuid + "/value"
    rsp = self.session.put(req, data=json.dumps(data), headers=headers)
    self.assertEqual(rsp.status_code, 200)

def testPutFixedUTF8StringDatasetBinary(self):
    """Round-trip a fixed-length UTF-8 string through a scalar dataset (binary).

    Creates a scalar dataset typed as a null-terminated, fixed-length UTF-8
    string, links it under the root group, writes the value as raw bytes
    with an ``application/octet-stream`` request, reads it back as JSON,
    and then overwrites it with an ASCII string of the same byte length.
    """
    print("testPutFixedUTF8StringDatasetBinary", self.base_domain)
    domain = self.base_domain + "/testPutFixedUTF8StringDatasetBinary.h5"
    headers = helper.getRequestHeaders(domain=domain)
    req = helper.getEndpoint() + "/"

    # Get root uuid
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("root" in rspJson)
    root_uuid = rspJson["root"]
    helper.validateId(root_uuid)

    # null terminator is explicitly part of the text for the binary case
    text = "this is the chinese character for the number eight: \u516b\x00"

    # size of datatype is in bytes
    binary_text = text.encode("UTF-8")
    byte_length = len(binary_text)

    fixed_str_type = {
        "charSet": "H5T_CSET_UTF8",
        "class": "H5T_STRING",
        "length": byte_length,  # already includes the explicit null byte
        "strPad": "H5T_STR_NULLTERM",
    }
    scalar_shape = {"class": "H5S_SCALAR"}

    # create dataset
    data = {"type": fixed_str_type, "shape": scalar_shape}
    req = self.endpoint + "/datasets"
    rsp = self.session.post(req, data=json.dumps(data), headers=headers)
    self.assertEqual(rsp.status_code, 201)
    # The dataset id must be read from the POST response; the previously
    # parsed rspJson still holds the root-domain response.
    rspJson = json.loads(rsp.text)
    self.assertTrue("id" in rspJson)
    dset_uuid = rspJson["id"]
    self.assertTrue(helper.validateId(dset_uuid))

    # link new dataset
    name = "fixed_utf8_str_dset_binary"
    req = self.endpoint + "/groups/" + root_uuid + "/links/" + name
    payload = {"id": dset_uuid}
    rsp = self.session.put(req, data=json.dumps(payload), headers=headers)
    self.assertEqual(rsp.status_code, 201)

    # write fixed utf8 binary string to dset: the body is the raw encoded
    # bytes, matching the octet-stream content type
    headers["Content-Type"] = "application/octet-stream"
    req = self.endpoint + "/datasets/" + dset_uuid + "/value"
    rsp = self.session.put(req, data=binary_text, headers=headers)
    self.assertEqual(rsp.status_code, 200)

    # read value back from dset
    headers["Content-Type"] = "application/json"
    rsp = self.session.get(req, headers=headers)
    self.assertEqual(rsp.status_code, 200)
    rspJson = json.loads(rsp.text)
    self.assertTrue("hrefs" in rspJson)
    self.assertTrue("value" in rspJson)
    # NOTE(review): compares against text including the trailing "\x00";
    # confirm whether the server returns or strips the null terminator.
    self.assertEqual(rspJson["value"], text)
    # NOTE(review): assumes the value response also carries the dataset
    # "type" -- confirm against the server's GET value response schema.
    self.assertTrue("type" in rspJson)
    type_json = rspJson["type"]
    self.assertTrue("class" in type_json)
    self.assertEqual(type_json["class"], "H5T_STRING")
    self.assertTrue("length" in type_json)
    self.assertEqual(type_json["length"], byte_length)
    self.assertTrue("strPad" in type_json)
    self.assertEqual(type_json["strPad"], "H5T_STR_NULLTERM")
    self.assertTrue("charSet" in type_json)
    self.assertEqual(type_json["charSet"], "H5T_CSET_UTF8")

    # write different utf8 binary string of same overall byte length
    text = "this is the chinese character for the number eight: 888\x00"
    binary_text = text.encode("UTF-8")
    new_byte_length = len(binary_text)
    self.assertEqual(byte_length, new_byte_length)

    req = self.endpoint + "/datasets/" + dset_uuid + "/value"
    headers["Content-Type"] = "application/octet-stream"
    rsp = self.session.put(req, data=binary_text, headers=headers)
    self.assertEqual(rsp.status_code, 200)


if __name__ == "__main__":
# setup test files
Expand Down

0 comments on commit 8e76273

Please sign in to comment.