From 0621e7d243c8a0b612825d3b415ac79426a17eeb Mon Sep 17 00:00:00 2001 From: Tom Beach Date: Sat, 2 Sep 2023 13:52:03 +0100 Subject: [PATCH 1/3] String Parsing Fixes --- src/wasm/parsing/string_parsing.cpp | 142 +++++++++++++++++++++++++++- tests/artifacts/example.ifc.test | 4 +- tests/functional/WebIfcApi.spec.ts | 4 + 3 files changed, 147 insertions(+), 3 deletions(-) diff --git a/src/wasm/parsing/string_parsing.cpp b/src/wasm/parsing/string_parsing.cpp index db694b2a..29cc9432 100644 --- a/src/wasm/parsing/string_parsing.cpp +++ b/src/wasm/parsing/string_parsing.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace webifc::parsing { @@ -90,7 +91,10 @@ namespace webifc::parsing { char str[2]; str[0] = (d1 << 4) | d2; str[1] = 0; - std::u16string u16str(reinterpret_cast(str), 1); + auto cA = reinterpret_cast(str); + cA[0]=checkRomanEncoding(cA[0]); + std::u16string u16str(cA, 1); + for (int i=0; i < u16str.size();i++) u16str[i]=checkRomanEncoding(u16str[i]); std::wstring_convert,char16_t> convert; std::string utf8 = convert.to_bytes(u16str); std::copy(utf8.begin(), utf8.end(), std::back_inserter(result)); @@ -245,6 +249,142 @@ namespace webifc::parsing { } std::copy(utf8.begin(), utf8.end(), std::back_inserter(result)); } + + const char16_t checkRomanEncoding(char16_t input) const + { + if (input < 0x80 || input > 0xFF) return input; + switch (input) + { + case 0x80: return 196; + case 0x81: return 197; + case 0x82: return 199; + case 0x83: return 201; + case 0x84: return 209; + case 0x85: return 214; + case 0x86: return 220; + case 0x87: return 225; + case 0x88: return 224; + case 0x89: return 226; + case 0x8A: return 228; + case 0x8B: return 227; + case 0x8C: return 229; + case 0x8D: return 231; + case 0x8E: return 233; + case 0x8F: return 232; + case 0x90: return 234; + case 0x91: return 235; + case 0x92: return 237; + case 0x93: return 236; + case 0x94: return 238; + case 0x95: return 239; + case 0x96: return 241; + case 0x97: return 243; + case 0x98: return 242; + case 0x99: return 244; + case 0x9A: return 246; + case 0x9B: return 245; + case 0x9C: return 250; + case 0x9D: return 249; + case 0x9E: return 251; + case 0x9F: return 252; + case 0xA0: return 8224; + case 0xA1: return 176; + case 0xA2: return 162; + case 0xA3: return 163; + case 0xA4: return 167; + case 0xA5: return 8226; + case 0xA6: return 182; + case 0xA7: return 223; + case 0xA8: return 174; + case 0xA9: return 169; + case 0xAA: return 8482; + case 0xAB: return 180; + case 0xAC: return 168; + case 0xAD: return 8800; + case 0xAE: return 198; + case 0xAF: return 216; + case 0xB0: return 8734; + case 0xB1: return 177; + case 0xB2: return 8804; + case 0xB3: return 8805; + case 0xB4: return 165; + case 0xB5: return 181; + case 0xB6: return 8706; + case 0xB7: return 8721; + case 0xB8: return 8719; + case 0xB9: return 960; + case 0xBA: return 8747; + case 0xBB: return 170; + case 0xBC: return 186; + case 0xBD: return 937; + case 0xBE: return 230; + case 0xBF: return 248; + case 0xC0: return 191; + case 0xC1: return 161; + case 0xC2: return 172; + case 0xC3: return 8730; + case 0xC4: return 402; + case 0xC5: return 8776; + case 0xC6: return 8710; + case 0xC7: return 171; + case 0xC8: return 187; + case 0xC9: return 8230; + case 0xCA: return 160; + case 0xCB: return 192; + case 0xCC: return 195; + case 0xCD: return 213; + case 0xCE: return 338; + case 0xCF: return 339; + case 0xD0: return 8211; + case 0xD1: return 8212; + case 0xD2: return 8220; + case 0xD3: return 8221; + case 0xD4: return 8216; + case 0xD5: return 8217; + case 0xD6: return 247; + case 0xD7: return 9674; + case 0xD8: return 255; + case 0xD9: return 376; + case 0xDA: return 8260; + case 0xDB: return 8364; + case 0xDC: return 8249; + case 0xDD: return 8250; + case 0xDE: return 64257; + case 0xDF: return 64258; + case 0xE0: return 8225; + case 0xE1: return 183; + case 0xE2: return 8218; + case 0xE3: return 8222; + case 0xE4: return 8240; + case 0xE5: return 194; + case 0xE6: return 202; + case 0xE7: return 193; + case 0xE8: return 203; + case 0xE9: return 200; + case 0xEA: return 205; + case 0xEB: return 206; + case 0xEC: return 207; + case 0xED: return 204; + case 0xEE: return 211; + case 0xEF: return 212; + case 0xF0: return 63743; + case 0xF1: return 210; + case 0xF2: return 218; + case 0xF3: return 219; + case 0xF4: return 217; + case 0xF5: return 305; + case 0xF6: return 710; + case 0xF7: return 732; + case 0xF8: return 175; + case 0xF9: return 728; + case 0xFA: return 729; + case 0xFB: return 730; + case 0xFC: return 184; + case 0xFD: return 733; + case 0xFE: return 731; + case 0xFF: return 711; + } + } }; diff --git a/tests/artifacts/example.ifc.test b/tests/artifacts/example.ifc.test index 55a27807..280c4a5f 100644 --- a/tests/artifacts/example.ifc.test +++ b/tests/artifacts/example.ifc.test @@ -25,8 +25,8 @@ FILE_SCHEMA(('IFC2X3')); ENDSEC; DATA; -#1= IFCORGANIZATION($,'Autodesk Revit 2021 (ENU)',$,$,$); -#5= IFCAPPLICATION(#1,'2021','Autodesk Revit 2021 (ENU)','Revit'); +#1= IFCORGANIZATION($,'Autodesk Revit 2021 (ENU) C\X\8E',$,$,$); +#5= IFCAPPLICATION(#1,'2021','Autodesk Revit 2021 (ENU) ','Revit'); #6= IFCCARTESIANPOINT((0.,0.,0.)); #9= IFCCARTESIANPOINT((0.,0.)); #11= IFCDIRECTION((1.,0.,0.)); diff --git a/tests/functional/WebIfcApi.spec.ts b/tests/functional/WebIfcApi.spec.ts index 5144de50..257adebe 100644 --- a/tests/functional/WebIfcApi.spec.ts +++ b/tests/functional/WebIfcApi.spec.ts @@ -138,6 +138,10 @@ describe('WebIfcApi reading methods', () => { let line: RawLineData = ifcApi.GetRawLineData(modelID, expressId); expect(line.ID).toEqual(expressId); }) + test('can read MAC ROMAN Characters', () => { + let line: RawLineData = ifcApi.GetRawLineData(modelID, 1); + expect(line.arguments[1].value).toEqual('Autodesk Revit 2021 (ENU) CÈ'); + }) test('can count errors in ifc file', () => { let errors: any = ifcApi.GetAndClearErrors(modelID); expect(errors.size()).toEqual(quantityOfknownErrors); From b60110e0c606587d8c7a5bc2952b41d8a4c345c7 Mon Sep 17 00:00:00 2001 From: Tom Beach Date: Sat, 2 Sep 2023 14:17:00 +0100 Subject: [PATCH 2/3] Fix Bug --- src/wasm/parsing/string_parsing.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wasm/parsing/string_parsing.cpp b/src/wasm/parsing/string_parsing.cpp index 29cc9432..c51809de 100644 --- a/src/wasm/parsing/string_parsing.cpp +++ b/src/wasm/parsing/string_parsing.cpp @@ -384,6 +384,7 @@ namespace webifc::parsing { case 0xFE: return 731; case 0xFF: return 711; } + return input; } }; From 7c888aee8c12dcfaca434fd7359345bde3593359 Mon Sep 17 00:00:00 2001 From: Tom Beach Date: Sat, 2 Sep 2023 14:36:01 +0100 Subject: [PATCH 3/3] Bug Fixes --- src/wasm/parsing/string_parsing.cpp | 7 ++++--- tests/functional/WebIfcApi.spec.ts | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/wasm/parsing/string_parsing.cpp b/src/wasm/parsing/string_parsing.cpp index c51809de..5423ee83 100644 --- a/src/wasm/parsing/string_parsing.cpp +++ b/src/wasm/parsing/string_parsing.cpp @@ -10,10 +10,11 @@ #include #include #include -#include namespace webifc::parsing { + bool foundRoman = false; + void encodeCharacters(std::ostringstream &stream,std::string &data) { std::u16string utf16 = std::wstring_convert, char16_t>{}.from_bytes(data.data()); @@ -92,9 +93,9 @@ namespace webifc::parsing { str[0] = (d1 << 4) | d2; str[1] = 0; auto cA = reinterpret_cast(str); - cA[0]=checkRomanEncoding(cA[0]); + if (cA[0] >= 0x80 && cA[0] <= 0x9F) foundRoman = true; + if (foundRoman) cA[0]=checkRomanEncoding(cA[0]); std::u16string u16str(cA, 1); - for (int i=0; i < u16str.size();i++) u16str[i]=checkRomanEncoding(u16str[i]); std::wstring_convert,char16_t> convert; std::string utf8 = convert.to_bytes(u16str); std::copy(utf8.begin(), utf8.end(), std::back_inserter(result)); diff --git a/tests/functional/WebIfcApi.spec.ts b/tests/functional/WebIfcApi.spec.ts index 257adebe..33d4083f 100644 --- a/tests/functional/WebIfcApi.spec.ts +++ b/tests/functional/WebIfcApi.spec.ts @@ -140,7 +140,7 @@ describe('WebIfcApi reading methods', () => { }) test('can read MAC ROMAN Characters', () => { let line: RawLineData = ifcApi.GetRawLineData(modelID, 1); - expect(line.arguments[1].value).toEqual('Autodesk Revit 2021 (ENU) CÈ'); + expect(line.arguments[1].value).toEqual('Autodesk Revit 2021 (ENU) Cé'); }) test('can count errors in ifc file', () => { let errors: any = ifcApi.GetAndClearErrors(modelID);