From 82950b46d2ec8f757eb57cd07c767745de76b496 Mon Sep 17 00:00:00 2001 From: ppillot Date: Sun, 19 Nov 2023 18:10:48 -0500 Subject: [PATCH 1/2] #999 _chem_comp can be defined on mmCIF files The code was assuming that the _chem_comp property was only available for non-macromolecular structures (such as cif files for heteroatom entities). It appears that some PDB files (e.g. `32C2`) may define a _chem_comp loop around every chemical component, such as its amino acids. This breaks the previous code as `cif.chem_comp` then contains an array of strings instead of a string. The fix here is to assume that if `struct` is present then we should not enter the block for parsing simple entities. --- src/parser/cif-parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/cif-parser.ts b/src/parser/cif-parser.ts index 6a493d12..64594545 100644 --- a/src/parser/cif-parser.ts +++ b/src/parser/cif-parser.ts @@ -1072,7 +1072,7 @@ class CifParser extends StructureParser { _parseChunkOfLines(0, lines.length, lines) }) - if (cif.chem_comp && cif.chem_comp_atom) { + if (cif.chem_comp && cif.chem_comp_atom && !cif.struct) { parseChemComp(cif, s, sb) sb.finalize() s.finalizeAtoms() From bbd7a7ed0e1ee389077b7356682c78564cdec746 Mon Sep 17 00:00:00 2001 From: ppillot Date: Sun, 19 Nov 2023 18:49:31 -0500 Subject: [PATCH 2/2] #950 handle type symbols with charges --- src/parser/cif-parser.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/parser/cif-parser.ts b/src/parser/cif-parser.ts index 64594545..18ebb03a 100644 --- a/src/parser/cif-parser.ts +++ b/src/parser/cif-parser.ts @@ -25,6 +25,7 @@ const reWhitespace = /\s+/ const reQuotedWhitespace = /'((?:(?!'\s).)*)'|"((?:(?!"\s).)*)"|(\S+)/g const reDoubleQuote = /"/g const reTrimQuotes = /^['"]+|['"]+$/g +const reAtomSymbol = /^\D{1,2}/ // atom symbol in atom_site_label interface Cif {[k: string]: any} @@ -186,11 +187,20 @@ function parseCore (cif: Cif, structure: 
Structure, structureBuilder: StructureB const c = new Vector3() const n = cif.atom_site_type_symbol.length + const typeSymbolMap: Record = {} + for (let i = 0; i < n; ++i) { atomStore.growIfFull() const atomname = cif.atom_site_label[ i ] - const element = cif.atom_site_type_symbol[ i ] + const typeSymbol = cif.atom_site_type_symbol[ i ] + + // typeSymbol can be like `Al2.5+`. Retain element symbol only. + let element = typeSymbolMap[typeSymbol] + if (!element) { + const match = typeSymbol.match(reAtomSymbol) + typeSymbolMap[typeSymbol] = element = match?.[0] ?? typeSymbol + } atomStore.atomTypeId[ i ] = atomMap.add(atomname, element)