From b36c6351ccb17a2c248acc031787392c933be940 Mon Sep 17 00:00:00 2001 From: Aliaksandr Dziarkach <18146690+AliaksandrDziarkach@users.noreply.github.com> Date: Tue, 8 Oct 2024 18:45:31 +0300 Subject: [PATCH] Backmerge: #2312 - Export of ambiguous monomers doesn't work (error appears) to any export format (even to SVG/PNG) except KET (#2530) --- .../ref/formats/ket_ambiguous_export.py.out | 10 + .../tests/formats/ket_ambiguous_export.py | 53 +++ .../formats/molecules/macro/ambiguous.ket | 449 ++++++++++++++++++ .../molecule/molecule_json_loader.h | 1 + .../molecule/molecule_json_saver.h | 1 + core/indigo-core/molecule/molecule_tgroups.h | 4 + .../molecule/src/base_molecule.cpp | 2 + .../molecule/src/molecule_json_loader.cpp | 55 ++- .../molecule/src/molecule_json_saver.cpp | 45 +- .../molecule/src/molecule_tgroups.cpp | 22 +- .../molecule/src/molfile_saver.cpp | 2 + 11 files changed, 637 insertions(+), 7 deletions(-) create mode 100644 api/tests/integration/ref/formats/ket_ambiguous_export.py.out create mode 100644 api/tests/integration/tests/formats/ket_ambiguous_export.py create mode 100644 api/tests/integration/tests/formats/molecules/macro/ambiguous.ket diff --git a/api/tests/integration/ref/formats/ket_ambiguous_export.py.out b/api/tests/integration/ref/formats/ket_ambiguous_export.py.out new file mode 100644 index 0000000000..96a0096209 --- /dev/null +++ b/api/tests/integration/ref/formats/ket_ambiguous_export.py.out @@ -0,0 +1,10 @@ +Test mol v2000 +molecule: Ambiguous monomer cannot be transform to SGroup. +Test mol v3000 +molfile saver: Ambiguous monomer cannot be saved to molfile. +Test cdxml +molecule: Ambiguous monomer cannot be transform to SGroup. +Test cml +molecule: Ambiguous monomer cannot be transform to SGroup. 
+Test smiles +* diff --git a/api/tests/integration/tests/formats/ket_ambiguous_export.py b/api/tests/integration/tests/formats/ket_ambiguous_export.py new file mode 100644 index 0000000000..ca6004ab58 --- /dev/null +++ b/api/tests/integration/tests/formats/ket_ambiguous_export.py @@ -0,0 +1,53 @@ +import difflib +import os +import sys + + +def find_diff(a, b): + return "\n".join(difflib.unified_diff(a.splitlines(), b.splitlines())) + + +sys.path.append( + os.path.normpath( + os.path.join(os.path.abspath(__file__), "..", "..", "..", "common") + ) +) +from env_indigo import ( # noqa + Indigo, + IndigoException, + getIndigoExceptionText, + joinPathPy, +) + +indigo = Indigo() + +root = joinPathPy("molecules/macro/", __file__) + +mol = indigo.loadMoleculeFromFile(os.path.join(root, "ambiguous.ket")) +print("Test mol v2000") +try: + indigo.setOption("molfile-saving-mode", "2000") + print(mol.molfile()) +except IndigoException as e: + print(getIndigoExceptionText(e)) +print("Test mol v3000") +try: + indigo.setOption("molfile-saving-mode", "3000") + print(mol.molfile()) +except IndigoException as e: + print(getIndigoExceptionText(e)) +print("Test cdxml") +try: + print(mol.cdxml()) +except IndigoException as e: + print(getIndigoExceptionText(e)) +print("Test cml") +try: + print(mol.cml()) +except IndigoException as e: + print(getIndigoExceptionText(e)) +print("Test smiles") +try: + print(mol.smiles()) +except IndigoException as e: + print(getIndigoExceptionText(e)) diff --git a/api/tests/integration/tests/formats/molecules/macro/ambiguous.ket b/api/tests/integration/tests/formats/molecules/macro/ambiguous.ket new file mode 100644 index 0000000000..4e1e598ef5 --- /dev/null +++ b/api/tests/integration/tests/formats/molecules/macro/ambiguous.ket @@ -0,0 +1,449 @@ +{ + "root": { + "nodes": [ + { + "$ref": "monomer4" + } + ], + "connections": [], + "templates": [ + { + "$ref": "ambiguousMonomerTemplate-alternatives__D___Aspartic acid__N___Asparagine_" + }, + { + "$ref": 
"monomerTemplate-D___Aspartic acid" + }, + { + "$ref": "monomerTemplate-N___Asparagine" + } + ] + }, + "monomer4": { + "type": "ambiguousMonomer", + "id": "4", + "position": { + "x": 7.966666686534882, + "y": -6.075 + }, + "alias": "B", + "templateId": "alternatives__D___Aspartic acid__N___Asparagine_" + }, + "ambiguousMonomerTemplate-alternatives__D___Aspartic acid__N___Asparagine_": { + "type": "ambiguousMonomerTemplate", + "id": "alternatives__D___Aspartic acid__N___Asparagine_", + "alias": "B", + "subtype": "alternatives", + "options": [ + { + "templateId": "D___Aspartic acid" + }, + { + "templateId": "N___Asparagine" + } + ] + }, + "monomerTemplate-D___Aspartic acid": { + "type": "monomerTemplate", + "atoms": [ + { + "label": "C", + "location": [ + 1.631, + -1.5578, + 0 + ] + }, + { + "label": "O", + "location": [ + 1.6327, + -2.7392, + 0 + ] + }, + { + "label": "C", + "location": [ + 0.3507, + -0.8201, + 0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -0.9295, + -1.5578, + 0 + ] + }, + { + "label": "H", + "location": [ + -1.9525, + -0.9669, + 0 + ] + }, + { + "label": "C", + "location": [ + 0.3485, + 0.6575, + 0 + ] + }, + { + "label": "C", + "location": [ + -0.9317, + 1.3952, + 0 + ] + }, + { + "label": "O", + "location": [ + -1.9542, + 0.8032, + 0 + ] + }, + { + "label": "O", + "location": [ + -0.9335, + 2.5766, + 0 + ] + }, + { + "label": "O", + "location": [ + 2.6534, + -0.9658, + 0 + ] + }, + { + "label": "H", + "location": [ + 0.0851, + 3.1751, + 0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 2, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, 
+ "atoms": [ + 8, + 10 + ] + } + ], + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "id": "D___Aspartic acid", + "fullName": "Aspartic acid", + "alias": "D", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + }, + "type": "left" + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 9 + ] + }, + "type": "right" + }, + { + "attachmentAtom": 8, + "leavingGroup": { + "atoms": [ + 10 + ] + }, + "type": "side" + } + ], + "naturalAnalogShort": "D" + }, + "monomerTemplate-N___Asparagine": { + "type": "monomerTemplate", + "atoms": [ + { + "label": "C", + "location": [ + 1.8929, + -1.4175, + 0 + ] + }, + { + "label": "O", + "location": [ + 1.8947, + -2.5989, + 0 + ] + }, + { + "label": "C", + "location": [ + 0.6127, + -0.6799, + 0 + ], + "stereoLabel": "abs" + }, + { + "label": "N", + "location": [ + -0.6676, + -1.4175, + 0 + ] + }, + { + "label": "H", + "location": [ + -1.6907, + -0.8266, + 0 + ] + }, + { + "label": "C", + "location": [ + 0.6104, + 0.7978, + 0 + ] + }, + { + "label": "C", + "location": [ + -0.6698, + 1.5354, + 0 + ] + }, + { + "label": "N", + "location": [ + -1.6922, + 0.9434, + 0 + ] + }, + { + "label": "O", + "location": [ + -0.6716, + 2.7168, + 0 + ] + }, + { + "label": "O", + "location": [ + 2.9153, + -0.8255, + 0 + ] + }, + { + "label": "H", + "location": [ + -2.5341, + 1.7724, + 0 + ] + } + ], + "bonds": [ + { + "type": 2, + "atoms": [ + 1, + 0 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 4 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 5 + ], + "stereo": 1 + }, + { + "type": 1, + "atoms": [ + 5, + 6 + ] + }, + { + "type": 1, + "atoms": [ + 6, + 7 + ] + }, + { + "type": 2, + "atoms": [ + 6, + 8 + ] + }, + { + "type": 1, + "atoms": [ + 0, + 9 + ] + }, + { + "type": 1, + "atoms": [ + 7, + 10 + ] + } + ], + "class": "AminoAcid", + "classHELM": "PEPTIDE", + "id": "N___Asparagine", + "fullName": 
"Asparagine", + "alias": "N", + "attachmentPoints": [ + { + "attachmentAtom": 3, + "leavingGroup": { + "atoms": [ + 4 + ] + }, + "type": "left" + }, + { + "attachmentAtom": 0, + "leavingGroup": { + "atoms": [ + 9 + ] + }, + "type": "right" + }, + { + "attachmentAtom": 7, + "leavingGroup": { + "atoms": [ + 10 + ] + }, + "type": "side" + } + ], + "naturalAnalogShort": "N" + } +} \ No newline at end of file diff --git a/core/indigo-core/molecule/molecule_json_loader.h b/core/indigo-core/molecule/molecule_json_loader.h index afc1255193..b4cc301ee1 100644 --- a/core/indigo-core/molecule/molecule_json_loader.h +++ b/core/indigo-core/molecule/molecule_json_loader.h @@ -84,6 +84,7 @@ namespace indigo static void loadMetaObjects(rapidjson::Value& meta_objects, MetaDataStorage& meta); static int parseMonomerTemplate(const rapidjson::Value& monomer_template, BaseMolecule& mol, StereocentersOptions stereochemistry_options); + void parseAmbiguousMonomerTemplate(const rapidjson::Value& monomer_template, BaseMolecule& mol); protected: struct EnhancedStereoCenter diff --git a/core/indigo-core/molecule/molecule_json_saver.h b/core/indigo-core/molecule/molecule_json_saver.h index 0057a40f32..caee627f02 100644 --- a/core/indigo-core/molecule/molecule_json_saver.h +++ b/core/indigo-core/molecule/molecule_json_saver.h @@ -71,6 +71,7 @@ namespace indigo void saveRGroup(PtrPool& fragments, int rgnum, JsonWriter& writer); void saveFragment(BaseMolecule& fragment, JsonWriter& writer); void saveMonomerTemplate(TGroup& tg, JsonWriter& writer); + void saveAmbiguousMonomerTemplate(TGroup& tg, JsonWriter& writer); void saveMonomerAttachmentPoints(TGroup& tg, JsonWriter& writer); void saveSuperatomAttachmentPoints(Superatom& sa, JsonWriter& writer); diff --git a/core/indigo-core/molecule/molecule_tgroups.h b/core/indigo-core/molecule/molecule_tgroups.h index 5920fbf9c9..35c86ea2ee 100644 --- a/core/indigo-core/molecule/molecule_tgroups.h +++ b/core/indigo-core/molecule/molecule_tgroups.h @@ 
-48,6 +48,10 @@ namespace indigo
         int tgroup_id;
         bool unresolved;
         Array<char> idt_alias;
+        bool ambiguous;
+        bool mixture;
+        ObjArray<Array<char>> aliases;
+        Array<float> ratios;
 
         TGroup();
         ~TGroup();
diff --git a/core/indigo-core/molecule/src/base_molecule.cpp b/core/indigo-core/molecule/src/base_molecule.cpp
index c3f4dbe076..860fe49903 100644
--- a/core/indigo-core/molecule/src/base_molecule.cpp
+++ b/core/indigo-core/molecule/src/base_molecule.cpp
@@ -2996,6 +2996,8 @@ int BaseMolecule::_transformTGroupToSGroup(int idx, int t_idx)
     tg_idx = tgroups.findTGroup(getTemplateAtom(idx));
 
     TGroup& tgroup = tgroups.getTGroup(tg_idx);
+    if (tgroup.ambiguous)
+        throw Error("Ambiguous monomer cannot be transform to SGroup.");
     fragment.clear();
     fragment.clone(*tgroup.fragment.get());
 
diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp
index a24a421dbf..cc3f41a4ab 100644
--- a/core/indigo-core/molecule/src/molecule_json_loader.cpp
+++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp
@@ -60,6 +60,10 @@ void MoleculeJsonLoader::parse_ket(Document& ket)
                 {
                     _monomer_array.PushBack(node, ket.GetAllocator());
                 }
+                else if (node_type.compare("ambiguousMonomer") == 0)
+                {
+                    _monomer_array.PushBack(node, ket.GetAllocator());
+                }
                 else
                     throw Error("Unknows node type: %s", node_type.c_str());
             }
@@ -1254,6 +1258,7 @@ int MoleculeJsonLoader::parseMonomerTemplate(const rapidjson::Value& monomer_tem
     auto tg_idx = mol.tgroups.addTGroup();
     TGroup& tg = mol.tgroups.getTGroup(tg_idx);
     tg.tgroup_id = tg_idx + 1;
+    tg.ambiguous = false;
     Value one_tgroup(kArrayType);
     Document data;
     rapidjson::Value monomer_template_cp;
@@ -1445,6 +1450,61 @@ int MoleculeJsonLoader::parseMonomerTemplate(const rapidjson::Value& monomer_tem
     return tg_idx;
 }
 
+// Registers a TGroup for a KET "ambiguousMonomerTemplate" entry. Only metadata is stored:
+// id, alias, IDT alias, subtype and, per option, the referenced template id with its
+// optional ratio/probability. The TGroup's fragment is not touched here.
+void MoleculeJsonLoader::parseAmbiguousMonomerTemplate(const rapidjson::Value& monomer_template, BaseMolecule& mol)
+{
+    auto tg_idx = mol.tgroups.addTGroup();
+    TGroup& tg = mol.tgroups.getTGroup(tg_idx);
+    tg.tgroup_id = tg_idx + 1;
+    tg.ambiguous = true;
+    if (monomer_template.HasMember("id"))
+    {
+        std::string id = monomer_template["id"].GetString();
+        tg.tgroup_text_id.appendString(id.c_str(), true);
+    }
+    if (monomer_template.HasMember("idtAliases"))
+        tg.idt_alias.readString(parseIdtAlias(monomer_template).getBase().c_str(), true);
+    // A template without an explicit "subtype" is treated as a mixture.
+    if (monomer_template.HasMember("subtype"))
+        tg.mixture = strcmp(monomer_template["subtype"].GetString(), "mixture") == 0;
+    else
+        tg.mixture = true;
+    if (monomer_template.HasMember("alias"))
+        tg.tgroup_alias.readString(monomer_template["alias"].GetString(), true);
+
+    if (monomer_template.HasMember("options"))
+    {
+        auto& options = monomer_template["options"];
+        // Mixtures carry a "ratio" per option, alternatives a "probability".
+        const char* num_name = tg.mixture ? "ratio" : "probability";
+        for (SizeType i = 0; i < options.Size(); i++)
+        {
+            auto& option = options[i];
+            if (!option.HasMember("templateId"))
+                throw Error("Ambiguous monomer template option has no templateId");
+            const char* template_id = option["templateId"].GetString();
+            tg.aliases.push().readString(template_id, true);
+            if (i == 0)
+            {
+                // The class of the ambiguous monomer is taken from its first option.
+                auto it = _id_to_template.find(template_id);
+                if (it == _id_to_template.end())
+                    throw Error("Ambiguous monomer template refers to unknown template: %s", template_id);
+                auto& templ = _templates[it->second];
+                tg.tgroup_class.readString(templ["class"].GetString(), true);
+            }
+            // -1 marks "no value given"; the saver only writes values >= 0.
+            auto& ratio = tg.ratios.push();
+            if (option.HasMember(num_name))
+                ratio = option[num_name].GetFloat();
+            else
+                ratio = -1;
+        }
+    }
+}
+
 std::string MoleculeJsonLoader::monomerMolClass(const std::string& class_name)
 {
     auto mclass = class_name;
@@ -1568,7 +1628,9 @@ void MoleculeJsonLoader::loadMolecule(BaseMolecule& mol, bool load_arrows)
         auto& mt = _templates[i];
         // int tp = mt.GetType();
         if (mt.HasMember("type") && mt["type"].GetString() == std::string("monomerTemplate"))
-            int tgroup_id = parseMonomerTemplate(mt, mol, stereochemistry_options);
+            parseMonomerTemplate(mt, mol, stereochemistry_options);
+        else if (mt.HasMember("type") && mt["type"].GetString() == std::string("ambiguousMonomerTemplate"))
+            parseAmbiguousMonomerTemplate(mt, mol);
     }
 
     std::unordered_map monomer_id_mapping;
diff --git
a/core/indigo-core/molecule/src/molecule_json_saver.cpp b/core/indigo-core/molecule/src/molecule_json_saver.cpp
index 0f9845f6b8..c9a3c77581 100644
--- a/core/indigo-core/molecule/src/molecule_json_saver.cpp
+++ b/core/indigo-core/molecule/src/molecule_json_saver.cpp
@@ -1192,6 +1192,42 @@ void MoleculeJsonSaver::saveMonomerTemplate(TGroup& tg, JsonWriter& writer)
     writer.EndObject();
 }
 
+// Writes the "ambiguousMonomerTemplate-<id>" entry for tg: type, subtype, id, alias and an
+// "options" array with one object per alias (templateId plus optional ratio/probability).
+void MoleculeJsonSaver::saveAmbiguousMonomerTemplate(TGroup& tg, JsonWriter& writer)
+{
+    std::string tg_id(monomerId(tg));
+    std::string template_id = "ambiguousMonomerTemplate-" + tg_id;
+    writer.Key(template_id.c_str());
+    writer.StartObject();
+    writer.Key("type");
+    writer.String("ambiguousMonomerTemplate");
+    writer.Key("subtype");
+    writer.String(tg.mixture ? "mixture" : "alternatives");
+    writer.Key("id");
+    writer.String(tg_id.c_str());
+    writer.Key("alias");
+    writer.String(tg.tgroup_alias.ptr());
+    writer.Key("options");
+    writer.StartArray();
+    const char* num_name = tg.mixture ? "ratio" : "probability";
+    for (int i = 0; i < tg.aliases.size(); i++)
+    {
+        writer.StartObject();
+        writer.Key("templateId");
+        writer.String(tg.aliases[i].ptr());
+        // The number is a member of the option object, so emit it before EndObject().
+        if (tg.ratios[i] >= 0)
+        {
+            writer.Key(num_name);
+            saveNativeFloat(writer, tg.ratios[i]);
+        }
+        writer.EndObject();
+    }
+    writer.EndArray();
+    writer.EndObject();
+}
+
 void MoleculeJsonSaver::saveSuperatomAttachmentPoints(Superatom& sa, JsonWriter& writer)
 {
     std::map sorted_attachment_points;
@@ -1561,7 +1597,8 @@ void MoleculeJsonSaver::saveMolecule(BaseMolecule& bmol, JsonWriter& writer)
                 writer.Key((std::string("monomer") + std::to_string(mon_id)).c_str());
                 writer.StartObject();
                 writer.Key("type");
-                writer.String("monomer");
+                int temp_idx = mol->getTemplateAtomTemplateIndex(i);
+                writer.String(temp_idx > -1 && bmol.tgroups.getTGroup(temp_idx).ambiguous ? "ambiguousMonomer" : "monomer");
                 writer.Key("id");
                 writer.String(std::to_string(mon_id).c_str());
                 auto seqid = mol->getTemplateAtomSeqid(i);
@@ -1585,7 +1622,6 @@ void MoleculeJsonSaver::saveMolecule(BaseMolecule& bmol, JsonWriter& writer)
                 auto alias = mol->getTemplateAtom(i);
                 writer.String(alias);
                 auto mon_class = mol->getTemplateAtomClass(i);
-                int temp_idx = mol->getTemplateAtomTemplateIndex(i);
                 if (temp_idx > -1)
                 {
                     auto& tg = bmol.tgroups.getTGroup(temp_idx);
@@ -1613,7 +1649,10 @@ void MoleculeJsonSaver::saveMolecule(BaseMolecule& bmol, JsonWriter& writer)
         for (int i = mol->tgroups.begin(); i != mol->tgroups.end(); i = mol->tgroups.next(i))
         {
             TGroup& tg = mol->tgroups.getTGroup(i);
-            saveMonomerTemplate(tg, writer);
+            if (tg.ambiguous)
+                saveAmbiguousMonomerTemplate(tg, writer);
+            else
+                saveMonomerTemplate(tg, writer);
         }
 
         // save molecules
diff --git a/core/indigo-core/molecule/src/molecule_tgroups.cpp b/core/indigo-core/molecule/src/molecule_tgroups.cpp
index cf1449d13a..e4933b8159 100644
--- a/core/indigo-core/molecule/src/molecule_tgroups.cpp
+++ b/core/indigo-core/molecule/src/molecule_tgroups.cpp
@@ -23,7 +23,7 @@
 
 using namespace indigo;
 
-TGroup::TGroup() : unresolved(false)
+TGroup::TGroup() : unresolved(false), ambiguous(false), mixture(false)
 {
 }
 
@@ -34,6 +34,7 @@ TGroup::~TGroup()
 void TGroup::clear()
 {
     unresolved = false;
+    ambiguous = false;
 }
 
 int TGroup::cmp(TGroup& tg1, TGroup& tg2, void* /*context*/)
@@ -51,6 +52,11 @@ int TGroup::cmp(TGroup& tg1, TGroup& tg2, void* /*context*/)
     else if (!tg1.unresolved && tg2.unresolved)
         return -1;
 
+    if (tg1.ambiguous && !tg2.ambiguous)
+        return 1;
+    else if (!tg1.ambiguous && tg2.ambiguous)
+        return -1;
+
     lgrps.clear();
     bgrps.clear();
 
@@ -118,8 +124,21 @@ void TGroup::copy(const TGroup& other)
     tgroup_id = other.tgroup_id;
     unresolved = other.unresolved;
     idt_alias.copy(other.idt_alias);
-    fragment.reset(other.fragment->neu());
-    fragment->clone(*other.fragment.get(), 0, 0);
+    // Ambiguous groups are copied without their fragment; other.fragment is not dereferenced.
+    if (!other.ambiguous)
+    {
+        fragment.reset(other.fragment->neu());
+        fragment->clone(*other.fragment.get(), 0, 0);
+    }
+    ambiguous = other.ambiguous;
+    mixture = other.mixture;
+    // Replace, rather than append to, any aliases this group already holds.
+    aliases.clear();
+    for (int i = 0; i < other.aliases.size(); i++)
+    {
+        aliases.push().copy(other.aliases[i]);
+    }
+    ratios.copy(other.ratios);
 }
 
 IMPL_ERROR(MoleculeTGroups, "molecule tgroups");
diff --git a/core/indigo-core/molecule/src/molfile_saver.cpp b/core/indigo-core/molecule/src/molfile_saver.cpp
index 7b1b828fdd..b67db2a14c 100644
--- a/core/indigo-core/molecule/src/molfile_saver.cpp
+++ b/core/indigo-core/molecule/src/molfile_saver.cpp
@@ -1168,6 +1168,8 @@ void MolfileSaver::_writeTGroup(Output& output, BaseMolecule& mol, int tg_idx)
     QS_DEF(Array<char>, buf);
     ArrayOutput out(buf);
     TGroup& tgroup = mol.tgroups.getTGroup(tg_idx);
+    if (tgroup.ambiguous)
+        throw Error("Ambiguous monomer cannot be saved to molfile.");
     std::string natreplace;
     if (tgroup.tgroup_natreplace.size() > 0)
         natreplace = tgroup.tgroup_natreplace.ptr();