diff --git a/alphafold_mining/Q9GYP2.png b/alphafold_mining/Q9GYP2.png new file mode 100644 index 0000000..f2281ec Binary files /dev/null and b/alphafold_mining/Q9GYP2.png differ diff --git a/alphafold_mining/af_mining.md b/alphafold_mining/af_mining.md new file mode 100644 index 0000000..f8d8f6e --- /dev/null +++ b/alphafold_mining/af_mining.md @@ -0,0 +1,196 @@ +# Search AlphaFold database for transmembrane cysteines + +This is an example of searching the C elegans proteome for transmembrane proteins that have at least two nearby cysteines. It uses UniProt annotations to identify transmembrane residues and AlphaFold database predicted structures to measure distances between cysteines. It searches 19827 UniProt entries for C elegans, finds 5756 with annotated transmembrane regions, and then finds 783 proteins with two cysteines closer than 5 Angstroms to each other (SG to SG atom distance), at least one of them in a transmembrane region. The search takes 36 seconds on a Mac laptop, so it is feasible to try many variations of this kind of structural feature search in a short time. + +## UniProt Data + +In order to make the search fast we download in advance the [UniProt annotations for all C elegans proteins](https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000001940/UP000001940_6239.xml.gz) as a single XML file (193 Mbytes). 
The [C elegans proteome](https://www.uniprot.org/proteomes/UP000001940) is described here + + https://www.uniprot.org/proteomes/UP000001940 + +and the XML file (uncompress it with gunzip, since the script reads UP000001940_6239.xml) is here + + https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Eukaryota/UP000001940/UP000001940_6239.xml.gz + +## AlphaFold database structures + +We also download all of the AlphaFold database structures for C elegans in advance (2.6 Gbytes) described [here](https://alphafold.com/download) + + https://alphafold.com/download + +and the file is [here](https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/UP000001940_6239_CAEEL_v4.tar) + + https://ftp.ebi.ac.uk/pub/databases/alphafold/latest/UP000001940_6239_CAEEL_v4.tar + +We put the structures in a directory called alphafold_models and uncompress them + + mkdir alphafold_models + cd alphafold_models + tar xf ~/Downloads/UP000001940_6239_CAEEL_v4.tar --include '*.cif.gz' + gunzip *.gz + +## Running the search in ChimeraX + +Opening the Python script [cyssearch.py](cyssearch.py) in ChimeraX 1.7 then does the search and prints the results to the log. The script uses standard Python capabilities to read the UniProt annotation XML file, and uses ChimeraX capabilities to read the AlphaFold structures to measure distances between cysteines. + +Use ChimeraX commands to change to the directory with the UniProt XML file, Python script and alphafold_models directory, then open the script in ChimeraX. + + cd ~/af_search + open cyssearch.py + +The following [output](results.txt) is logged, listing the UniProt ids with pairs of close cysteines and also a short list of entries for which there was no AlphaFold model (either too new, from 2022, or too large, more than 2700 amino acids, for the AlphaFold database). 
+ + 19827 UniProt entries + 5756 entries with annotated transmembrane regions + 4518 entries with 1 or more transmembrane cysteines + 783 with two cysteines closer than 5A, at least 1 being transmembrane + + O16978 209,260 + O17230 137,210 + O17956 93,97 76,84 + O17960 47,91 + O18083 17,306 99,153 + Q18595 123,468 + Q19061 85,93 + Q65XS8 243,302 + Q9NA41 149,204 + A0A067XG43 63,259 + A4UVM0 150,216 + Q86D06 138,180 + Q8WQB6 37,309 + E5QCI3 177,217 + O16880 634,638 + P34315 17,388 + Q19975 47,55 + Q93655 280,531 + Q966F7 18,189 + Q9GYL0 145,165 + O45767 162,181 33,72 + Q9U3P2 1103,1107 + G5ED82 141,145 254,323 + O16494 97,153 + O45971 54,107 + Q7YWW4 37,41 + Q9GYP2 75,77 72,75 71,83 77,78 71,73 72,78 71,86 73,83 72,77 + ... + + No alphafold model for 44 entries with transmembrane regions: + A0A8S4SQM5 502 + Q9UB28 4450 + A0A8D9MJW7 973 + A0A8D9J776 335 + ... + +## Example structure + +Here is an image of the protein Q9GYP2, an uncharacterized protein with a cluster of 11 closely spaced cysteines near transmembrane helices. + +![Q9GYP2](Q9GYP2.png) + +## Python script + +Here is the ChimeraX Python script that does the search [cyssearch.py](cyssearch.py) + + # Dengke Ma wants to find all C elegans proteins with pairs of close cysteines in transmembrane regions. + # Can use UniProt to identify transmembrane residues, then use AlphaFold database predicted structures + # to see if there are close cysteines. 
+ + def find_uniprot_transmembrane_cysteines(uniprot_xml_path, namespace = '{http://uniprot.org/uniprot}'): + import xml.etree.ElementTree as ET + tree = ET.parse(uniprot_xml_path) + tm = [] + for child in tree.getroot(): + if child.tag == namespace + 'entry': + rr = transmembrane_residue_ranges(child, namespace) + uniprot_id = child.find(namespace + 'accession').text + seq = child.find(namespace + 'sequence').text + cys_count = ''.join(seq[b-1:e] for b,e in rr).count('C') + tm.append((uniprot_id, cys_count, len(seq), rr)) + return tm + + def transmembrane_residue_ranges(protein_xml_entry, namespace): + ranges = [] + for feature in protein_xml_entry.iter(namespace + 'feature'): + fattrib = feature.attrib + if 'type' in fattrib and fattrib['type'] == 'transmembrane region': + for loc in feature.iter(namespace + 'location'): + b,e = loc.find(namespace + 'begin'), loc.find(namespace + 'end') + if b is not None and e is not None: + if 'position' in b.attrib and 'position' in e.attrib: + r = (int(b.attrib['position']), int(e.attrib['position'])) + ranges.append(r) + return ranges + + def close_cysteines(structure, membrane_residue_ranges, max_distance = 5): + cys_res = [r for r in structure.residues if r.name == 'CYS'] + cys_xyz = [(r.number, r.find_atom('SG').coord) for r in cys_res] + + mb_res_nums = set() + for b,e in membrane_residue_ranges: + for rnum in range(b,e+1): + mb_res_nums.add(rnum) + + mb_cys = [r for r in cys_res if r.number in mb_res_nums] + mb_xyz = [(r.number, r.find_atom('SG').coord) for r in mb_cys] + + close_pairs = set() + from chimerax.geometry import distance + for rnum, xyz in mb_xyz: + for rnum2, xyz2 in cys_xyz: + if rnum2 != rnum and distance(xyz, xyz2) <= max_distance: + pair = (rnum, rnum2) if rnum < rnum2 else (rnum2, rnum) + close_pairs.add(pair) + + return list(close_pairs) + + def check_for_close_cysteines(session, ulist, alphafold_dir, max_distance): + found = [] + missing = [] + for uniprot_id, ncys, seq_len, tm_res_ranges in ulist: 
+ if ncys == 0: + continue + m = alphafold_database_model(session, uniprot_id, alphafold_dir) + if m is None: + missing.append((uniprot_id, seq_len)) + continue + close_pairs = close_cysteines(m, tm_res_ranges, max_distance) + if close_pairs: + found.append((uniprot_id, close_pairs)) + m.delete() + return found, missing + + def alphafold_database_model(session, uniprot_id, alphafold_dir): + filename = f'AF-{uniprot_id}-F1-model_v4.cif' + from os.path import join, exists + path = join(alphafold_dir, filename) + if not exists(path): + return None + from chimerax.mmcif import open_mmcif + s, msg = open_mmcif(session, path) + return s[0] + + uniprot_xml_path = 'UP000001940_6239.xml' + alphafold_dir = 'alphafold_models' + max_distance = 5 + + ulist = find_uniprot_transmembrane_cysteines(uniprot_xml_path) + uclose, missing = check_for_close_cysteines(session, ulist, alphafold_dir, max_distance) + ntm = len([uniprot_id for uniprot_id, ncys, seq_len, tm_res_ranges in ulist if tm_res_ranges]) + ntmc = len([uniprot_id for uniprot_id, ncys, seq_len, tm_res_ranges in ulist if ncys > 0]) + print(f'{len(ulist)} UniProt entries') + print(f'{ntm} entries with annotated transmembrane regions') + print(f'{ntmc} entries with 1 or more transmembrane cysteines') + print(f'{len(uclose)} with two cysteines closer than {max_distance}A, at least 1 being transmembrane') + + entries = [] + for uniprot_id, res_pairs in uclose: + rpairs = ' '.join(f'{r1},{r2}' for r1,r2 in res_pairs) + entries.append(f'{uniprot_id} {rpairs}') + print() + print('\n'.join(entries)) + print() + + me = '\n'.join(f'{uniprot_id} {seq_length}' for uniprot_id, seq_length in missing) + print(f'No alphafold model for {len(missing)} entries with transmembrane regions:\n{me}') + +Tom Goddard, January 19, 2024 diff --git a/alphafold_mining/cyssearch.py b/alphafold_mining/cyssearch.py new file mode 100644 index 0000000..b695015 --- /dev/null +++ b/alphafold_mining/cyssearch.py @@ -0,0 +1,102 @@ +# Dengke Ma wants to find 
all C elegans proteins with pairs of close cysteines in transmembrane regions. +# Can use UniProt to identify transmembrane residues, then use AlphaFold database predicted structures +# to see if there are close cysteines. + +def find_uniprot_transmembrane_cysteines(uniprot_xml_path, namespace = '{http://uniprot.org/uniprot}'): + import xml.etree.ElementTree as ET + tree = ET.parse(uniprot_xml_path) + tm = [] + for child in tree.getroot(): + if child.tag == namespace + 'entry': + rr = transmembrane_residue_ranges(child, namespace) + uniprot_id = child.find(namespace + 'accession').text + seq = child.find(namespace + 'sequence').text + cys_count = ''.join(seq[b-1:e] for b,e in rr).count('C') + tm.append((uniprot_id, cys_count, len(seq), rr)) + return tm + +def transmembrane_residue_ranges(protein_xml_entry, namespace): + ranges = [] + for feature in protein_xml_entry.iter(namespace + 'feature'): + fattrib = feature.attrib + if 'type' in fattrib and fattrib['type'] == 'transmembrane region': + for loc in feature.iter(namespace + 'location'): + b,e = loc.find(namespace + 'begin'), loc.find(namespace + 'end') + if b is not None and e is not None: + if 'position' in b.attrib and 'position' in e.attrib: + r = (int(b.attrib['position']), int(e.attrib['position'])) + ranges.append(r) + return ranges + +def close_cysteines(structure, membrane_residue_ranges, max_distance = 5): + cys_res = [r for r in structure.residues if r.name == 'CYS'] + cys_xyz = [(r.number, r.find_atom('SG').coord) for r in cys_res] + + mb_res_nums = set() + for b,e in membrane_residue_ranges: + for rnum in range(b,e+1): + mb_res_nums.add(rnum) + + mb_cys = [r for r in cys_res if r.number in mb_res_nums] + mb_xyz = [(r.number, r.find_atom('SG').coord) for r in mb_cys] + + close_pairs = set() + from chimerax.geometry import distance + for rnum, xyz in mb_xyz: + for rnum2, xyz2 in cys_xyz: + if rnum2 != rnum and distance(xyz, xyz2) <= max_distance: + pair = (rnum, rnum2) if rnum < rnum2 else (rnum2, 
rnum) + close_pairs.add(pair) + + return list(close_pairs) + +def check_for_close_cysteines(session, ulist, alphafold_dir, max_distance): + found = [] + missing = [] + for uniprot_id, ncys, seq_len, tm_res_ranges in ulist: + if ncys == 0: + continue + m = alphafold_database_model(session, uniprot_id, alphafold_dir) + if m is None: + missing.append((uniprot_id, seq_len)) + continue + close_pairs = close_cysteines(m, tm_res_ranges, max_distance) + if close_pairs: + found.append((uniprot_id, close_pairs)) + m.delete() + return found, missing + +def alphafold_database_model(session, uniprot_id, alphafold_dir): + filename = f'AF-{uniprot_id}-F1-model_v4.cif' + from os.path import join, exists + path = join(alphafold_dir, filename) + if not exists(path): + return None + from chimerax.mmcif import open_mmcif + s, msg = open_mmcif(session, path) + return s[0] + +uniprot_xml_path = 'UP000001940_6239.xml' +alphafold_dir = 'alphafold_models' +max_distance = 5 + +ulist = find_uniprot_transmembrane_cysteines(uniprot_xml_path) +uclose, missing = check_for_close_cysteines(session, ulist, alphafold_dir, max_distance) +ntm = len([uniprot_id for uniprot_id, ncys, seq_len, tm_res_ranges in ulist if tm_res_ranges]) +ntmc = len([uniprot_id for uniprot_id, ncys, seq_len, tm_res_ranges in ulist if ncys > 0]) +print(f'{len(ulist)} UniProt entries') +print(f'{ntm} entries with annotated transmembrane regions') +print(f'{ntmc} entries with 1 or more transmembrane cysteines') +print(f'{len(uclose)} with two cysteines closer than {max_distance}A, at least 1 being transmembrane') + +entries = [] +for uniprot_id, res_pairs in uclose: + rpairs = ' '.join(f'{r1},{r2}' for r1,r2 in res_pairs) + entries.append(f'{uniprot_id} {rpairs}') +print() +print('\n'.join(entries)) +print() + +me = '\n'.join(f'{uniprot_id} {seq_length}' for uniprot_id, seq_length in missing) +print(f'No alphafold model for {len(missing)} entries with transmembrane regions:\n{me}') + diff --git a/alphafold_mining/results.txt 
b/alphafold_mining/results.txt new file mode 100644 index 0000000..5be2fae --- /dev/null +++ b/alphafold_mining/results.txt @@ -0,0 +1,836 @@ +open cyssearch.py, ran in 36 seconds + +19827 UniProt entries +5756 entries with annotated transmembrane regions +4518 entries with 1 or more transmembrane cysteines +783 with two cysteines closer than 5A, at least 1 being transmembrane + +O16978 209,260 +O17230 137,210 +O17956 93,97 76,84 +O17960 47,91 +O18083 17,306 99,153 +Q18595 123,468 +Q19061 85,93 +Q65XS8 243,302 +Q9NA41 149,204 +A0A067XG43 63,259 +A4UVM0 150,216 +Q86D06 138,180 +Q8WQB6 37,309 +E5QCI3 177,217 +O16880 634,638 +P34315 17,388 +Q19975 47,55 +Q93655 280,531 +Q966F7 18,189 +Q9GYL0 145,165 +O45767 162,181 33,72 +Q9U3P2 1103,1107 +G5ED82 141,145 254,323 +O16494 97,153 +O45971 54,107 +Q7YWW4 37,41 +Q9GYP2 75,77 72,75 71,83 77,78 71,73 72,78 71,86 73,83 72,77 +G5EG36 17,56 +O17809 96,148 +O45759 48,112 +P90858 538,590 +Q86B42 102,189 +Q94177 294,431 +Q9XUG7 272,304 +O18689 257,298 +Q17638 14,55 +B2D6M3 46,50 +O61958 50,57 +Q11073 300,360 +Q18177 20,86 86,87 +Q18545 427,428 +Q9TYL7 19,99 +Q18582 87,92 +Q20735 166,202 +Q22937 187,191 +E3CTH4 69,73 +O16697 77,81 +Q22686 403,438 163,193 454,458 403,437 +Q8I108 22,37 22,38 +Q95QL0 167,178 149,293 +G5EFS0 269,320 171,175 +O17844 258,297 +Q23135 20,24 +Q9U539 426,430 48,230 +O16909 31,287 +P91574 159,177 +Q9NEI8 28,169 +Q9U2J1 49,307 +U4PMZ6 50,55 32,50 15,65 +I2HA80 19,64 24,60 19,23 +O02071 119,150 +Q17519 187,229 +Q21788 140,226 +Q22938 291,333 +O16269 15,256 +P90824 149,167 +Q09344 52,93 139,178 +Q19870 685,690 +Q22284 267,290 +Q93340 143,147 +B6VQ87 286,290 +O62311 124,128 +Q22594 17,53 +Q22976 217,386 237,359 +Q23633 22,79 +Q5F4U1 130,208 +Q9N3Y9 420,499 +A0A4V0IJX9 35,154 35,155 +O45990 70,170 +O62362 58,91 +Q19125 149,235 +Q93198 79,83 +Q9N4Z1 96,149 +A0A0K3AYF5 93,462 +A3QMA7 58,308 +O17091 166,182 +O44643 143,162 +Q17975 158,176 +Q18936 61,292 +O45309 144,163 +O45372 50,111 +O16329 142,213 +O16755 211,215 
+Q21960 32,176 +Q4PIV7 109,151 +Q9XU36 25,119 +E1B6S1 276,280 +F0IWT0 20,263 +Q23393 55,85 +Q9XWA2 123,230 +G3MTY1 140,232 +Q19607 24,60 +Q20929 354,358 +Q22977 198,199 394,504 +C8JQS5 50,76 +H2KML1 884,911 884,906 725,916 877,911 +O45809 30,34 +Q9N5W5 287,291 +D6RYD5 81,108 +Q17890 44,107 +Q20411 68,289 +Q9BKN9 21,69 +A1EHR5 30,167 187,191 +O16492 97,153 +J7RNM5 41,45 +Q18712 153,173 +A1IMB8 618,691 +H2KYP2 26,970 +H2L039 294,312 +H2L269 12,111 +Q09366 11,268 +Q6BET0 287,346 294,353 +G4S3P8 354,504 +Q9N4W0 71,118 +Q22271 407,472 +O44691 151,189 +Q17903 11,15 +Q19293 168,241 +Q9U362 24,91 +F3Y5R4 78,205 +O17818 52,74 52,70 +O62268 168,184 +O62270 413,501 420,501 413,420 +O76735 60,69 +Q19889 96,150 +Q6F3C9 128,213 +Q95YD7 107,184 +Q9U2H5 85,277 +O17925 99,152 +O45813 596,645 +O62224 117,187 +Q17701 271,582 +Q21714 83,91 +Q22209 51,69 +Q9U2S3 539,561 532,566 400,571 539,566 +Q17478 59,335 +Q19498 132,135 +Q20116 112,116 +O01608 117,208 +P91504 96,150 +Q09213 114,156 +Q19508 45,49 +G5EDU2 338,360 +O17240 62,110 +O17786 34,302 +O61857 63,140 66,134 +Q09502 128,250 +Q22542 50,54 +H2KYR4 105,202 108,202 +O16272 180,184 +Q21008 27,252 +A0A2C9C3K2 260,379 +G5EGP4 630,679 +O44491 59,137 27,266 +Q9N532 260,478 260,475 +A8WFN1 4,12 +Q1ZXT2 76,158 +Q20152 166,185 +Q23087 74,258 +Q9BHL2 34,263 +Q9U3J7 154,159 +O16505 94,150 +P90747 1049,1155 +Q20046 439,464 +Q9XTV7 130,134 +Q21434 320,352 +Q23305 101,182 +Q19992 101,109 36,76 +Q4PIV2 100,118 +Q4W5S0 164,168 +Q9U2J4 52,310 +O17899 51,133 102,148 +O61200 48,66 +Q17760 20,56 +Q21304 111,145 +Q7YTM8 98,102 +Q7YXG6 128,169 103,194 233,285 +Q9U2T7 90,143 +G5EC12 7,285 +O17717 110,163 +O44442 117,187 +P46570 243,283 +A0A0K3AU93 29,33 +A0A0M9JJ71 62,201 +O62033 821,827 +Q17453 200,206 +Q21715 79,87 +Q9U2X4 148,168 +O45474 103,179 131,218 +O45891 38,141 +P91211 76,84 +Q18700 174,179 +D3KFU9 58,62 +Q19473 68,81 81,93 85,93 +Q5CZ37 127,258 +Q9GYM6 113,202 +A0A0K3AXT4 66,299 90,133 +G5ECX0 905,1123 960,1032 +O17030 84,167 +O61897 144,163 
+P91297 207,235 +Q22109 81,86 +Q27GU6 129,133 +Q5FC35 321,331 +C7FZU0 322,326 +J7SA46 59,104 +Q18323 211,215 +Q22118 64,76 +Q9U1Z2 20,57 20,238 +Q9XW59 146,226 +O76623 784,790 +Q27GU4 280,314 +Q5DTE7 50,308 +Q95QA2 8,296 +O62051 172,177 +P46572 6,85 +Q7YX40 115,118 +P24888 66,78 65,78 66,76 +P91840 49,345 +Q966A1 116,120 +Q9XUW4 42,80 +G5ED05 143,216 +Q93690 80,141 +Q9UAX3 115,169 +A0A0K3ASB3 114,151 +G5EC09 302,360 +H2KZM1 28,310 +O62192 79,271 +Q21685 152,153 +A0A2K5ATT4 79,86 +G4SNR2 52,125 +H9G2R4 226,264 119,271 +O44196 201,263 +O44533 101,145 276,283 +O44674 61,62 +A0A2I2LDW9 23,255 +P90945 88,92 +Q23481 7,29 +Q6BER8 147,167 +Q9UAX5 17,53 +O16416 127,171 +B1Q258 30,80 +H2KZD1 111,334 +Q19084 395,399 +A0A3B1E8T2 29,67 +Q22410 271,441 +E3CTG4 13,50 +G5EEK9 621,659 +O17358 104,211 +O17663 50,115 +Q17526 114,118 113,278 +U4PMM5 350,353 +Q17970 45,46 +Q7YTN2 187,191 +Q9XVW9 34,37 +O61865 338,443 342,443 +O62254 91,116 +Q8IFX9 7,26 104,195 +G5EGF9 33,287 +O62511 82,86 +Q22225 71,77 +Q22570 264,322 +U4PFA8 132,136 86,109 +O17876 164,182 +Q19468 366,433 +Q20258 108,152 +O18019 102,179 +Q09965 68,94 +Q19563 65,113 47,319 +Q9N5H7 228,271 +Q9TZD5 10,73 +Q9XV42 97,174 +P91489 58,136 +Q10042 154,237 +Q22569 184,188 +Q5F4V5 148,168 +O01323 3,6 +O16471 39,270 64,107 +O76687 177,205 +G4RWX0 87,91 +G5EF94 110,129 +Q21142 126,190 +Q9N519 81,89 77,89 +Q9U3D1 40,44 +B3WFW1 7,70 +O17961 47,91 50,91 206,210 +Q5CCI4 245,272 +O62308 50,115 +Q18398 198,247 +Q19757 49,102 +Q5GMI1 29,55 +A0A0K3ART6 79,132 +A0A3P6PAZ5 28,30 +O45194 28,48 +Q09638 258,293 +Q18593 211,215 +O17089 165,181 +O17831 114,148 144,148 +Q19710 20,89 58,89 20,58 +Q20265 363,381 +O18277 137,179 280,284 +O61959 41,61 +Q19932 310,343 +Q8IFY0 128,169 103,195 +G5EED0 477,481 +O45662 144,163 +S6FWN4 232,688 +O16698 65,69 +Q18818 96,123 123,127 96,127 +Q20961 110,114 +Q9N539 128,136 8,71 71,100 +D1YSH1 11,164 +D7UU25 130,217 +O17819 9,88 +Q03613 103,192 +Q19559 108,112 +Q86GC5 191,222 +Q8IFY1 127,168 +O45165 25,73 +O45238 
213,333 +Q17710 23,62 65,91 +Q18375 68,114 +Q20325 113,193 +Q7YXA6 216,247 +Q8I4E4 403,444 182,475 +Q9TXK7 76,124 +Q9U2T2 23,53 +Q22949 411,507 +Q7Z003 153,799 +A0A1X7RBT7 188,192 +O17130 28,294 +P90819 147,165 +Q19026 143,162 +Q58AA2 147,162 +Q7YX65 81,97 +H2FLH4 61,135 +O01545 84,257 +O17354 96,150 +P91160 128,169 +Q20686 55,309 +Q20830 116,120 +Q2MGE8 79,160 47,128 +Q93380 419,529 +Q9N5V7 173,241 +Q9TYV2 160,193 +Q22211 35,47 95,189 92,189 +Q9XUG5 347,513 +O45145 19,201 +Q10935 193,246 +Q19897 245,272 +Q21717 79,87 +Q7YX09 177,221 253,285 253,281 +Q4W521 29,56 +O17125 94,148 +Q20404 277,305 750,857 +Q9NAQ9 87,159 +O17470 129,207 +Q9XWD7 312,360 +Q86D22 228,273 +Q23186 62,67 +Q8ITZ9 160,165 +Q965G4 144,163 +Q9GUC0 97,150 +A0A4V0IMR4 131,142 +P90745 124,214 +P92015 93,97 76,84 +Q19676 171,460 +O17137 144,163 +Q19625 264,367 986,1059 +Q8I4C6 22,87 +Q966G9 149,150 +G5ECB2 512,604 +Q4EVX9 228,283 189,193 +Q52GX7 51,55 +Q93561 108,112 +Q9N482 99,152 +Q9U3D4 291,328 +G5EF54 99,152 +O16330 142,215 149,208 +O16503 106,197 +O17096 148,169 +O17181 180,208 +O18074 44,67 +P91380 128,169 +Q9U2G5 47,119 +G5EC34 394,398 +I7LFE4 309,342 +Q18517 96,131 +Q9NAN9 253,277 +Q9XW19 166,467 +A0A131MBV2 289,323 +G5EGL6 164,180 +O01466 23,59 +Q400M3 129,253 +Q9N4V9 264,321 +B6VQ56 823,827 +Q17959 81,152 84,152 +Q19055 354,395 +Q22669 48,52 +O45660 34,305 +Q19127 367,681 +Q9XTY8 278,287 +G5EE14 742,744 +G5EF08 44,875 +O16244 32,44 +P91384 127,168 127,171 +G5EE77 283,317 +Q20416 98,149 53,68 94,149 +Q6BET3 274,280 +Q9GS10 107,217 +Q8MXJ2 71,75 +O44130 351,412 94,226 +O45773 103,147 +Q21567 396,400 +Q5WRR5 71,88 +X5LV99 390,502 +A3KFD1 78,113 +O76689 346,347 +Q22568 69,72 +Q22576 257,296 +Q9NAG1 223,227 +P34551 19,288 +P91379 128,169 +Q8T5S1 293,311 +O16498 17,253 +P91451 767,773 +Q10917 46,50 +G5EBF9 47,91 206,210 +Q9GZD2 30,69 33,69 +Q20838 45,49 +Q22478 130,152 +O18690 258,297 128,207 +O45520 380,691 231,691 +P91378 128,169 +P91487 87,162 +Q22409 284,454 +Q9GS09 137,210 +Q9GYH8 47,179 
+O16576 14,141 +O17660 62,87 +P91348 53,57 +A0A0K3ASC2 377,399 +O01456 99,153 +O16271 186,190 +Q17428 205,253 +A0A486WXM4 121,164 +A0T4F7 69,114 +Q09972 97,235 +Q20656 114,171 114,174 +P91209 76,84 +Q17734 14,61 +Q93533 56,115 +Q9U8C8 85,178 +Q09617 301,428 +Q22806 121,600 117,121 117,600 +A7LPI5 22,75 25,30 22,45 22,51 +G5EC02 598,650 +G5EF74 82,90 78,90 +O17017 144,163 +Q9GPA2 96,194 +Q9N4T9 41,68 +Q9UAY8 97,376 +O16415 48,324 127,171 +O17872 99,152 60,101 +O45838 93,97 +Q19612 33,68 202,207 +Q5WRM9 185,189 +Q9XVW1 290,296 +Q9XWS9 67,106 +G8JZM6 164,405 +O16246 113,210 +P34298 75,261 +P90821 145,164 +Q95ZY4 58,148 +V6CKM5 485,510 +G5EDW2 614,685 702,706 +O45337 86,127 +Q5FC78 34,90 319,323 +Q93840 34,479 +Q95XT6 50,54 +Q4R156 140,151 +Q9XWH7 51,69 +O16336 14,290 10,290 10,14 +O17997 29,298 +O45426 69,72 +P90816 160,178 +O17822 45,49 +O45972 60,101 +Q9N2Z6 428,755 +A0A2C9C337 93,97 +Q9XVZ3 220,251 +G5EDI0 88,307 +O17346 63,67 +P92166 162,166 +Q7YXB0 30,298 +H2KZH1 864,868 328,432 +O01966 824,848 +O62367 258,297 +O17846 258,297 +Q4R106 117,123 +O17842 256,295 +O45915 96,459 +Q19931 1541,1618 +U4PRX8 493,512 +A5Z2T9 236,240 +O17168 148,152 +O18115 202,206 +Q564X0 244,407 +O02241 93,315 +O16341 110,196 84,133 +O76837 30,34 +Q18732 96,149 +Q9N2S9 96,181 +O17926 229,274 +Q9N321 131,423 +Q20963 254,258 +C1P646 46,50 +P92002 17,53 +Q94303 6,175 +Q94305 67,207 +Q9XVM9 78,93 +G5EFQ7 81,89 77,89 +Q22383 101,176 +Q9N500 177,218 +Q9NA73 40,66 62,66 +A0A061ACU2 1246,1273 1246,1278 +A0A2C9C396 99,122 180,259 +G5EGT4 534,540 +O17360 51,119 +O44879 245,250 +Q20444 72,163 259,274 +Q53U87 1230,1234 +Q7KPX1 155,174 +P46567 21,100 +P91994 199,248 +O01635 778,808 +O45808 44,48 +Q21767 97,105 29,79 +Q3LFN0 85,195 +Q86MI7 77,89 +Q9XXR3 280,502 +O18053 82,130 +Q11122 405,477 +Q9GZF9 42,77 41,77 14,246 210,240 +Q9XVG5 28,48 +G5EGQ9 977,993 +Q9U399 210,247 +A0A1T5HUX0 64,90 +D2KT80 64,70 +G5EE13 918,922 324,353 +P91227 765,771 +P91245 189,207 186,207 185,210 189,206 185,189 +Q10936 125,203 
+O44453 100,179 +Q9NLD2 54,156 +O44616 50,242 +Q965U1 149,150 +G5EF52 50,115 +Q21402 57,88 +Q6RYS9 551,560 +Q8IFZ2 32,44 89,186 92,186 +Q9N2T0 96,181 +Q17678 176,408 +Q18795 365,369 +G5EC25 181,197 +G5EE39 152,481 +Q18775 116,196 +Q93869 230,280 +A0A0K3AST9 29,271 +H2L0J1 279,298 +O17629 94,100 +Q7YX89 26,78 +Q9XTV1 160,238 +O16474 105,184 +O45832 47,91 +Q5CZ45 216,272 +H2KZ99 260,480 +O44811 81,89 +Q9N5G6 144,163 +Q9XWH6 51,73 +H2KZR1 57,66 82,110 +Q9UAT3 396,447 +A0A131MBP5 7,204 +Q9TXJ9 125,260 +O62081 101,139 +Q7JL16 146,221 80,191 +Q86DC3 71,118 +Q8WTK1 77,239 97,249 89,244 +Q9XX84 48,112 +O16477 282,286 +O18061 48,105 +O61969 31,287 +O16539 70,170 +O61912 128,169 103,194 +O62029 99,184 +P34358 33,319 +P54145 75,146 +P90825 102,133 +Q9XVK3 148,149 +A0A3B1E616 620,814 +A0A486WTG7 169,181 +Q09303 35,61 +Q17498 244,301 +Q20026 610,614 607,611 +Q21784 41,299 +O17010 42,60 62,110 +O17820 14,93 +P34389 346,640 275,838 347,640 +Q9UAQ0 28,275 +Q8I7H8 105,109 +Q94413 129,144 +Q9UAZ9 138,429 +O01927 370,395 +Q17757 59,112 +Q1RS86 818,851 +Q22506 171,200 127,170 +O62030 165,222 112,165 108,112 +Q9N3C7 168,187 +P91382 128,169 +U4PLY8 31,35 +Q8WQF6 150,168 +D3KFS2 52,89 +Q17943 95,148 +Q9U335 101,139 +B3WFZ2 304,547 +P34261 28,226 +Q20249 77,112 +O17800 81,89 77,89 +O44443 216,223 +O45710 60,101 +O62039 54,75 +Q03611 126,293 +Q20908 259,265 +Q95XW0 90,125 89,125 +G5EFG6 177,181 +O01468 86,94 +O62240 114,148 144,148 +O17951 281,461 +O44639 142,161 +P46568 22,99 +Q19027 143,162 +Q23254 108,181 +Q7YX55 40,44 +Q7YZV6 143,147 +Q9N309 495,499 +Q9N556 14,32 +Q9XUK8 233,265 +P90817 114,145 +Q18179 316,319 312,316 +Q19669 13,166 +Q9GYF4 155,156 +O62368 258,297 +P91096 100,176 96,176 96,100 +Q21286 315,317 +A0A0K3AWD3 143,159 +O17138 144,163 +P34272 41,219 +Q18428 13,92 259,262 +Q23156 111,115 +Q9N549 46,111 +G5ECD9 241,245 103,228 +O01451 34,301 +O16906 209,260 260,261 +O17080 105,182 +O18076 23,36 +Q9XTZ4 11,13 +B1Q278 45,67 +O17016 28,272 +Q8MXJ0 7,162 +O45326 104,201 +Q20984 
7,163 +O44166 15,249 +O61220 654,731 +Q11186 62,280 55,111 +Q21433 307,339 +Q22113 84,123 +Q9GZC8 559,670 +A0A3B1DV80 109,120 +G5EBQ8 1092,1183 +G5ECZ3 1218,1248 +O44477 9,76 +P90954 23,62 +Q23387 31,92 +Q93896 441,492 101,106 +Q966G6 99,153 +Q9NEU8 87,91 +Q22358 48,112 +Q4PIX1 370,461 +A0A4V0IKT9 36,137 +O01431 27,47 30,37 +O17355 96,150 +P91118 34,302 +O16236 149,150 +O16690 82,95 +Q95Y88 454,484 +G4SDH4 86,196 +O17572 40,70 +P90890 195,521 +Q9U2L5 67,266 +O44607 105,118 +Q7YXU6 145,164 +Q9N5J0 147,166 +Q9XXQ9 281,510 +O17136 144,163 +P91511 95,151 +Q17824 306,339 +H2KYK1 604,622 +O44981 35,105 36,105 +Q18130 158,229 +Q3V5K7 103,107 +Q9N5I6 132,214 +Q9N5X1 81,89 77,89 +G5EEY1 14,19 87,135 +Q19776 48,330 +Q21132 20,24 +Q23059 121,218 +Q23514 351,388 332,390 +C9IY26 72,213 +O02072 145,176 +O17816 201,268 +O62050 127,214 +Q6A587 252,254 +Q95PY1 84,167 +Q9XWT7 302,306 +O16470 70,303 94,137 +Q9TZE1 243,360 +G4RRB3 52,106 +G5EBG5 260,320 +O02308 3,8 +O61938 20,24 +Q19494 117,187 +Q21652 151,450 +Q9U3L8 284,414 284,288 290,356 +G5EGE5 273,280 +O18046 104,213 90,146 +Q95XG8 308,344 +O17615 80,100 +O17987 95,99 77,160 +Q9U2K1 304,308 +Q10934 118,168 +Q23349 213,252 +Q94056 239,562 +G5EDR9 488,564 +O45831 50,91 +Q21838 100,153 +G5ECQ2 244,508 +P30638 350,419 +P41941 197,201 +Q21997 22,60 +Q7JNP8 144,162 +Q8I4F6 312,487 +Q8I7G0 82,129 +Q9GS01 61,80 +G5EEE2 231,263 +O44673 148,149 +Q94295 144,164 +Q9U3P7 7,11 +O01447 25,73 +O45339 97,150 +O45475 127,214 +U3UB87 102,261 +G5EE33 571,580 +O17847 254,293 +O45305 33,301 +O76658 107,216 +Q95Q97 264,323 +Q9U3K1 85,146 +U4PLX3 91,245 +O16951 209,261 +Q10054 36,277 +Q23530 303,307 498,502 +Q9N4Q8 160,165 150,170 +Q20980 35,54 + +No alphafold model for 44 entries with transmembrane regions: +A0A8S4SQM5 502 +Q9UB28 4450 +A0A8D9MJW7 973 +A0A8D9J776 335 +Q9N4M4 8545 +A0A8D9N0B5 151 +A0A7R9SUN4 263 +P90891 2957 +Q9Y0A1 2892 +A0A7R9XLN9 333 +A0A7R9SVA6 222 +A0A8D8EJW4 266 +A0A8D9MZS8 185 +Q19319 4328 +A0A8S0XG47 459 +A0A7I9HS13 345 
+A0A7R9XMR0 250 +A0A8S4QC16 625 +P34616 3343 +H2KMM0 912 +A0A8S4SRB2 365 +Q5F4W0 2779 +Q9XV66 3263 +A0A8S5HT27 446 +A0A8D9MTS1 203 +P34576 3767 +A0A4V6M3X3 2893 +A0A8S4QC42 289 +A0A8S4QBE7 168 +A0A8S5I9P8 856 +A0A8S4QBU4 475 +A0A8S4SRX2 653 +A0A7I9IA72 701 +A0A8D6PJU0 853 +A0A8S4QD70 413 +A0A2C9C3E8 5202 +O17575 3118 +A0A8D6P3F2 311 +A0A8S4Q9Q4 122 +A0A8S4QGH6 839 +Q04833 4753 +A0A8S4QCC9 180 +I6Z0I7 2781 +Q09624 3284 diff --git a/index.md b/index.md index a871a72..20cb5d3 100644 --- a/index.md +++ b/index.md @@ -16,6 +16,7 @@ These are examples of [ChimeraX](https://www.cgl.ucsf.edu/chimerax/) command use ## Python Examples + * [Search AlphaFold database for transmembrane proteins](alphafold_mining/af_mining.md). January 19, 2024 * [Measure spots seen in 3D microscopy](spots/spots.md). November 6, 2023 * [Set atom size proportional to bfactor](atomsize/atomsize.md). July 24, 2023 * [Run Python code when mouse hovers over atom](hover/hover.md). July 6, 2023