-
Notifications
You must be signed in to change notification settings - Fork 0
/
men.py
85 lines (65 loc) · 2.48 KB
/
men.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import requests
import re
# Base URL of the KEGG REST API; every request URL in this script is built by
# appending an operation path (e.g. "list/organism") to it. HTTPS is used so
# the responses are not fetched over a plaintext connection.
KEGG_API_BASE = "https://rest.kegg.jp/"
def get_species_list(phylum):
    """Return the KEGG organisms whose listing line mentions ``phylum``.

    Downloads ``list/organism`` from the KEGG REST API and keeps every
    non-empty line that contains ``phylum`` as a substring. The match is made
    against the whole tab-separated line, not against one specific column.

    Args:
        phylum: Text to look for in each organism line (e.g. a phylum name).

    Returns:
        A list of ``(species_code, species_name)`` tuples, taken from the
        second and third tab-separated fields of each matching line, in the
        order the lines appear in the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    organism_url = KEGG_API_BASE + "list/organism"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(organism_url, timeout=30)
    # Fail loudly instead of parsing an error page as if it were the table.
    response.raise_for_status()

    species_list = []
    for line in response.text.split("\n"):
        if not line or phylum not in line:
            continue
        parts = line.split("\t")
        # A matching line with fewer than three fields has no code/name pair
        # to extract; skip it rather than raise IndexError.
        if len(parts) < 3:
            continue
        species_list.append((parts[1], parts[2]))
    return species_list
def get_metabolic_pathways(species_code):
    """Return the pathway identifiers KEGG lists for one organism.

    Requests ``list/pathway/<species_code>`` from the KEGG REST API and
    collects the first tab-separated field of every non-empty response line.

    Args:
        species_code: KEGG organism code appended to the request URL.

    Returns:
        A list of pathway identifier strings in response order. The list is
        empty when the response has no lines or the server answers 404.

    Raises:
        requests.HTTPError: For any 4xx/5xx status other than 404.
        requests.Timeout: If the server does not respond within the timeout.
    """
    pathways_url = KEGG_API_BASE + f"list/pathway/{species_code}"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(pathways_url, timeout=30)
    # Treat "not found" as "no pathways" so a single unknown organism does not
    # abort a run over many species; any other error status is raised.
    if response.status_code == 404:
        return []
    response.raise_for_status()

    return [
        line.split("\t")[0]
        for line in response.text.split("\n")
        if line
    ]
# Change the argument to the name of the phylum you want to search for.
a = get_species_list("Verrucomicrobia")
# One list of pathway identifiers per matching species, in listing order.
b = [get_metabolic_pathways(species_code) for species_code, _species_name in a]
import numpy as np
import pandas as pd
def _split_pathway_entry(entry):
    """Split ``entry`` at its first "0" into ``(prefix, pathway_id)``.

    Returns ``None`` when ``entry`` contains no "0", because slicing with the
    ``-1`` that ``str.find`` reports would cut off the last character instead.
    """
    cut = entry.find("0")
    if cut == -1:
        return None
    return entry[:cut], entry[cut:]


def pathways_to_matrix(pathway_list):
    """Build a presence/absence table of pathway IDs per prefix.

    Every string in the nested ``pathway_list`` is split at its first "0":
    the text before it is the row label (prefix), the text from the "0"
    onwards is the column label (pathway ID). Strings without a "0" cannot be
    split by that rule and are ignored.

    Args:
        pathway_list: Iterable of iterables of pathway identifier strings.

    Returns:
        A ``pandas.DataFrame`` of floats indexed by the sorted unique prefixes
        with the sorted unique pathway IDs as columns; a cell is 1 when that
        prefix/ID combination occurs in the input and 0 otherwise. Empty
        input yields an empty (0 x 0) frame.
    """
    # Split each entry exactly once and reuse the result below.
    pairs = []
    for sublist in pathway_list:
        for entry in sublist:
            split = _split_pathway_entry(entry)
            if split is not None:
                pairs.append(split)

    prefixes = sorted({prefix for prefix, _ in pairs})
    pathway_ids = sorted({pathway_id for _, pathway_id in pairs})

    # Label -> position maps replace repeated list.index() scans, which made
    # the fill loop quadratic in the number of labels.
    row_of = {prefix: row for row, prefix in enumerate(prefixes)}
    col_of = {pathway_id: col for col, pathway_id in enumerate(pathway_ids)}

    matrix = np.zeros((len(prefixes), len(pathway_ids)))
    for prefix, pathway_id in pairs:
        matrix[row_of[prefix], col_of[pathway_id]] = 1

    return pd.DataFrame(matrix, index=prefixes, columns=pathway_ids)
# Build the presence/absence table, echo it to stdout, then write it to a CSV
# file in the current working directory with cells formatted as whole numbers.
pathway_matrix = pathways_to_matrix(b)
print(pathway_matrix)
pathway_matrix.to_csv(
    'Verrucomicrobia_matrix.csv',
    float_format='%.0f',
    index_label='row_name',
    header=True,
    index=True,
)
import seaborn as sns
import matplotlib.pyplot as plt
def plot_heatmap(matrix):
    """Show ``matrix`` as an annotated heatmap in a 20x10 inch figure.

    Each cell is annotated with its value formatted without decimals, the
    axes are labelled, and the figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(20, 10))
    # sns.heatmap draws into the current axes and hands that Axes back, so
    # the labels can be set on it directly.
    heatmap_axes = sns.heatmap(
        matrix,
        cmap='coolwarm',
        annot=True,
        fmt=".0f",
        linewidths=.5,
    )
    heatmap_axes.set_xlabel("Pathway ID")
    heatmap_axes.set_ylabel("Species Prefix")
    heatmap_axes.set_title("Metabolic Pathway Presence Heatmap")
    plt.show()
# Display the heatmap for the pathway matrix computed earlier in this script.
plot_heatmap(pathway_matrix)