-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathnist_Lines.py
140 lines (91 loc) · 2.9 KB
/
nist_Lines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#https://towardsdatascience.com/controlling-the-web-with-python-6fceb22c5f08
'''
This script downloads the line lists from the NIST database.
It uses geckodriver to navigate the NIST website. See the comments below for
instructions on installing geckodriver.
Date: November 2019
Author: Simon Grimm
'''
'''
Download the proper geckodriver for your operating system and system architecture from the geckodriver releases page (https://github.com/mozilla/geckodriver/releases).
Then follow the steps below:
Extract the geckodriver file from the downloaded archive. Then, depending on your operating system, do the following.
For Linux systems:
Open a terminal and log in as the root user. Copy or move the extracted geckodriver to a bin directory.
In my case I moved the file to /usr/bin, because the driver looks for the geckodriver binary in '/usr/bin'; this solved the problem.
To move the file into the bin directory, use a command like:
$ mv 'geckodriver binary source path' 'destination path'
The destination path should be the binary directory of the user's system.
'''
# pyperclip needs a clipboard backend on Linux; install one with:
#   sudo apt install xsel
import sys
import time
import pyperclip
import subprocess
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import argparse
def Lines(Z, I):
    """Download one line list from the NIST ASD lines form and save it.

    Opens the NIST lines form in a Firefox window driven by geckodriver,
    types ``"Z = <Z> <I>"`` into the "spectra" field, adjusts the advanced
    output options, submits the form, copies the whole result page through
    the system clipboard and writes the text, with the characters
    ``? = [ ] ( )`` removed, to ``NIST_Lines<ZZ><II>.dat`` in the current
    working directory.

    Args:
        Z: Integer placed first in the "spectra" query and in the file name
            (presumably the atomic number -- confirm against the NIST form).
        I: Integer placed second in the "spectra" query and in the file name
            (presumably the ion stage -- confirm against the NIST form).

    Returns:
        None. The result is the file written to disk.

    Notes:
        The browser is always closed, even when a step fails. The page text
        is fetched with Ctrl+A / Ctrl+C, so the previous clipboard content
        is overwritten.
    """
    # Imported locally so this function does not rely on the file's import
    # header being changed. ``find_element(By.X, ...)`` replaces the
    # ``find_element_by_*`` helpers that Selenium 4.3 removed.
    from selenium.webdriver.common.by import By

    print(Z, I)

    driver = webdriver.Firefox()
    try:
        driver.get('https://physics.nist.gov/PhysRefData/ASD/lines_form.html')

        driver.find_element(By.NAME, "spectra").send_keys("Z = %d %d" % (Z, I))

        # The remaining options are only reachable once the advanced
        # settings are shown.
        driver.find_element(By.NAME, "show_advanced").click()

        # Move two entries down in the "format" control.
        # NOTE(review): which output format this selects depends on the
        # current NIST form layout -- confirm.
        driver.find_element(By.ID, "format").send_keys(Keys.DOWN, Keys.DOWN)

        # Toggle the "show_wn" option (presumably the wavenumber column).
        driver.find_element(By.ID, "show_wn").click()

        # Move three entries down in the "show_av" control.
        # NOTE(review): the resulting choice depends on the form layout.
        driver.find_element(By.NAME, "show_av").send_keys(
            Keys.DOWN, Keys.DOWN, Keys.DOWN)

        # Toggle the "g_out" option.
        driver.find_element(By.ID, "g_out").click()

        driver.find_element(By.NAME, "submit").click()

        url = driver.current_url
        print(url)
        # NOTE(review): this polls only while the browser is on doi2bib.org,
        # which the NIST form is not expected to reach; it looks like a
        # leftover from another script. Kept unchanged -- confirm intent.
        while url == 'https://www.doi2bib.org/':
            time.sleep(1.0)
            url = driver.current_url
            print(url)

        # Copy the page repeatedly until two consecutive copies have the
        # same length and exceed 3000 characters, i.e. the result page has
        # stopped growing. Give up after 10 attempts and use the last copy.
        s = ""
        lenS = 0
        for _ in range(10):
            lenSOld = lenS
            body = driver.find_element(By.CSS_SELECTOR, "body")
            body.send_keys(Keys.CONTROL, "a")
            body.send_keys(Keys.CONTROL, "c")
            s = pyperclip.paste()
            lenS = len(s)
            print("lenght of data", len(s))
            if lenS == lenSOld and lenS > 3000:
                break
            time.sleep(5)
    finally:
        # Close Firefox even if a form element was missing or the copy
        # failed, so no browser process is left behind.
        driver.quit()

    # Remove the annotation characters in a single pass.
    cleaned = s.translate(str.maketrans('', '', '?=[]()'))

    # Explicit UTF-8 so non-ASCII characters in the table do not depend on
    # the locale's default encoding.
    with open("NIST_Lines%02d%02d.dat" % (Z, I), "w", encoding="utf-8") as f:
        f.write(cleaned)
'''
echo "T"
#replace " with ' '
#sed -i 's/"//g' test.dat
'''
if __name__ == "__main__":
    # Script entry point: read the two integer options from the command
    # line (-Z/--Z defaults to 1, -I/--I defaults to 0) and download the
    # corresponding line list.
    cli = argparse.ArgumentParser()
    cli.add_argument('-Z', '--Z', default=1, type=int, help='Z')
    cli.add_argument('-I', '--I', default=0, type=int, help='I')
    options = cli.parse_args()

    Z, I = options.Z, options.I

    Lines(Z, I)