-
Notifications
You must be signed in to change notification settings - Fork 0
/
Corona_world_scrap.py
48 lines (40 loc) · 1.31 KB
/
Corona_world_scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 15 19:07:34 2020
@author: jagveer
"""
import os
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
#from collections import OrderedDict
#from bs4 import BeautifulSoup as BS
# Page whose statistics table is scraped.
url = "https://www.worldometers.info/coronavirus/"

# driver path
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object; it is kept so the script still runs on the Selenium 3 API it
# was written against.
browser = webdriver.Firefox(executable_path="/home/jagveer/anaconda3/geckodriver")
try:
    browser.get(url)

    # Column names come from the header cells of the first element with class
    # "table"; the first and the last header cell are skipped.
    right_table = browser.find_element(By.CLASS_NAME, 'table')
    column_name = right_table.find_elements(By.TAG_NAME, 'th')
    col_name = [header.text for header in column_name[1:-1]]

    # One list of cell texts per <tr> found under the table's first <tbody>.
    # All text is copied into plain strings here, so nothing below needs the
    # browser to stay open.
    get_corona = browser.find_element(
        By.XPATH,
        '/html/body/div[3]/div[3]/div/div[4]/div[1]/div/table/tbody[1]',
    )
    data_list2 = [
        [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]
        for row in get_corona.find_elements(By.TAG_NAME, 'tr')
    ]
finally:
    # Close the browser even when scraping fails, so no Firefox/geckodriver
    # process is left running after an error.
    browser.quit()

df = pd.DataFrame(data_list2)
# Drop the first cell of every row and keep exactly as many cells as there are
# header names, so data column i lines up with header i. This equals the
# previous fixed 1:14 slice whenever 13 header names are scraped, and avoids a
# length-mismatch error on the assignment below when the page has a different
# number of columns.
df = df.iloc[:, 1:1 + len(col_name)]
df.columns = col_name

# Write one CSV per day; the file name carries the current local date.
dir_csv = 'covid19_daily'
os.makedirs(dir_csv, exist_ok=True)  # to_csv does not create missing folders
filename = datetime.now().strftime('Date-%Y-%m-%d.csv')
df.to_csv(os.path.join(dir_csv, filename), index=False)
# Load 'covid19_world_data.csv' and strip its sparse parts: first the rows in
# which every value is missing, then the columns that hold fewer than 30
# non-missing values.
# NOTE(review): this reads a fixed file name, not the dated CSV written by the
# to_csv call earlier in the script - confirm that is intended.
df = pd.read_csv('covid19_world_data.csv')
new_df = df.dropna(how='all', axis=0).dropna(thresh=30, axis=1)