-
Notifications
You must be signed in to change notification settings - Fork 0
/
village.py
75 lines (67 loc) · 1.54 KB
/
village.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
from __future__ import print_function # Only needed for Python 2
import sys
from textblob import TextBlob
# Opens village1.php for writing, which creates or truncates it (Python 2
# `file` builtin). NOTE(review): nothing in the visible code writes to or
# closes `f`; the PHP text is emitted with print() instead -- presumably
# stdout is redirected by whoever runs the script; confirm.
f=file("village1.php","w")
# Python 2 only: site.py deletes sys.setdefaultencoding at startup, and
# reload(sys) brings it back so the default str<->unicode codec can be set
# to UTF-8 for the rest of the script.
reload(sys)
sys.setdefaultencoding('utf-8')
# Preamble of the generated PHP: opens the `$data` array and leaves the nested
# "info" array open. The records printed by php() go inside it, and the final
# print("))") at the end of the script closes both arrays.
header="""<?php
$data = array (
"sessionId" => "10001.SES.18.08.2017.17.35.13.2",
"txId" => "0",
"deviceUUId" => "395e0e1d2f76c8f1",
"type" => 85,
"subType" => 0,
"action" => 1,
"OID" => "",
"fromTime" => "",
"toTime" => "",
"index" => 0,
"idList" => array(),
"info" =>array(
"""
# Emit the preamble on stdout before any record is printed.
print (header)
import string
from bs4 import BeautifulSoup
import urllib2
def _php_escape(value):
    """Return *value* as text that is safe inside a PHP double-quoted string."""
    # u"%s" turns both plain strings and TextBlob objects into text on
    # Python 2 and Python 3 alike.
    text = u"%s" % (value,)
    # Escape the backslash first so the escapes added afterwards are not
    # doubled. "$" is escaped because PHP interpolates variables inside
    # double-quoted strings.
    return text.replace("\\", "\\\\").replace('"', '\\"').replace("$", "\\$")


def php(a):
    """Print one PHP ``array(...)`` record for the place name *a*.

    The name is translated to Hindi with TextBlob. If the translation fails
    for any reason the placeholder ``***`` is emitted instead and the failure
    is reported on stderr, so that stdout still contains only PHP.

    Args:
        a: The place name (text) to emit as "tehsilName".

    Output:
        Writes the record, followed by blank lines, to stdout. Both names are
        escaped for PHP double-quoted strings, and "tehsilId" is emitted
        inside the record's own array rather than after its closing
        parenthesis.
    """
    blob = TextBlob(a)
    h = "***"
    try:
        h = blob.translate(to="hi")
    except Exception as e:
        # Best effort: translation needs network access and may fail for
        # individual names. Keep the placeholder but do not hide the failure.
        print("translation failed for %r: %s" % (a, e), file=sys.stderr)
    print("""array(
"objectId"=> "",
"tehsilName" => "%s",
"districtId" => "",
"tehsilNameHindi" => "%s",
"creationTime" => 0,
"modificationTime" => 0,
"tehsilId" => ""),""" % (_php_escape(a), _php_escape(h)))
    print("\n\n")
# urllib2 responses are not context managers on Python 2, hence closing().
from contextlib import closing


def _category_links(category_url):
    """Return the <a> tags inside every "mw-category" div of a category page."""
    # closing() releases the HTTP response even if parsing raises.
    with closing(urllib2.urlopen(category_url)) as response:
        document = BeautifulSoup(response, 'html.parser')
    links = []
    for listing in document.findAll("div", {"class": "mw-category"}):
        links.extend(listing.findAll("a"))
    return links


# Two-level crawl: the root category lists sub-categories, and each
# sub-category page lists the individual village articles.
url = "https://en.wikipedia.org/wiki/Category:Villages_in_Uttar_Pradesh"
for t in _category_links(url):
    # The link's own text is deliberately not parsed: the old `te[2]` lookup
    # was unused and raised IndexError on link texts shorter than three words.
    url1 = "https://en.wikipedia.org" + t['href']
    for t1 in _category_links(url1):
        title = t1.text
        # Skip "List ..." articles and nested "Villages ..." categories;
        # everything else is printed as a record.
        if title.split(' ')[0] == 'List':
            continue
        if "Villages" in title:
            continue
        php(title)
# Close the "info" array and the outer $data array opened by the header.
print ("))")