-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.py
26 lines (23 loc) · 818 Bytes
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import matplotlib.pyplot as plt
from statistics import variance, mean
from bs4 import BeautifulSoup
from subprocess import Popen, PIPE
import os
import re
def main(path='vrbo/'):
    """Summarise the listing prices found in a directory of saved HTML pages.

    Every entry in *path* is opened and parsed with BeautifulSoup; the price
    is read from the ``content`` attribute of the
    ``<meta property="og:price:amount">`` tag and converted with ``int``.
    The mean and sample variance of the collected prices are printed and a
    50-bin histogram is shown with matplotlib.

    Args:
        path: Directory holding the saved pages. Defaults to ``'vrbo/'``.

    Returns:
        None. Results are printed and plotted.

    Raises:
        OSError: If *path* itself cannot be listed (e.g. it does not exist).
    """
    prices = []
    for filename in os.listdir(path):
        try:
            # os.path.join works whether or not `path` ends with a separator.
            with open(os.path.join(path, filename), "rb") as input_file:
                soup = BeautifulSoup(input_file, 'html.parser')
            tag = soup.find("meta", {"property": "og:price:amount"})
            # A missing tag (None) or a non-integer content value raises here
            # and is handled as a lost page below.
            prices.append(int(tag.get("content")))
        except Exception:
            # Deliberate per-file best effort: unreadable or unparseable
            # entries are reported and skipped. Unlike a bare `except:`, this
            # lets KeyboardInterrupt and SystemExit propagate.
            print("we lost a page, are you currently crawling?")

    if not prices:
        # mean() raises StatisticsError on empty data and there is nothing
        # to plot, so stop here.
        print("no prices found in " + path)
        return

    print("average price is: " + str(mean(prices)))
    if len(prices) > 1:
        print("variance is: " + str(variance(prices)))
    else:
        # variance() requires at least two data points.
        print("variance is undefined for a single price")

    plt.hist(prices, color='blue', edgecolor='black', bins=50)
    plt.show()
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()