Skip to content

Commit

Permalink
make clear file.close() will make data appear in the written csv; add print()s; cleanup earlier runs
Browse files Browse the repository at this point in the history
  • Loading branch information
thejqs committed Mar 5, 2020
1 parent d260647 commit 692de55
Showing 1 changed file with 25 additions and 22 deletions.
47 changes: 25 additions & 22 deletions completed/scraper_notebook_complete.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -50,11 +50,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"html = r.text"
"html = r.text\n",
"print(html)"
]
},
{
Expand All @@ -78,11 +79,12 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"soup = BeautifulSoup(html, \"html.parser\")"
"soup = BeautifulSoup(html, \"html.parser\")\n",
"print(soup)"
]
},
{
Expand All @@ -94,11 +96,12 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"table = soup.find('table',{'class':'entChartTable'})"
"table = soup.find('table',{'class':'entChartTable'})\n",
"print(table)"
]
},
{
Expand All @@ -110,13 +113,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rows = table.find_all('tr')\n",
"\n",
"rows = rows[2:]"
"# print(rows)\n",
"#skip the blank rows\n",
"rows = rows[2:]\n",
"# print(rows)"
]
},
{
Expand All @@ -128,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -144,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -171,26 +176,24 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"output = csv.DictWriter(csvfile, fieldnames = fieldnames, delimiter=',',quotechar='\"',quoting=csv.QUOTE_MINIMAL)\n",
"output.writeheader()"

"output = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter=',',quotechar='\"',quoting=csv.QUOTE_MINIMAL)\n",
"output.writeheader()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#loop through the rows\n",
"for row in rows:\n",
" #grab the table cells from each row\n",
" cells = row.find_all('td')\n",
" #skip the blank rows\n",
" #create a dictionary and assign the cell values to keys in our dictionary\n",
" result = {\n",
" \"title\" : cells[0].text.strip(),\n",
Expand All @@ -214,12 +217,12 @@
"collapsed": true
},
"source": [
"close the csv file"
"close the csv file to officially finish writing to it"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -246,7 +249,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
"version": "3.6.8"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 692de55

Please sign in to comment.