forked from MikeMeliz/TorCrawl.py
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Implements passing test case for crawler.canonical. - Implements passing test case for crawler.excludes. - TODO: Implement tests for crawler.crawl against live application. - TODO: Implement tests for extractor against live application. See Issue: MikeMeliz#16
- Loading branch information
1 parent
139031b
commit 1843c64
Showing
1 changed file
with
57 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,57 @@ | ||
# TODO: Implement extractor tests on non-tor sites. | ||
import shutil | ||
import unittest | ||
|
||
from ..crawler import Crawler | ||
from ..checker import url_canon | ||
from ..checker import extract_domain | ||
from ..checker import folder | ||
|
||
|
||
class TestCrawlerFunctions(unittest.TestCase):
    """Unit tests for the Crawler link-filtering and normalising helpers."""

    def setUp(self):
        """Create a Crawler for 'torcrawl.com' and remember its output folder."""
        # NOTE(review): the positional flags/numbers passed to url_canon,
        # folder and Crawler are defined outside this file - confirm their
        # meaning against those signatures before changing them.
        _website = url_canon('torcrawl.com', False)
        self.out_path = folder(extract_domain(_website), False)
        self.crawler = Crawler(_website, 0, 1, self.out_path, False, False)

    def tearDown(self):
        """Remove the output folder recorded by setUp."""
        shutil.rmtree(self.out_path)

    def test_excludes(self):
        """Check that crawler.excludes returns a truthy value for every link
        that must not be crawled (anchors, tel:/mailto: links, documents and
        images).
        """
        _uri = 'http://www.torcrawl.com'
        failing_links = ['#', 'tel:012-013-104-5',
                         'mailto:[email protected]', f'{_uri}/res/test.pdf',
                         f'{_uri}/res/test.doc', f'{_uri}/res/test.jpg',
                         f'{_uri}/res/test.png', f'{_uri}/res/test.jpeg',
                         f'{_uri}/res/test.gif']
        for link in failing_links:
            # subTest reports every offending link instead of stopping at
            # the first failure.
            with self.subTest(link=link):
                self.assertTrue(self.crawler.excludes(link),
                                f'Test Fail:: Link: {link} - not excluded')

    def test_canonical(self):
        """Check that crawler.canonical turns each provided link into the
        expected absolute URL.
        """
        _uri = 'http://www.torcrawl.com/'
        # (input link, expected canonical form)
        links = [(f'{_uri}sundance', f'{_uri}sundance'),
                 ('/sundance', f'{_uri}sundance'),
                 (f'{_uri}bob.html', f'{_uri}bob.html'),
                 ('bob.html', f'{_uri}bob.html')]

        for raw_link, expected in links:
            with self.subTest(link=raw_link):
                result = self.crawler.canonical(raw_link)
                self.assertEqual(expected, result,
                                 f'Test Fail:: Canon returned = {result}, '
                                 f'expected {expected}')

    def test_crawl(self):
        """Placeholder for the Crawler.crawl tests (not implemented yet)."""
        # TODO: Test Crawler.crawl against live web application.
        # Re-instantiate crawler with live application.
        # Report the gap as a skip rather than a misleading pass.
        self.skipTest('Crawler.crawl test against a live application '
                      'is not implemented yet')