Skip to content

Commit

Permalink
Implement Unit Tests.
Browse files Browse the repository at this point in the history
- Implements passing test case for crawler.canonical.
- Implements passing test case for crawler.excludes.

- TODO: Implement tests for crawler.crawl against live application.
- TODO: Implement tests for extractor against live application.

See Issue: MikeMeliz#16
  • Loading branch information
the-siegfried committed Jul 3, 2022
1 parent 139031b commit 1843c64
Showing 1 changed file with 57 additions and 1 deletion.
58 changes: 57 additions & 1 deletion modules/tests/test_crawler.py
Original file line number Diff line number Diff line change
@@ -1 +1,57 @@
# TODO: Implement extractor tests on non-tor sites.
import shutil
import unittest

from ..crawler import Crawler
from ..checker import url_canon
from ..checker import extract_domain
from ..checker import folder


class TestCrawlerFunctions(unittest.TestCase):
    """ Unit tests for the Crawler.excludes and Crawler.canonical helpers. """

    def setUp(self):
        """ Test Suite Setup.
        Creates the Crawler under test from the URL returned by url_canon
        and stores the path returned by folder() so tearDown can remove it.
        """
        _website = url_canon('torcrawl.com', False)
        self.out_path = folder(extract_domain(_website), False)
        self.crawler = Crawler(_website, 0, 1, self.out_path, False, False)

    def tearDown(self):
        """ Test Suite Teardown. """
        # Remove test folder.
        shutil.rmtree(self.out_path)

    def test_excludes(self):
        """ Test crawler.excludes function.
        Passes if the function returns a truthy value for every one of the
        provided failing links.
        """
        _uri = 'http://www.torcrawl.com'
        # NOTE(review): the mailto address below looks like a redaction
        # placeholder - confirm the intended value.
        failing_links = ['#', 'tel:012-013-104-5',
                         'mailto:[email protected]', f'{_uri}/res/test.pdf',
                         f'{_uri}/res/test.doc', f'{_uri}/res/test.jpg',
                         f'{_uri}/res/test.png', f'{_uri}/res/test.jpeg',
                         f'{_uri}/res/test.gif']
        for link in failing_links:
            # subTest keeps the loop going after a failure, so every
            # offending link is reported instead of only the first one.
            with self.subTest(link=link):
                self.assertTrue(self.crawler.excludes(link),
                                f'Test Fail:: Link: {link} - not excluded')

    def test_canonical(self):
        """ Test crawler.canonical function.
        Passes if the function returns the expected URL for every one of the
        provided links.
        """
        _uri = 'http://www.torcrawl.com/'
        # (input link, expected result of crawler.canonical) pairs.
        links = [(f'{_uri}sundance', f'{_uri}sundance'),
                 ('/sundance', f'{_uri}sundance'),
                 (f'{_uri}bob.html', f'{_uri}bob.html'),
                 ('bob.html', f'{_uri}bob.html')]

        for link, expected in links:
            # Each pair is checked independently so one mismatch does not
            # hide the others.
            with self.subTest(link=link):
                result = self.crawler.canonical(link)
                self.assertEqual(expected, result,
                                 f'Test Fail:: Canon returned = {result}, '
                                 f'expected {expected}')

    # Reported as skipped rather than passed until a real test exists, so the
    # missing coverage stays visible in the test report.
    @unittest.skip('TODO: Test Crawler.crawl against live web application.')
    def test_crawl(self):
        """ Test Crawler.crawl functionality. """
        # TODO: Test Crawler.crawl against live web application.
        # Re-instantiate crawler with live application.

0 comments on commit 1843c64

Please sign in to comment.