Implement Unit Tests.

- Implements passing test case for crawler.canonical.
- Implements passing test case for crawler.excludes.
- TODO: Implement tests for crawler.crawl against live application.
- TODO: Implement tests for extractor against live application.

See Issue: MikeMeliz#16
the-siegfried · Jul 3, 2022 · 1843c64 · 1843c64
1 parent 139031b
commit 1843c64
Showing 1 changed file with 57 additions and 1 deletion.
diff --git a/modules/tests/test_crawler.py b/modules/tests/test_crawler.py
@@ -1 +1,57 @@
-# TODO: Implement extractor tests on non-tor sites.
+import shutil
+import unittest
+
+from ..crawler import Crawler
+from ..checker import url_canon
+from ..checker import extract_domain
+from ..checker import folder
+
+
+class TestCrawlerFunctions(unittest.TestCase):
+    def setUp(self):
+        _website = url_canon('torcrawl.com', False)
+        self.out_path = out_path = folder(extract_domain(_website), False)
+        self.crawler = Crawler(_website, 0, 1, out_path, False, False)
+
+    def tearDown(self):
+        """ Test Suite Teardown. """
+        # Remove test folder.
+        shutil.rmtree(self.out_path)
+
+    def test_excludes(self):
+        """ Test crawler.excludes function.
+        Return True if the function successfully excludes the the provided
+        failing links.
+        """
+        _uri = 'http://www.torcrawl.com'
+        failing_links = ['#', 'tel:012-013-104-5',
+                         'mailto:[email protected]', f'{_uri}/res/test.pdf',
+                         f'{_uri}/res/test.doc', f'{_uri}/res/test.jpg',
+                         f'{_uri}/res/test.png', f'{_uri}/res/test.jpeg',
+                         f'{_uri}/res/test.gif']
+        for link in failing_links:
+            self.assertTrue(self.crawler.excludes(link),
+                            f'Test Fail:: Link: {link} - not excluded')
+
+    def test_canonical(self):
+        """ Test crawler.canonical function.
+        Return True if the function successfully normalizes the provided
+        failing links.
+        """
+        _uri = 'http://www.torcrawl.com/'
+        links = [[f'{_uri}sundance', f'{_uri}sundance'],
+                 ['/sundance', f'{_uri}sundance'],
+                 [f'{_uri}bob.html', f'{_uri}bob.html'],
+                 [f'bob.html', f'{_uri}bob.html']]
+
+        for link in links:
+            result = self.crawler.canonical(link[0])
+            self.assertEqual(link[1], result,
+                             f'Test Fail:: Canon returned = {result}, '
+                             f'expected {link[1]}')
+
+    def test_crawl(self):
+        """ Test Crawlwer.crawl functionality"""
+        # TODO: Test Crawler.crawl against live web application.
+        # Re-instantiate crawler with live application.
+        pass