63 lines
2.2 KiB
Python
''' Crawl site and validate every page renders somewhat correctly '''
|
|
import unittest
|
|
import re
|
|
import requests
|
|
|
|
class ContentTest(unittest.TestCase):

    ''' Run a functional test to validate content being served '''

    def setUp(self):
        ''' Create some starter data to be used in tests '''
        # Base address of the site under test, and the keyword every
        # correctly rendered page is expected to contain.
        self.domain = "http://127.0.0.1"
        self.search_string = "Version"

    def tearDown(self):
        ''' Destroy starter data '''
        # BUG FIX: the original assigned the *string* "None" to
        # search_string; use the None singleton for both attributes.
        self.domain = None
        self.search_string = None

    def request_recurse(self, url, requested=None):
        ''' Recursively request each page checking the return code and urls.

        url       -- site-relative path (e.g. "/") to fetch
        requested -- list of paths already visited; shared down the
                     recursion to avoid re-fetching pages and to stop
                     infinite loops on link cycles

        Returns a (counts, requested) tuple, where counts maps
        'pass'/'fail' to the number of pages that did / did not
        contain self.search_string.
        '''
        counts = {
            'pass': 0,
            'fail': 0,
        }
        # None sentinel instead of a mutable default argument, so each
        # top-level call starts with a fresh visited list.
        if requested is None:
            requested = []
        # Already visited: cut the recursion off here.
        if url in requested:
            return counts, requested
        requested.append(url)

        # verify=False: the local test server may use a self-signed cert.
        # timeout keeps a wedged server from hanging the test run forever.
        response = requests.get(
            self.domain + url,
            allow_redirects=True,
            verify=False,
            timeout=30,
        )
        if self.search_string in response.text:
            counts['pass'] += 1
        else:
            counts['fail'] += 1

        # Collect site-relative links.  Raw string fixes the invalid
        # escape sequences (\( \)) the original non-raw literal produced.
        links = re.findall(
            r'href="/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            response.text
        )
        for link in links:
            # Drop the literal 'href="' prefix with an exact-length
            # slice.  The original used lstrip('href="'), which strips
            # a character *set* and only worked by accident because
            # every match continues with '/'.
            link = link[len('href="'):]
            # Skip static assets and protocol-relative/external links.
            if "/static/" not in link and "//" not in link:
                sub_counts, requested = self.request_recurse(
                    link, requested=requested
                )
                # Merge the recursive call's tallies into ours.
                for key, value in sub_counts.items():
                    counts[key] = counts.get(key, 0) + value

        return counts, requested
|
|
|
|
class CrawlSite(ContentTest):

    ''' Verify no broken links are present within blog '''

    def runTest(self):
        ''' Execute recursive request '''
        # Crawl the whole site starting at the root; the visited-page
        # list is not needed for the assertion.
        counts, _visited = self.request_recurse("/")
        failures = counts['fail']
        self.assertFalse(
            failures > 0,
            "Found {0} pages that did not return keyword".format(failures)
        )
|