deadlink-crawler

[unmaintained] crawls a site to detect dead links

commit ec4b6107255053ec1c9e88cdf79bfdad4e5508cd
parent cc5c2daedf04d7b2d16a717c0c54547e4062bc80
Author: Stefan <stefan@eliteinformatiker.de>
Date:   Thu, 24 Jan 2013 15:19:32 +0100

added better output of deadlinks

Diffstat:
M crawler.py | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/crawler.py b/crawler.py
@@ -45,8 +45,7 @@ class Crawler(object):
             except urllib2.URLError:
                 continue
-        print("DEADLINKS")
-        print(self.deadlinks)
+        self.print_deadlinks(self.deadlinks)
 
     def visit_url(self, url, found_via):
         response = self.check_url(url, found_via)
@@ -98,6 +97,17 @@ class Crawler(object):
     def excluded(self, url):
         return self.url_match != None and not self.url_match.search(url)
 
+    def print_deadlinks(self, deadlinks):
+        if len(deadlinks) == 0:
+            print("No deadlinks were found. Hooray!")
+        else:
+            print("The following deadlinks were found")
+            print()
+            for via in deadlinks:
+                print("%s" % via)
+                for target in deadlinks[via]:
+                    print("\t%s" % target)
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Search a website for deadlinks")
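For context on what the new helper prints, here is a minimal standalone sketch of its loop, assuming self.deadlinks is a dict mapping the page a dead link was found on ("via") to a list of dead target URLs; the example URLs are made up for illustration.

# Hypothetical shape of the mapping print_deadlinks iterates over:
# page where the dead link was found -> list of dead target URLs.
deadlinks = {
    "http://example.com/index.html": [
        "http://example.com/missing-page",
        "http://example.com/old-download.zip",
    ],
}

# Same output logic as the new print_deadlinks method:
if len(deadlinks) == 0:
    print("No deadlinks were found. Hooray!")
else:
    print("The following deadlinks were found")
    print()
    for via in deadlinks:
        print("%s" % via)
        for target in deadlinks[via]:
            print("\t%s" % target)

Run under Python 3 this groups each dead target, one per indented line, under the page it was found on, which is the "better output" the commit message refers to.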