deadlink-crawler

[unmaintained] crawls a site to detect dead links

commit ab9fcee143b68917dedb705b466813bee60c4b67
parent 2c24ea6016fdf202b6ef25b4b51369e8e6d4f249
Author: Stefan <cct@stefan-koch.name>
Date:   Wed, 23 Jan 2013 20:35:52 +0100

added something, but diff shows no differences?

Diffstat:
M crawler.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crawler.py b/crawler.py
@@ -2,6 +2,7 @@
 from BeautifulSoup import BeautifulSoup
 import urllib2, urlparse
 import re
 import time
+import httplib
 
 # TODO: Do not apply wait time to external links
@@ -72,8 +73,8 @@ class Crawler(object):
         request = urllib2.Request(url)
         try:
-            response = urllib2.urlopen(request)
-        except urllib2.HTTPError:
+            response = urllib2.urlopen(request, None, 10)
+        except (urllib2.HTTPError, httplib.BadStatusLine):
             # We receive an exception in case of 404
             self.add_to_deadlinks(url, found_via)
             return
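For context, the change does two things: it passes an explicit 10-second
timeout to urllib2.urlopen() (the second positional argument is the POST
data, left as None), and it treats a malformed HTTP status line from the
server (httplib.BadStatusLine) the same way as an HTTP error response,
i.e. as a dead link. The following is a minimal sketch of that behavior
in isolation, assuming Python 2 as used by this repository; the helper
name is_dead is hypothetical and not part of the crawler:

    import urllib2
    import httplib

    def is_dead(url, timeout=10):
        """Return True if the URL should be recorded as a dead link.

        Mirrors the committed change: pass an explicit timeout to
        urlopen() and treat a malformed status line from the server
        (httplib.BadStatusLine) like an HTTP error such as 404.
        """
        request = urllib2.Request(url)
        try:
            urllib2.urlopen(request, None, timeout)
        except (urllib2.HTTPError, httplib.BadStatusLine):
            return True
        return False

Note that connection failures and timeouts surface as urllib2.URLError
rather than urllib2.HTTPError, so with this handler they would still
propagate; the commit only adds the timeout and the malformed-status-line
case, not URLError handling.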