commit ab9fcee143b68917dedb705b466813bee60c4b67
parent 2c24ea6016fdf202b6ef25b4b51369e8e6d4f249
Author: Stefan <cct@stefan-koch.name>
Date: Wed, 23 Jan 2013 20:35:52 +0100
added something, but diff shows no differences?
Diffstat:
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/crawler.py b/crawler.py
@@ -2,6 +2,7 @@ from BeautifulSoup import BeautifulSoup
import urllib2, urlparse
import re
import time
+import httplib
# TODO: Do not apply wait time to external links
@@ -72,8 +73,8 @@ class Crawler(object):
request = urllib2.Request(url)
try:
- response = urllib2.urlopen(request)
- except urllib2.HTTPError:
+ response = urllib2.urlopen(request, None, 10)
+ except (urllib2.HTTPError, httplib.BadStatusLine):
# We receive an exception in case of 404
self.add_to_deadlinks(url, found_via)
return