From 8484a2e6eeb35f54bc9be45af84c88c623733223 Mon Sep 17 00:00:00 2001 From: Cowry Date: Sat, 31 Mar 2018 16:50:20 +0800 Subject: [PATCH] fix some problems findall(html) should be modified to findall(str(html)) --- proxypool/getter.py | 16 ++++++++-------- proxypool/schedule.py | 7 +++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/proxypool/getter.py b/proxypool/getter.py index cb99766..b9906c6 100644 --- a/proxypool/getter.py +++ b/proxypool/getter.py @@ -35,7 +35,7 @@ def crawl_ip181(self): html = get_page(start_url) ip_adress = re.compile('\s*(.*?)\s*(.*?)') # \s* 匹配空格,起到换行作用 - re_ip_adress = ip_adress.findall(html) + re_ip_adress = ip_adress.findall(str(html)) for adress, port in re_ip_adress: result = adress + ':' + port yield result.replace(' ', '') @@ -48,7 +48,7 @@ def crawl_kuaidaili(self): ip_adress = re.compile( '(.*)\s*(\w+)' ) - re_ip_adress = ip_adress.findall(html) + re_ip_adress = ip_adress.findall(str(html)) for adress, port in re_ip_adress: result = adress + ':' + port yield result.replace(' ', '') @@ -61,7 +61,7 @@ def crawl_xicidaili(self): 'Cn\s*(.*?)\s*(.*?)' ) # \s* 匹配空格,起到换行作用 - re_ip_adress = ip_adress.findall(html) + re_ip_adress = ip_adress.findall(str(html)) for adress, port in re_ip_adress: result = adress + ':' + port yield result.replace(' ', '') @@ -88,7 +88,7 @@ def crawl_data5u(self): '