Python 爬虫抓取代理IP,并检测连通性
帮朋友抓了一些代理IP,并根据连通性测试的结果,分别保存到了不同的文件中。特将源码分享。
注意:
1,环境Python3.5
2,安装BeautifulSoup4 requests
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#-*- coding:gb18030 -*-
import os
import subprocess
import sys
import time

import requests
from bs4 import BeautifulSoup
# Base URLs of the proxy-list sites to crawl, keyed by an arbitrary label.
# The crawl loop appends the page number directly to each URL.
all_url_add = {
    'url2': 'http://ip84.com/gn/',
}
def func(url):
    """Scrape one proxy-list page and append its proxies to ``ip.txt``.

    The page is fetched with ``requests`` and parsed with BeautifulSoup.
    Every ``<tr>`` inside a ``<table class="list">`` is read as seven cells:
    address, port, region, type, protocol, speed and time. Each proxy is
    printed and stored as one ``ip:port`` line.

    Args:
        url: Full URL of the listing page to fetch.

    Returns:
        The string ``'success'`` once the page has been processed.

    Raises:
        requests.RequestException: If the page cannot be fetched.
        OSError: If ``ip.txt`` cannot be opened or written.
    """
    # Without a timeout a stalled server would block the whole crawl forever.
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    entries = []
    for table in soup.find_all("table", class_="list"):
        for tr in table.find_all("tr"):
            cells = [td.get_text().strip() for td in tr.find_all("td")]
            if not cells:
                # Rows without any <td> (presumably the header row) carry no
                # proxy data, so they are skipped instead of reported as errors.
                continue
            try:
                ipaddress, port, city, leixing, xieyi, shudu, time1 = cells[:7]
                city = city.replace("\n", "")
                # Queue the entry before printing so that a console encoding
                # error cannot cost us an already-parsed proxy.
                entries.append(ipaddress + ":" + port + '\n')
                print('地址:' + ipaddress + "端口:" + port + "地区:" + city + "类型:" + leixing + "协议" + xieyi + "速度" + shudu + "时间:" + time1)
            except Exception as exc:
                # Best effort per row: one malformed row must not abort the page.
                print(u"-------------------程序异常-----------------------")
                print(exc)

    if entries:
        # One append for the whole page instead of reopening the file per row.
        with open("ip.txt", "a", encoding="utf-8") as out:
            out.writelines(entries)

    print(u'本页抓取结束,正在跳转下一页')
    return 'success'
def pin():
    """Ping every proxy in ``ip.txt`` and save the reachable ones.

    Each non-empty line of ``ip.txt`` is expected to look like ``ip:port``.
    The address part is pinged; when ``ping`` exits with status 0 the
    complete ``ip:port`` entry is appended to ``SuccessIp.txt``.

    Returns:
        None.

    Raises:
        OSError: If ``ip.txt`` cannot be read or ``SuccessIp.txt`` cannot be
            written.
    """
    # Read everything up front so the input file is closed before the slow
    # ping loop starts, and so each entry is read exactly once.
    with open('ip.txt', 'r', encoding='utf-8') as source:
        entries = [line.strip() for line in source]

    for entry in entries:
        if not entry:
            continue  # blank line: nothing to test
        ip = entry.split(':')[0]
        try:
            # An argument list (no shell) keeps scraped text from being
            # interpreted as shell syntax. The flags are the ones the script
            # always used; they follow the Windows ``ping`` usage
            # (-n count, -w timeout).
            return1 = subprocess.call(['ping', '-n', '5', '-w', '5', ip])
        except OSError:
            # ``ping`` could not be started at all: count it as a failed test.
            return1 = 1
        if return1:
            print('测试失败')
        else:
            print('测试成功,正在写入新文件')
            # Record the entry that was just tested, one entry per line.
            with open('SuccessIp.txt', 'a', encoding='utf-8') as out:
                out.write(entry + '\n')
    print('程序结束,可用IP已放在SuccessIp中')
if __name__ == '__main__':
    # Crawl pages 1-49 of every configured site, then ping-test the harvest.
    for x, base_url in all_url_add.items():
        print(x)
        for y in range(1, 50):
            # The page number is appended directly to the site's base URL.
            url = base_url + str(y)
            print(url)
            status = func(url)
            if status == 'success':
                print(y, '页结束')
    print(u'****程序抓取运行结束,正在检查所得IP连通性,请勿关闭窗口*****')
    pin()
|
代码有点乱,等有时间再把数据存入数据库,并将这个功能集成到博客当中。
Rex博客保留所有权利