获取免费代理IP库

#!/usr/local/bin/python3
# coding:utf-8

# ====================================================
# Author: chang - EMail:changbo@hmg100.com
# Last modified: 2017-4-22
# Filename: iplibrary.py
# Description: get IP library (proxy list) files; based on urllib and re
# blog:http://www.cnblogs.com/changbo
# ====================================================

import urllib.request
import re


# Patterns are raw strings so that regex escapes such as ``\s`` and ``\[`` are
# not interpreted (and warned about) as Python string escapes. They are
# compiled once at import time instead of on every call.
_RE_CDATA = re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I)  # //<![CDATA[ ... //]]>
_RE_SCRIPT = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)  # <script>...</script>
_RE_STYLE = re.compile(r'<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)  # <style>...</style>
_RE_COMMENT = re.compile(r'<!--[^>]*-->')  # <!-- HTML comment -->
_RE_BLANK_LINES = re.compile(r'\n+')  # one or more consecutive newlines


def filter_tags(htmlstr):
    """Strip CDATA sections, scripts, styles and HTML comments from *htmlstr*.

    Ordinary HTML tags (``<br/>``, ``<BR>``, ``<p>``, ...) are deliberately
    left in place; ``getiplist`` relies on the ``<br/>`` / ``<BR>`` markers to
    split the response. Runs of consecutive newlines are collapsed to a
    single newline.

    Limitations that follow from the patterns: a script or style element
    whose body contains ``<`` is not removed, and a CDATA section or comment
    whose body contains ``>`` is not removed.

    Args:
        htmlstr: The HTML text to clean.

    Returns:
        The cleaned text as a new string.
    """
    s = _RE_CDATA.sub('', htmlstr)  # drop CDATA sections
    s = _RE_SCRIPT.sub('', s)  # drop <script> elements
    s = _RE_STYLE.sub('', s)  # drop <style> elements
    s = _RE_COMMENT.sub('', s)  # drop HTML comments
    return _RE_BLANK_LINES.sub('\n', s)  # collapse blank lines


# Matches one "a.b.c.d:port" proxy entry; group 1 is the IP, group 2 the port.
_PROXY_RE = re.compile(r'(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})')


def getiplist(ipnumber):
    """Fetch free proxies from the 89ip.cn API and print them.

    Each proxy found in the response is printed on its own line in the form
    ``<ip> ---- <port>``.

    Args:
        ipnumber: Number of proxies to request; it is interpolated into the
            ``tqsl`` query parameter with ``%d``, so it must be an integer.

    Returns:
        None. The result is written to standard output.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    url = 'http://www.89ip.cn/api/?&tqsl=%d&sxa=&sxb=&tta=&ports=&ktip=&cf=1' % ipnumber
    request = urllib.request.Request(url)
    request.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
    )
    # The with-block closes the HTTP response even if read/decode raises.
    with urllib.request.urlopen(request) as response:
        body = response.read().decode('gbk')
    ipinfo = filter_tags(body)

    # The proxy list is expected in the segment after the first '<br/>'
    # (presumably preceded by a header -- verify against the live API).
    # Fall back to the whole text when the marker is missing instead of
    # raising IndexError.
    segments = ipinfo.split('<br/>')
    payload = segments[1] if len(segments) > 1 else ipinfo

    # Extract entries by pattern rather than by slicing a fixed number of
    # trailing characters, so trailing footer text of any length and
    # malformed entries are simply ignored.
    for ip, port in _PROXY_RE.findall(payload):
        print(ip + ' ---- ' + port)
if __name__ == '__main__':
    # Only query the proxy API when executed as a script, so importing this
    # module does not trigger a network request.
    getiplist(30)

END!

版权声明:本文为changbo原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/changbo/p/6747624.html