代码:

 1 # coding=utf-8
 2 import sys
 3 import csv
 4 import requests
 5 from bs4 import BeautifulSoup
 6 
 7 reload(sys)
 8 sys.setdefaultencoding(\'utf-8\')
 9 # 请求头设置
10 
def _strip_blanks(text):
    """Return *text* with every space and newline character removed."""
    return text.replace(' ', '').replace('\n', '')


def _open_csv_for_append(path):
    """Open *path* for appending in the mode the csv module expects."""
    # Python 2's csv module needs a binary-mode file; Python 3's needs a
    # text-mode file opened with newline=''.
    if sys.version_info[0] == 2:
        return open(path, 'ab')
    return open(path, 'a', newline='', encoding='utf-8')


def download(url, timeout=30):
    """Scrape one listing page and append one row per listing to ``kf.csv``.

    Each row holds, in order: the text of the first link in the listing's
    ``h2``; the first and the last space-separated piece of the ``p.room``
    text; the text of the first and second links in ``p.add``; and the text
    of the ``b`` element inside ``div.money``.  Spaces and newlines are
    stripped from every field.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up;
            without a timeout a stalled connection would block forever.
    """
    db_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(db_data.text, 'lxml')

    # Every field lives under the same <li> of the listing box.
    item = 'body > div.mainbox > div.main > div.content > div.listBox > ul > li'
    titles = soup.select(item + ' > div.des > h2 > a:nth-of-type(1)')
    houses = soup.select(item + ' > div.des > p.room')
    oneaddresss = soup.select(item + ' > div.des > p.add > a:nth-of-type(1)')
    twoaddresss = soup.select(item + ' > div.des > p.add > a:nth-of-type(2)')
    prices = soup.select(item + ' > div.listliright > div.money > b')

    # NOTE(review): the five lists are matched purely by position and zip()
    # stops at the shortest one, so a listing that lacks one of the fields
    # shifts the columns of every following row - confirm the page always
    # provides all five elements per listing.
    listings = zip(titles, houses, oneaddresss, twoaddresss, prices)

    # Open the output once for the whole page; the with-block closes it even
    # if a row fails to serialise.
    with _open_csv_for_append('kf.csv') as csvfile:
        writer = csv.writer(csvfile)
        for title, house, oneaddress, twoaddress, price in listings:
            room_parts = house.get_text().split(' ')
            print('write one house')
            writer.writerow((
                _strip_blanks(str(title.string)),
                _strip_blanks(room_parts[0]),
                _strip_blanks(room_parts[-1]),
                _strip_blanks(oneaddress.get_text()),
                _strip_blanks(twoaddress.get_text()),
                _strip_blanks(price.get_text()),
            ))
40 
41 
# Initialise the CSV file.
def info():
    """Append the column-header row to ``kf.csv``.

    The file is opened in append mode: it is created when missing and any
    existing content is kept, so each call adds one more header row.
    """
    # Python 2's csv module needs a binary-mode file; Python 3's needs a
    # text-mode file opened with newline=''.
    if sys.version_info[0] == 2:
        csvinfo = open('kf.csv', 'ab')
    else:
        csvinfo = open('kf.csv', 'a', newline='', encoding='utf-8')
    # The with-block guarantees the handle is closed even if the write fails.
    with csvinfo:
        csv.writer(csvinfo).writerow(
            ['title', 'house', 'area', 'address1', 'address2', 'price'])
48 
49 
if __name__ == '__main__':
    # The page to scrape is taken from the first command-line argument.
    # Validate it before touching kf.csv so a bad invocation leaves no
    # stray header row behind.
    if len(sys.argv) != 2:
        sys.exit('usage: python %s <listing-page-url>' % sys.argv[0])
    info()
    download(sys.argv[1])

 

 

 

 

版权声明:本文为LexMoon原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/LexMoon/p/58tc_1.html