Python爬虫(一)——开封市58同城租房信息
代码:
# coding=utf-8
"""Scrape rental listings from a 58.com listing page and append them to kf.csv.

Usage: python <this_script> <listing-page-url>

Each listing becomes one CSV row with the columns
title, house, area, address1, address2, price.
"""
import csv
import os
import sys

import requests
from bs4 import BeautifulSoup

PY2 = sys.version_info[0] == 2
if PY2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8 so the
    # csv module can write non-ASCII text. Neither call exists on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Request headers: none are configured in this script.

CSV_PATH = 'kf.csv'
CSV_HEADER = ['title', 'house', 'area', 'address1', 'address2', 'price']
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# CSS path shared by every field selector: one <li> per listing.
_ITEM = 'body > div.mainbox > div.main > div.content > div.listBox > ul > li'
_TITLE = _ITEM + ' > div.des > h2 > a:nth-of-type(1)'
_ROOM = _ITEM + ' > div.des > p.room'
_ADDRESS_1 = _ITEM + ' > div.des > p.add > a:nth-of-type(1)'
_ADDRESS_2 = _ITEM + ' > div.des > p.add > a:nth-of-type(2)'
_PRICE = _ITEM + ' > div.listliright > div.money > b'


def _open_csv():
    """Open the output CSV for appending in the mode the csv module expects."""
    if PY2:
        # The Python 2 csv module writes bytes.
        return open(CSV_PATH, 'ab')
    # The Python 3 csv module needs a text file opened with newline=''.
    return open(CSV_PATH, 'a', newline='', encoding='utf-8')


def _clean(text):
    """Return *text* with every space and newline character removed."""
    return text.replace(' ', '').replace('\n', '')


def download(url):
    """Fetch the listing page at *url* and append one CSV row per listing.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    titles = soup.select(_TITLE)
    houses = soup.select(_ROOM)
    first_addresses = soup.select(_ADDRESS_1)
    second_addresses = soup.select(_ADDRESS_2)
    prices = soup.select(_PRICE)

    # NOTE: zip() pairs the five result lists by position and stops at the
    # shortest one, so a listing that lacks one of the fields shifts the
    # columns of every listing after it.
    listings = zip(titles, houses, first_addresses, second_addresses, prices)

    # Open the file once for the whole page instead of once per row; the
    # with-block also closes it if writing fails part-way.
    with _open_csv() as csvfile:
        writer = csv.writer(csvfile)
        for title, house, address1, address2, price in listings:
            # The first and last space-separated pieces of the room text
            # fill the "house" and "area" columns.
            room_parts = house.get_text().split(' ')
            print('write one house')
            writer.writerow((
                # get_text() rather than str(title.string): .string is None
                # when the anchor has nested tags, which would write "None".
                _clean(title.get_text()),
                _clean(room_parts[0]),
                _clean(room_parts[-1]),
                _clean(address1.get_text()),
                _clean(address2.get_text()),
                _clean(price.get_text()),
            ))


# Initialise the CSV file.
def info():
    """Write the header row to the CSV file unless it already has content.

    Skipping a non-empty file keeps repeated runs from inserting another
    header row in the middle of the appended data.
    """
    if os.path.exists(CSV_PATH) and os.path.getsize(CSV_PATH) > 0:
        return
    with _open_csv() as csvinfo:
        csv.writer(csvinfo).writerow(CSV_HEADER)


if __name__ == '__main__':
    # The page URL comes from the command line; the script defines no URL.
    if len(sys.argv) != 2:
        sys.exit('usage: python {0} <listing-page-url>'.format(sys.argv[0]))
    info()
    download(sys.argv[1])