爬取58同城二手房数据存储到redis数据库和mysql数据库
主程序代码:
import scrapy

from scrapyDemo.items import ScrapydemoItem


class PostSpider(scrapy.Spider):
    """Spider that scrapes second-hand housing listings (title + price) from 58.com."""

    name = 'home'
    # allowed_domains = ['www.baidu.com']
    start_urls = ['https://bj.58.com/shahe/ershoufang/e4/?PGTID=0d30000c-0142-1050-f5c4-dad0a3db3138&ClickID=1']

    def parse(self, response):
        """Walk every listing <li> on the page and yield one item per listing.

        Each item carries the listing title and its price; extract_first()
        yields None when the node is missing, which is passed through as-is.
        """
        for house in response.xpath('//ul[@class="house-list-wrap"]/li'):
            item = ScrapydemoItem()
            item["title"] = house.xpath('./div[2]/h2/a/text()').extract_first()
            item["price"] = house.xpath('./div[3]/p/b/text()').extract_first()
            yield item
以上为爬虫（spider）代码。
管道代码:
import pymysql
from redis import Redis


# Pipeline that persists items into a MySQL database.
class Scrapydemotomysql(object):
    # MySQL connection (created in open_spider)
    conn = None
    # reusable cursor (created once in open_spider, not per item)
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection and one reusable cursor when the spider starts."""
        self.conn = pymysql.Connect(host="127.0.0.1", port=3306, user="root",
                                    password="root", db="spider")
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        """Insert one (title, price) row; commit on success, roll back on failure.

        Uses a parameterized query instead of Python %-string interpolation:
        the original interpolated scraped text straight into the SQL, which is
        vulnerable to SQL injection and breaks on any quote in a listing title.
        """
        sql = 'insert into house values (%s, %s)'
        try:
            self.cursor.execute(sql, (item["title"], item["price"]))
            self.conn.commit()
        except Exception as e:
            print(str(e))
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Release cursor and connection; guarded so a failed/empty run cannot
        crash with AttributeError on a still-None cursor or connection."""
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()


# Pipeline that pushes items onto a Redis list.
class Scrapydemotoredis(object):
    # Redis connection (created in open_spider)
    conn = None

    def open_spider(self, spider):
        """Connect to the local Redis server when the spider starts."""
        self.conn = Redis(host='127.0.0.1', port=6379)

    def process_item(self, item, spider):
        """LPUSH one listing onto the 'home' list as a stringified dict.

        The str(...) payload format is kept from the original so existing
        consumers of the 'home' list keep working; 'record' avoids the
        original's shadowing of the builtin name 'dict'.
        """
        record = str({
            'title': item["title"],
            'price': item["price"],
        })
        self.conn.lpush('home', record)
        return item
pipelines
版权声明:本文为duanhaoxin原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。