美团App热门商圈团购采集(1)
环境:
针对美团 App 5.4 版本
在 tutorial 项目下新建一个 spider:
scrapy genspider -t basic Meituan_City meituan.com
编辑 items.py,添加如下 Item 定义:
class MeituanCity(Item):
    """Scrapy item holding one scraped record in a single ``data`` field.

    NOTE(review): assumes ``Item`` and ``Field`` are already imported from
    scrapy at the top of items.py -- confirm.
    """

    # Free-form payload; the spider stores a plain dict here.
    data = Field()
编辑 Meituan_City.py
# -*- coding: utf-8 -*-
import scrapy
import json
from tutorial.items import MeituanCity
class MeituanCitySpider(scrapy.Spider):
    """Collect the business areas of Meituan cities together with coordinates.

    ``parse`` reads the city list and requests the area list of a city.
    ``Parse_Geo`` joins every administrative district with its business
    areas and yields one ``MeituanCity`` item per business area.
    """

    name = "Meituan_City"
    allowed_domains = ["meituan.com"]
    start_urls = (
        'http://api.mobile.meituan.com/group/v1/city/list?show=all',
    )

    def parse(self, response):
        """Request the area list for a city found in the city-list response.

        The city record is forwarded to ``Parse_Geo`` through ``meta['item']``.
        """
        data = json.loads(response.body)
        for item in data['data']:
            cityId = item['id']
            # Example of the mobile endpoint:
            # http://api.mobile.meituan.com/group/v2/area/list?cityId=42&spatialFields=center
            url = 'http://api.meituan.com/group/v2/area/list?cityId=%s&spatialFields=center' % cityId
            # Single-argument call form prints identically on Python 2 and 3.
            print(url)
            yield scrapy.Request(
                url,
                callback=self.Parse_Geo,
                meta={'item': item}
            )
            # NOTE(review): only the first city is requested; this looks like
            # a debugging limit -- remove the break to crawl every city.
            break

    @staticmethod
    def _parse_center(center):
        """Convert a ``POINT(lng lat)`` string into micro-degree strings.

        Returns a ``(longitude, latitude)`` tuple of integer strings scaled by
        1,000,000, or ``(None, None)`` when the value is missing or malformed.
        """
        if not center:
            return None, None
        try:
            parts = center.replace('POINT(', '').replace(')', '').split()
            if len(parts) < 2:
                return None, None
            # round() instead of plain int(): truncation can lose one unit
            # when the float product lands just below the exact value.
            longitude = str(int(round(float(parts[0]) * 1000000)))
            latitude = str(int(round(float(parts[1]) * 1000000)))
        except (AttributeError, TypeError, ValueError, OverflowError):
            return None, None
        return longitude, latitude

    def Parse_Geo(self, response):
        """Yield one ``MeituanCity`` item per business area of the city.

        Reads the city record from ``response.meta['item']`` and the
        ``subareasinfo`` / ``areasinfo`` lists from the response's ``data``
        object. Business areas referenced by a district but absent from
        ``subareasinfo`` are skipped.
        """
        print(response.url)
        data = json.loads(response.body)
        city = response.meta['item']
        payload = data.get('data') or {}

        # Business-area records indexed by id for the district join below.
        subareas_by_id = {}
        for sub in payload.get('subareasinfo') or ():
            subareas_by_id[sub['id']] = sub

        for district in payload.get('areasinfo') or ():
            # Administrative district
            district_name = district['name']
            district_id = district['id']
            for sub_id in district.get('subareas') or ():
                # Business area
                area = subareas_by_id.get(sub_id)
                if area is None:
                    # Dangling reference: skip it instead of aborting the
                    # whole callback with a KeyError.
                    continue
                # Parsed per area so a malformed centre never inherits the
                # coordinates of the previous area.
                longitude, latitude = self._parse_center(area.get('center'))

                geo = MeituanCity()
                geo['data'] = {
                    # City
                    'cityid': city['id'],
                    'cityname': city['name'],
                    # Administrative district
                    'districtId': district_id,
                    'districtName': district_name,
                    # Business area
                    'SubAreaId': area['id'],
                    'secondArea': area['name'],
                    # Longitude / latitude
                    'longitude': longitude,
                    'latitude': latitude,
                }
                yield geo
此时运行:
scrapy runspider tutorial/spiders/Meituan_City.py