美团App热门商圈团购采集(1)

环境:

针对美团版本5.4

在tutorial项目下

新建一个spider

scrapy genspider -t basic Meituan_City meituan.com

编辑items.py

class MeituanCity(Item):
    """Item with a single ``data`` field that carries one scraped record."""
    # Record payload; the Meituan_City spider in this tutorial stores a dict here.
    data = Field()

编辑 Meituan_City.py

# -*- coding: utf-8 -*-
import scrapy
import json
from tutorial.items import MeituanCity

class MeituanCitySpider(scrapy.Spider):
    """Collect Meituan business areas (sub-areas), one item per area.

    ``parse`` reads the city list and requests a city's area list;
    ``Parse_Geo`` joins each district with its sub-areas and yields one
    ``MeituanCity`` item per sub-area.
    """

    name = "Meituan_City"
    allowed_domains = ["meituan.com"]
    start_urls = (
        'http://api.mobile.meituan.com/group/v1/city/list?show=all',
    )

    def parse(self, response):
        """Request the area list of the first city in the city-list response.

        The city record is forwarded to ``Parse_Geo`` through
        ``meta['item']``. A response without a ``data`` entry yields nothing.
        """
        data = json.loads(response.body)
        # Guard the lookup the same way Parse_Geo does instead of raising
        # KeyError when the API answers without a 'data' entry.
        cities = data.get('data') if isinstance(data, dict) else None
        for item in cities or []:
            cityId = item['id']
            # e.g. http://api.mobile.meituan.com/group/v2/area/list?cityId=42&spatialFields=center
            url = 'http://api.meituan.com/group/v2/area/list?cityId=%s&spatialFields=center' % cityId
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(url)
            yield scrapy.Request(
                url,
                callback=self.Parse_Geo,
                meta={'item': item}
            )
            # NOTE(review): only the first city is requested; this looks like
            # a leftover debugging limit -- confirm before removing.
            break

    @staticmethod
    def _parse_center(center):
        """Convert a ``POINT(lng lat)`` string to micro-degree strings.

        Returns ``(longitude, latitude)`` as strings of the coordinates
        multiplied by 1000000 and truncated to int, or ``(None, None)`` when
        ``center`` is missing or cannot be parsed.
        """
        if not hasattr(center, 'replace'):
            # Missing or non-string center: no coordinates available.
            return None, None
        parts = center.replace('POINT(', '').replace(')', '').split()
        if len(parts) < 2:
            return None, None
        lng, lat = parts[0], parts[1]
        try:
            return (str(int(float(lng) * 1000000)),
                    str(int(float(lat) * 1000000)))
        except (ValueError, OverflowError):
            # Non-numeric, NaN or infinite coordinate text.
            return None, None

    def Parse_Geo(self, response):
        """Yield one ``MeituanCity`` item per sub-area of the requested city.

        Reads ``subareasinfo`` (sub-area details) and ``areasinfo``
        (districts with the ids of their sub-areas) from the response's
        ``data`` entry. The city record is taken from
        ``response.meta['item']``. Sub-area ids that have no matching detail
        record are logged and skipped.
        """
        print(response.url)
        data = json.loads(response.body)
        metaitem = response.meta['item']

        payload = data.get('data') if isinstance(data, dict) else None
        if not isinstance(payload, dict):
            return

        # Sub-area details indexed by id.
        subareasinfo = {}
        for sub in payload.get('subareasinfo') or []:
            subareasinfo[sub['id']] = sub

        for line in payload.get('areasinfo') or []:
            # Administrative district.
            districtName = line['name']
            districtId = line['id']
            for tmp in line['subareas']:
                # Business-area (sub-area) details.
                area = subareasinfo.get(tmp)
                if area is None:
                    self.log('Sub-area %s of district %s has no entry in '
                             'subareasinfo; skipped' % (tmp, districtId))
                    continue

                # Coordinates are computed per area, so an area without a
                # usable center gets None rather than the values left over
                # from the previous area.
                longitude, latitude = self._parse_center(area.get('center'))

                Item = MeituanCity()
                Item['data'] = {
                    # City.
                    'cityid': metaitem['id'],
                    'cityname': metaitem['name'],
                    # Administrative district.
                    'districtId': districtId,
                    'districtName': districtName,
                    # Business area.
                    'SubAreaId': area['id'],
                    'secondArea': area['name'],
                    # Longitude / latitude.
                    'longitude': longitude,
                    'latitude': latitude,
                }
                yield Item

此时运行:

scrapy runspider tutorial/spiders/Meituan_City.py