LOADING

加载过慢请开启缓存 浏览器默认开启

urllib库(6)ajax

2023/11/6

什么是ajax?

Ajax(Asynchronous JavaScript and XML)是指页面不整体刷新、由JavaScript在后台向服务器请求数据的技术。内容分为多个页面(翻页或滚动加载)时,常会用到ajax请求;ajax请求又分为post请求和get请求

如何判断运用了ajax请求?

当headers中出现:X-Requested-With: XMLHttpRequest时,说明就是一个ajax请求

代码示例:用ajax的post请求获取肯德基餐厅查询接口(以上海为例)指定页码范围内的数据,每页保存为一个json文件

import urllib.request
import urllib.parse
# Address of the store-list query (op=cname).
# NOTE(review): create_request() defines its own identical base_url, so this
# module-level name looks unused in the visible code -- confirm before removing.
url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'

def create_request(page):
    """Build the POST request for one page of the store-list query.

    Args:
        page: Page index to fetch; sent as the ``pageIndex`` form field.

    Returns:
        A ``urllib.request.Request`` whose body is the URL-encoded form.
        Because ``data`` is supplied, urllib issues it as a POST.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    form = {
        'cname': '上海',
        'pid': '',
        'pageIndex': page,
        'pageSize': 10,
    }
    # A long browser-session Cookie string used to be hard-coded here. It is
    # left out on purpose: session cookies do not belong in source code and
    # would be sent in clear text over this plain-HTTP URL.
    # NOTE(review): assumes the endpoint answers without a Cookie -- confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46',
        'Accept': '*/*',
    }

    # The request body must be bytes, so encode the form after URL-encoding it.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return urllib.request.Request(url=base_url, data=body, headers=headers)

def get_content(request):
    """Send *request* and return the response body decoded as UTF-8.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # Use the response as a context manager so the underlying connection is
    # closed even when reading or decoding raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')


def download(content,page):
    """Save *content* as ``kfc<page>.json`` in the current working directory.

    Args:
        content: Text to write; the file is written with UTF-8 encoding.
        page: Page number used to build the file name.
    """
    target = f'kfc{page!s}.json'
    # Mode 'w' truncates, so an existing file for the same page is overwritten.
    with open(target, mode='w', encoding='utf-8') as out:
        out.write(content)
if __name__ == '__main__':
    # Ask for the inclusive page range to fetch.
    first_page = int(input('请输入起始页码: '))
    last_page = int(input('请输入终止页码: '))
    # For each page: build the request, fetch the body, save it to its own file.
    for page in range(first_page, last_page + 1):
        download(get_content(create_request(page)), page)