什么是ajax?
含有多个页面时,常有ajax请求;ajax请求又分为ajax的post请求和ajax的get请求
如何判断运用了ajax请求?
当headers中出现:X-Requested-With: XMLHttpRequest时,说明就是一个ajax请求
代码示例:以ajax的post请求获取肯德基(KFC)上海门店列表中指定起止页码的数据
import urllib.request
import urllib.parse
# KFC store-list endpoint (GetStoreList.ashx, op=cname).
# NOTE(review): nothing in the visible code reads this name; create_request()
# uses its own local copy of the same URL.
url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
def create_request(page, cname='上海', page_size=10):
    """Build the POST request for one page of the KFC store-list query.

    Args:
        page: Page number, sent as the ``pageIndex`` form field.
        cname: City name, sent as the ``cname`` form field. Defaults to
            '上海', the value that used to be hard-coded.
        page_size: Results requested per page, sent as ``pageSize``.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A ``urllib.request.Request`` whose body is the URL-encoded form.
        Because ``data`` is supplied, urllib sends it as a POST.
    """
    base_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
    form = {
        'cname': cname,
        'pid': '',
        'pageIndex': page,
        'pageSize': page_size,
    }
    # No Cookie header is sent: hard-coding a captured browser session here
    # would embed personal tracking identifiers in the source and transmit
    # them on every request.
    # NOTE(review): assumes the endpoint does not require cookies - confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36 Edg/118.0.2088.46',
        'Accept': '*/*',
    }
    # urlencode() yields str; Request requires the POST body as bytes.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return urllib.request.Request(url=base_url, data=body, headers=headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying
    # connection) even when read() or decode() raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def download(content, page):
    """Save *content* as UTF-8 text to ``kfc<page>.json``.

    The path is relative, so the file lands in the current working
    directory; an existing file of the same name is overwritten.

    Args:
        content: Text to write.
        page: Page identifier; its ``str()`` form becomes part of the name.
    """
    target_name = f'kfc{page!s}.json'
    with open(target_name, mode='w', encoding='utf-8') as out:
        out.write(content)
if __name__ == '__main__':
    # Ask for an inclusive page range, then fetch and save each page in turn.
    first = int(input('请输入起始页码: '))
    last = int(input('请输入终止页码: '))
    page_numbers = range(first, last + 1)
    for page_no in page_numbers:
        page_request = create_request(page_no)
        # Each page is written to its own file by download().
        download(get_content(page_request), page_no)