最近做项目,需要很多数据,写了些爬虫。没什么特别的,拿出来分享一下。
- 一个大学排名的
import requests,json
# Download every page of the Asia-region ranking listing from
# topuniversities.com and save the combined entries to data/3.json.
ITEMS_PER_PAGE = 20
page = 1
# NOTE: the parameter after ``tab=`` is ``&region=Asia``. The text had been
# through HTML-entity decoding, which turned ``&reg`` into the "®" sign and
# silently broke the region filter.
url = (
    'https://www.topuniversities.com/rankings/endpoint'
    '?nid=3897789&page={0}&items_per_page={1}&tab=&region=Asia'
    '&countries=&cities=&search=&star=&sort_by=rank&order_by=asc&program_type='
)
data = []

response = requests.get(url.format(page, ITEMS_PER_PAGE), timeout=30)
response.raise_for_status()
result = response.json()
data += result['score_nodes']
# Number of records still to fetch after the first page. ``int()`` mirrors
# the conversion applied to ``items_per_page`` below, in case the API
# returns its counters as strings.
surplus = int(result['total_record']) - ITEMS_PER_PAGE

while surplus > 0:
    page += 1
    response = requests.get(url.format(page, ITEMS_PER_PAGE), timeout=30)
    response.raise_for_status()
    result = response.json()
    nodes = result['score_nodes']
    if not nodes:
        # An empty page means there is nothing more to collect; stop rather
        # than keep requesting pages.
        break
    data += nodes
    surplus -= int(result['items_per_page'])

with open('data/3.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)
- 校徽图片的
from concurrent.futures import ThreadPoolExecutor
from lxml import etree
import requests
def download(i):
    """Download the logo image for *i* and save it as ``image/{i}.png``.

    Args:
        i: Identifier inserted into both the logo URL and the output
            filename.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    image_url = f'https://www.urongda.com/img/{i}-logo.png'
    # Fetch first and check the status before touching the filesystem, so a
    # failed request neither leaves an empty file behind nor stores an error
    # page under a .png name.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()
    with open(f'image/{i}.png', 'wb') as f:
        f.write(response.content)
# Read the landing page, collect every ``data-name`` attribute under the
# first <div> of <main>, and download the matching logos on 10 threads.
result = requests.get('https://www.urongda.com/', timeout=30)
html = etree.HTML(result.text)
href = html.xpath('/html/body/main/div[1]/div//@data-name')
with ThreadPoolExecutor(max_workers=10) as t:
    # Keep each future so failures can be reported; an exception raised in a
    # worker is otherwise stored on the future and never seen.
    futures = {t.submit(download, i): i for i in href}
# Leaving the ``with`` block waits for all workers, so every future is done.
for future, name in futures.items():
    error = future.exception()
    if error is not None:
        print(f'download failed for {name}: {error}')
- 虎扑评分的爬虫,一点点加密
import hashlib,time, requests, json
from concurrent.futures import ThreadPoolExecutor
# Headers sent with every API request. The user-agent string names an
# Android device; the token header is sent empty.
header = {
    "user-agent": "Dalvik/2.1.0 (Linux; U; Android 9; ASUS_I005DA Build/PI) kanqiu/8.0.77.04267/9403",
    "cookie": "cpck=eyJpZGZhIjoiIiwiY2xpZW50IjoiMzAzMzE0N2Q4YjdlN2M4NiIsInByb2plY3RJZCI6MX0%3D",
    "x-hupu-token": "",
}

# Accumulator that collects one record per scraped node.
mdata = list()
# Salt appended to the canonical parameter string before hashing.
_SIGN_SALT = 'HUPU_SALT_AKJfoiwer394Jeiow4u309'
_SCORE_TREE_URL = 'https://games.mobileapi.hupu.com/1/8.0.77/bplcommentapi/bpl/score_tree/groupAndSubNodes'


def _sign(params):
    """Return the ``sign`` value for the request parameters *params*.

    The parameters are sorted by name, rendered as ``name=value`` pairs
    joined with ``&``, the salt is appended, and the MD5 hex digest of the
    result is returned.
    """
    query = '&'.join(f'{key}={value}' for key, value in sorted(params.items()))
    return hashlib.md5((query + _SIGN_SALT).encode()).hexdigest()


def main(page):
    """Fetch one page of hot score nodes and append them to ``mdata``.

    Args:
        page: Page number to request (10 nodes per page).

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    body = {
        'pageSize': 10,
        'page': page,
        'nodeId': 1421265,
        'queryType': 'hot',
        'clientId': 156524155,
        # Request timestamp in milliseconds, sent as a string.
        'crt': str(int(time.time() * 1000)),
        'night': 0,
        'channel': 'miui',
        'teenagers': 0,
        'time_zone': 'Asia/Shanghai',
        'deviceId': 'BDqaLzSrCAXBtDgH3bbkMVITrkolufoUCXPmuc4+Uzmui0h/fDc6Aw3SbPKhc3OyEjVAlGxR8gyt8clM4NXpffg==',
    }
    # Derive the signature from the very parameters being sent, so the signed
    # string and the request can never drift apart.
    body['sign'] = _sign(body)

    response = requests.get(_SCORE_TREE_URL, params=body, headers=header, timeout=30)
    response.raise_for_status()
    for item in response.json()['data']['nodePageResult']['data']:
        node = item['node']
        record = {
            'name': node['name'],
            'image': node['image'][0],
            'scoreAvg': node['scoreAvg'],
            'hottestComments': node['hottestComments'],
        }
        print(record['name'], record['image'], record['scoreAvg'], record['hottestComments'])
        mdata.append(record)
# Scrape pages 1-50 on 10 threads, then write everything collected in
# ``mdata`` to hp1.json.
with ThreadPoolExecutor(max_workers=10) as t:
    # Keep each future so failures can be reported; an exception raised in a
    # worker is otherwise stored on the future and never seen.
    futures = {t.submit(main, x): x for x in range(1, 51)}
# Leaving the ``with`` block waits for all workers, so every future is done.
for future, page_no in futures.items():
    error = future.exception()
    if error is not None:
        print(f'page {page_no} failed: {error}')
# Write UTF-8 explicitly and keep non-ASCII text readable instead of
# \uXXXX-escaped; the parsed JSON content is unchanged.
with open('hp1.json', 'w', encoding='utf-8') as f:
    json.dump(mdata, f, ensure_ascii=False)
然后就是谈谈最近:我发现自己已经树立了一种沉默寡言、高冷的大佬学霸形象。欸,无所谓吧,就让他们自己去烦恼吧。我为什么要平等地去帮他们?明智的做法,嗯。维护这些人际关系真的超级累,好吧,人机。有些人也是真机把烦,已经和全班第一烦平起平坐了。
就是这些发完牢骚了