分享以及杂谈

分享以及杂谈

最近做项目需要很多数据,写了些爬虫,没什么特别的,拿出来分享。

  1. 一个大学排名的
import requests, json

# Paged QS rankings endpoint; {0} is substituted with the page number.
# The server reports the total record count and the actual page size in
# every response, so pagination is driven entirely by those fields.
url = 'https://www.topuniversities.com/rankings/endpoint?nid=3897789&page={0}&items_per_page=20&tab=&region=Asia&countries=&cities=&search=&star=&sort_by=rank&order_by=asc&program_type='

data = []          # all ranking records, accumulated across pages
page = 1
remaining = None   # records still to fetch; unknown until the first response

# Fetch page after page until the server's total_record count is exhausted.
# Unlike a hard-coded "20 per page", the loop trusts the items_per_page the
# server actually returned, so a short first/last page is handled correctly.
while remaining is None or remaining > 0:
    result = requests.get(url.format(page), timeout=30).json()
    data += result['score_nodes']
    if remaining is None:
        remaining = result['total_record']
    remaining -= int(result['items_per_page'])
    page += 1

with open('data/3.json', 'w', encoding='utf-8') as f:
    json.dump(data, f)
  2. 校徽图片的
from concurrent.futures import ThreadPoolExecutor
from lxml import etree
import requests


def download(i):
    """Fetch the logo PNG for slug *i* and save it as image/<i>.png.

    The response is fetched and checked *before* the output file is opened,
    so a failed request no longer leaves an empty image/<i>.png behind
    (the original opened the file first, truncating it unconditionally).
    """
    image_url = f'https://www.urongda.com/img/{i}-logo.png'
    resp = requests.get(image_url, timeout=30)
    resp.raise_for_status()  # don't write an error page to disk
    with open(f'image/{i}.png', 'wb') as f:
        f.write(resp.content)

# Scrape the index page for every school's data-name slug, then fetch all
# logos concurrently (I/O-bound, so threads overlap the downloads).
index_page = requests.get('https://www.urongda.com/')
tree = etree.HTML(index_page.text)
slugs = tree.xpath('/html/body/main/div[1]/div//@data-name')

with ThreadPoolExecutor(max_workers=10) as pool:
    pool.map(download, slugs)
  3. 虎扑评分的爬虫,一点点加密
import hashlib,time, requests, json
from concurrent.futures import ThreadPoolExecutor
# Headers mimicking the Hupu Android app client; the endpoint appears to
# expect this app user-agent/cookie fingerprint (x-hupu-token left blank).
header={
    'user-agent': 'Dalvik/2.1.0 (Linux; U; Android 9; ASUS_I005DA Build/PI) kanqiu/8.0.77.04267/9403',
    'cookie': 'cpck=eyJpZGZhIjoiIiwiY2xpZW50IjoiMzAzMzE0N2Q4YjdlN2M4NiIsInByb2plY3RJZCI6MX0%3D',
    'x-hupu-token': ''
}
# Shared accumulator for scraped entries; appended to from worker threads.
mdata=[]
def main(page):
    """Fetch one page of Hupu score-tree nodes and append them to mdata.

    The API requires an MD5 signature computed over the request parameters:
    "k=v" pairs sorted by key, joined with '&', suffixed with a fixed salt.
    """
    crt = str(int(time.time() * 1000))  # request timestamp in milliseconds
    params = {
        'pageSize': 10,
        'page': page,
        'nodeId': 1421265,
        'queryType': 'hot',
        'clientId': 156524155,
        'crt': crt,
        'night': 0,
        'channel': 'miui',
        'teenagers': 0,
        'time_zone': 'Asia/Shanghai',
        'deviceId': 'BDqaLzSrCAXBtDgH3bbkMVITrkolufoUCXPmuc4+Uzmui0h/fDc6Aw3SbPKhc3OyEjVAlGxR8gyt8clM4NXpffg==',
    }
    # Sign: key-sorted "k=v" pairs joined by '&', plus the app's fixed salt.
    signed = '&'.join(f'{k}={v}' for k, v in sorted(params.items()))
    params['sign'] = hashlib.md5(
        (signed + 'HUPU_SALT_AKJfoiwer394Jeiow4u309').encode()
    ).hexdigest()

    result = requests.get(
        'https://games.mobileapi.hupu.com/1/8.0.77/bplcommentapi/bpl/score_tree/groupAndSubNodes',
        params=params, headers=header)

    for entry in result.json()['data']['nodePageResult']['data']:
        node = entry['node']
        print(node['name'], node['image'][0], node['scoreAvg'], node['hottestComments'])
        mdata.append({
            'name': node['name'],
            'image': node['image'][0],
            'scoreAvg': node['scoreAvg'],
            'hottestComments': node['hottestComments'],
        })
# Fan the first 50 pages out across worker threads; the `with` block waits
# for the pool to drain, so mdata is complete before it is written out.
with ThreadPoolExecutor(max_workers=10) as pool:
    pool.map(main, range(1, 51))

with open('hp1.json', 'w') as out:
    json.dump(mdata, out)

然后就是谈谈最近, 我发现我自己已经树立了一种沉默寡言、高冷的学霸形象, 欸,无所谓吧, 就让他们自己去烦恼吧。我为什么要平等地去帮他们呢,这才是明智的做法, 嗯。 维护这些人际关系真的超级累, 有些人也是真的烦人,已经和全班第一烦平起平坐了。

就是这些发完牢骚了