# Douban Top 250 scraper listing (line-number gutter 1-30 from the original page removed).
import random
import re
import time

import requests

# The script walks the list through the `start` query offset:
# PAGE_COUNT pages, advancing PAGE_SIZE entries per request.
PAGE_SIZE = 25
PAGE_COUNT = 10

# Seconds to wait for the server before giving up; without a timeout
# `requests.get` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably required because the site rejects the default
# python-requests agent -- confirm.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0'
}

# Compiled once, outside the page loop.
# TITLE_PATTERN only matches "title" spans whose text contains no '&', so
# spans with HTML entities (e.g. "&nbsp;...") are skipped.
# NOTE(review): this looks intended to keep one title per movie so that it
# lines up with one rating per movie -- verify against the page markup.
TITLE_PATTERN = re.compile(r'<span\sclass="title">([^&]*?)</span>')
RATING_PATTERN = re.compile(r'<span class="rating_num".*?>(.*?)</span>')

for page in range(1, PAGE_COUNT + 1):
    res = requests.get(
        url=f'https://movie.douban.com/top250?start={(page - 1) * PAGE_SIZE}',
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx instead of silently parsing an error page
    # (which would simply print nothing for this page).
    res.raise_for_status()

    titles = TITLE_PATTERN.findall(res.text)
    ranks = RATING_PATTERN.findall(res.text)

    # Titles and ratings are paired positionally; zip stops at the shorter list.
    for title, rank in zip(titles, ranks):
        print(title, rank)

    # Random 1-5 second pause between page requests.
    time.sleep(random.random() * 4 + 1)