参考:https://blog.csdn.net/qq_21933615/article/details/81171951
废话不多说,直接上代码:

from urllib import request
from bs4 import BeautifulSoup

# Page scraped by this script; it is served as gb2312.
url = 'http://kan.2345.com/vip/list/-----.html'

# Alternative page (utf-8), kept for reference only. Previously this line was
# live and silently overrode the URL above.
# NOTE(review): the class names analysisData() searches for appear to belong
# to the 2345 page -- confirm the markup before switching to this URL.
# url = 'https://movie.douban.com/cinema/nowplaying/shanghai/'

def getHtml(url, encoding=None):
    """Download *url* and return its body decoded to ``str``.

    A desktop-browser User-Agent header is sent with the request.

    Args:
        url: Address of the page to fetch.
        encoding: Optional codec name that overrides detection. When omitted,
            the charset declared in the response's Content-Type header is
            used, falling back to gb2312 (the codec this function used to
            hard-code for every page).

    Returns:
        The decoded page text, or None when the status code is not 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    req = request.Request(url, headers=headers)

    # The context manager closes the response even if reading or decoding fails.
    with request.urlopen(req) as response:
        if response.getcode() != 200:
            print('返回头不是200')
            return None

        charset = encoding or response.headers.get_content_charset() or "gb2312"
        # errors="replace" keeps a few stray bytes from aborting the whole page.
        return response.read().decode(charset, errors="replace")

def analysisData(url):
    """Fetch *url* and print the title of every list entry found on the page.

    The page is expected to contain ``div.v_picConBox > ul.v_picTxt`` with one
    ``li`` per entry, each holding its title in ``em.emTit``. If the download
    yields nothing or that structure is absent, a notice is printed instead of
    raising AttributeError on a missing element.

    Args:
        url: Address of the listing page to scrape.
    """
    html = getHtml(url)
    if html is None:
        # getHtml() returns None (after printing why) on a non-200 response.
        return

    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None when nothing matches, so check each level before
    # descending into it.
    container = soup.find('div', attrs={'class': 'v_picConBox'})
    movie_list = None
    if container is not None:
        movie_list = container.find('ul', attrs={'class': 'v_picTxt'})
    if movie_list is None:
        print('未找到影片列表,页面结构与预期不符')
        return

    for item in movie_list.find_all('li'):
        title_tag = item.find('em', attrs={'class': 'emTit'})
        if title_tag is None:
            # Skip entries without a title element rather than crash the loop.
            continue
        print(title_tag.getText())

# Script entry point: fetch the page at `url` and print each title found on it.
analysisData(url)

其实写法跟 PHP 差不多;另外,“美味汤”(BeautifulSoup)的语法要好好看看。