import requests
from bs4 import BeautifulSoup
def _fetch_soup(page_url, headers, timeout=10):
    """Download *page_url* and return it parsed as an HTML document.

    Args:
        page_url: Absolute URL of the page to download.
        headers: HTTP request headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` tree built from the downloaded page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    response = requests.get(url=page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Pass the raw bytes so BeautifulSoup can pick the charset up from the
    # document itself; ``response.text`` may decode with the wrong encoding
    # when the server does not declare one, which garbles Chinese text.
    # The page is HTML, so an HTML parser is required: the 'xml' feature
    # selects lxml's XML parser, which does not treat the markup as HTML.
    return BeautifulSoup(response.content, 'lxml')


if __name__ == '__main__':
    # Script-local import: only this entry point needs it.
    from urllib.parse import urljoin

    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Mobile Safari/537.36 Edg/90.0.818.62'
    }
    url = 'https://m.ranwen.la/files/article/104/104485/list.html'

    # Parse the chapter titles and detail-page URLs out of the index page.
    soup = _fetch_soup(url, headers)

    # select() takes a CSS selector: every <li> that is a direct child of a
    # <ul> that is itself a direct child of an element with class "read".
    # NOTE(review): this assumes the index page really uses that markup;
    # confirm the tag hierarchy in the browser's developer tools.
    li_list = soup.select('.read > ul > li')
    if not li_list:
        print('no chapter entries matched ".read > ul > li" on', url)

    # The with-block guarantees the file is flushed and closed even if a
    # request fails half-way through the chapter list.
    with open('./mingchaonaxieshier.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # find() returns the first matching tag, or None when there is
            # no match, so list items without a link are skipped.
            link = li.find('a', href=True)
            if link is None:
                continue
            # get_text() also works when the <a> holds nested tags, where
            # .string would be None.
            title = link.get_text(strip=True)
            # urljoin resolves root-relative, relative and absolute hrefs.
            data_url = urljoin(url, link['href'])

            # Parse the chapter content out of the detail page.
            detail_soup = _fetch_soup(data_url, headers)
            # First <div> whose id attribute is "nr", or None if absent.
            div_tag = detail_soup.find('div', id='nr')
            if div_tag is None:
                print(data_url, 'skipped: no <div id="nr"> found')
                continue

            # .text concatenates all text nested inside the tag.
            content = div_tag.text
            fp.write(title + ':' + content + '\n')
            print(data_url)
            print(title, '攫取文章中')


以上代码是在 B 站(bilibili)上看教学视频时照着写的,换到这个网页上自己就不会定位了。有没有大佬帮忙写一下,并介绍一下 `.find` 方法,以及这段标签层级是怎么定位的?有劳大佬运行一下,只要用 bs4 实现就行,跪谢。
from bs4 import BeautifulSoup
def _fetch_soup(page_url, headers, timeout=10):
    """Download *page_url* and return it parsed as an HTML document.

    Args:
        page_url: Absolute URL of the page to download.
        headers: HTTP request headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``BeautifulSoup`` tree built from the downloaded page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response status.
    """
    response = requests.get(url=page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Pass the raw bytes so BeautifulSoup can pick the charset up from the
    # document itself; ``response.text`` may decode with the wrong encoding
    # when the server does not declare one, which garbles Chinese text.
    # The page is HTML, so an HTML parser is required: the 'xml' feature
    # selects lxml's XML parser, which does not treat the markup as HTML.
    return BeautifulSoup(response.content, 'lxml')


if __name__ == '__main__':
    # Script-local import: only this entry point needs it.
    from urllib.parse import urljoin

    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Mobile Safari/537.36 Edg/90.0.818.62'
    }
    url = 'https://m.ranwen.la/files/article/104/104485/list.html'

    # Parse the chapter titles and detail-page URLs out of the index page.
    soup = _fetch_soup(url, headers)

    # select() takes a CSS selector: every <li> that is a direct child of a
    # <ul> that is itself a direct child of an element with class "read".
    # NOTE(review): this assumes the index page really uses that markup;
    # confirm the tag hierarchy in the browser's developer tools.
    li_list = soup.select('.read > ul > li')
    if not li_list:
        print('no chapter entries matched ".read > ul > li" on', url)

    # The with-block guarantees the file is flushed and closed even if a
    # request fails half-way through the chapter list.
    with open('./mingchaonaxieshier.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # find() returns the first matching tag, or None when there is
            # no match, so list items without a link are skipped.
            link = li.find('a', href=True)
            if link is None:
                continue
            # get_text() also works when the <a> holds nested tags, where
            # .string would be None.
            title = link.get_text(strip=True)
            # urljoin resolves root-relative, relative and absolute hrefs.
            data_url = urljoin(url, link['href'])

            # Parse the chapter content out of the detail page.
            detail_soup = _fetch_soup(data_url, headers)
            # First <div> whose id attribute is "nr", or None if absent.
            div_tag = detail_soup.find('div', id='nr')
            if div_tag is None:
                print(data_url, 'skipped: no <div id="nr"> found')
                continue

            # .text concatenates all text nested inside the tag.
            content = div_tag.text
            fp.write(title + ':' + content + '\n')
            print(data_url)
            print(title, '攫取文章中')


以上代码是在 B 站(bilibili)上看教学视频时照着写的,换到这个网页上自己就不会定位了。有没有大佬帮忙写一下,并介绍一下 `.find` 方法,以及这段标签层级是怎么定位的?有劳大佬运行一下,只要用 bs4 实现就行,跪谢。