import requests
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Scrape the post listing below and print title/date/content for every
# post it links to.
TIMEOUT = 10  # seconds; without a timeout requests can block indefinitely

# Listing page; the post links are the anchors matched by "div.title a".
url = "http://www.shixiaolei.com/posts/3/"
r = requests.get(url, timeout=TIMEOUT)
soup = BeautifulSoup(r.text, 'lxml')
links = soup.select('div.title a')

# Tag.get(attribute_name) returns the value of that attribute on the
# element, or None when the attribute is absent.
for link in links:
    href = link.get('href')
    if not href:
        # Anchor without a target: there is nothing to fetch.
        continue
    # urljoin resolves root-relative, relative and absolute hrefs against
    # the listing URL instead of relying on plain string concatenation.
    post_url = urljoin(url, href)
    post_resp = requests.get(post_url, timeout=TIMEOUT)
    post_soup = BeautifulSoup(post_resp.text, 'lxml')
    title = post_soup.select_one('div.title h1')
    date = post_soup.select_one('p.date')
    content = post_soup.select_one('.post_text')
    if title is None or date is None or content is None:
        # The page does not have the expected layout; report it and move
        # on rather than aborting the whole run with an IndexError.
        print(f"skipped {post_url}: expected elements not found",
              file=sys.stderr)
        continue
    data = {
        'title': title.get_text(),
        'date': date.get_text(),
        'content': content.get_text(strip=True),
    }
    print(data)
import requests
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Scrape the news index below and print title/date/content for every
# article it links to.
TIMEOUT = 10  # seconds; without a timeout requests can block indefinitely

# News index page; article links are the anchors under "span.newsTxt02".
url = "https://news.hebust.edu.cn/zhxw/index.htm"
r = requests.get(url, timeout=TIMEOUT)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text, 'lxml')
links = soup.select('span.newsTxt02 a')

# Tag.get(attribute_name) returns the value of that attribute on the
# element, or None when the attribute is absent.
for link in links:
    href = link.get('href')
    if not href:
        # Anchor without a target: there is nothing to fetch.
        continue
    # Example target:
    # https://news.hebust.edu.cn/zhxw/03de2c19b9dd484eaf2d54a34d13e569.htm
    # urljoin resolves the href against the index URL, so "../" style and
    # absolute links work as well as plain file names.
    article_url = urljoin(url, href)
    article_resp = requests.get(article_url, timeout=TIMEOUT)
    article_resp.encoding = 'utf-8'
    article_soup = BeautifulSoup(article_resp.text, 'lxml')
    title = article_soup.select_one('div.articleTitle h3')
    date = article_soup.select_one('.articleAuthor')
    content = article_soup.select_one('.article')
    if title is None or date is None or content is None:
        # The page does not have the expected layout; report it and move
        # on rather than aborting the whole run with an IndexError.
        print(f"skipped {article_url}: expected elements not found",
              file=sys.stderr)
        continue
    data = {
        'title': title.get_text(),
        'date': date.get_text(),
        'content': content.get_text(strip=True),
    }
    print(data)
# Shanghai Ocean University (上海海洋大学)
import requests
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Scrape the news list below and print title/date/author/content for
# every article it links to.
TIMEOUT = 10  # seconds; without a timeout requests can block indefinitely

# News list page; each element matched by ".col_news_item" is expected to
# carry the article link in its href attribute.
url = "https://www.shou.edu.cn/yw/list.htm"
r = requests.get(url, timeout=TIMEOUT)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text, 'lxml')
items = soup.select('.col_news_item')

for item in items:
    href = item.get('href')
    if not href:
        # Element without an href: there is nothing to fetch.
        continue
    # Example target: https://www.shou.edu.cn/2022/0309/c147a304911/page.htm
    # urljoin resolves the href against the list URL, so absolute links
    # are followed correctly too.
    article_url = urljoin(url, href)
    article_resp = requests.get(article_url, timeout=TIMEOUT)
    article_resp.encoding = 'utf-8'
    article_soup = BeautifulSoup(article_resp.text, 'lxml')
    title = article_soup.select_one('h1.arti_title')
    date = article_soup.select_one('.arti_update')
    author = article_soup.select_one('.editor')
    content = article_soup.select_one('.wp_articlecontent')
    if title is None or date is None or author is None or content is None:
        # The page does not have the expected layout; report it and move
        # on rather than aborting the whole run with an IndexError.
        print(f"skipped {article_url}: expected elements not found",
              file=sys.stderr)
        continue
    data = {
        'title': title.get_text(),
        'date': date.get_text(),
        # The key spelling "auther" is kept as-is: it is part of the output.
        'auther': author.get_text(),
        'content': content.get_text(),
    }
    print(data)
# Hospital (医院)
import requests
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Scrape the news list below and print title/content for the articles it
# links to. Date and author are not extracted for this site.
TIMEOUT = 10  # seconds; without a timeout requests can block indefinitely

# News list page; the responses are decoded as gb2312.
url = "http://hdzxyy.com/news.asp?cid=1"
r = requests.get(url, timeout=TIMEOUT)
r.encoding = 'gb2312'
soup = BeautifulSoup(r.text, 'lxml')
links = soup.select('.nei_right_con_dt span a')

# Only every second matched anchor is followed (presumably each list entry
# contributes two anchors -- TODO confirm against the page markup).
for link in links[::2]:
    href = link.get('href')
    if not href:
        # Anchor without a target: there is nothing to fetch.
        continue
    # Example href:   news_info.asp?id=4082&cid=1
    # Example target: http://hdzxyy.com/news_info.asp?id=4082&cid=1
    # urljoin resolves the href against the list URL.
    article_url = urljoin(url, href)
    article_resp = requests.get(article_url, timeout=TIMEOUT)
    article_resp.encoding = 'gb2312'
    article_soup = BeautifulSoup(article_resp.text, 'lxml')
    title = article_soup.select_one('.news_con_tit font')
    content = article_soup.select_one('.news_con_con')
    if title is None or content is None:
        # The page does not have the expected layout; report it and move
        # on rather than aborting the whole run with an IndexError.
        print(f"skipped {article_url}: expected elements not found",
              file=sys.stderr)
        continue
    data = {
        'title': title.get_text(),
        'content': content.get_text(strip=True),
    }
    print(data)
# Hospital 2 (医院2)
import requests
from bs4 import BeautifulSoup
import sys
from urllib.parse import urljoin

# Scrape the news list below and print title/content for every article it
# links to. Date and author are not extracted for this site.
TIMEOUT = 10  # seconds; without a timeout requests can block indefinitely

# News list page; article links are the anchors matched by ".pad .h3 a".
url = "https://www.cyfy.cn/lists/6.html"
r = requests.get(url, timeout=TIMEOUT)
r.encoding = 'utf-8'
soup = BeautifulSoup(r.text, 'lxml')
links = soup.select('.pad .h3 a')

for link in links:
    href = link.get('href')
    if not href:
        # Anchor without a target: there is nothing to fetch.
        continue
    # Absolute hrefs pass through urljoin unchanged; relative ones are
    # resolved against the list URL instead of failing in requests.get.
    article_url = urljoin(url, href)
    article_resp = requests.get(article_url, timeout=TIMEOUT)
    article_resp.encoding = 'utf-8'
    article_soup = BeautifulSoup(article_resp.text, 'lxml')
    title = article_soup.select_one('.t')
    content = article_soup.select_one('.text')
    if title is None or content is None:
        # The page does not have the expected layout; report it and move
        # on rather than aborting the whole run with an IndexError.
        print(f"skipped {article_url}: expected elements not found",
              file=sys.stderr)
        continue
    data = {
        'title': title.get_text(),
        'content': content.get_text(strip=True),
    }
    print(data)
# Messages (留言)