第十周实训
部分内容
:::success
Yes :tada:
:::
第一个代码
# Week-10 basics: scalar types, lists, tuples, and zip.
'''
Multi-line string literal used as a block comment in the notes.
'''
wolf_str = "我是一只来自北方的狼"  # renamed: the original shadowed the builtin `str`
bnum = True
# Complex number literal (j is the imaginary unit)
com = 2 + 1j
print(type(com))
if isinstance(com, complex):
    print("我是复数来着")
# Lists may mix element types freely
list1 = [1, 3, 4, True, com, wolf_str]
for item in list1:
    print(item, "***********************\n")
list1.append("哈哈哈 我是卖报的小行家")
# Delete the first element
del list1[0]
print(list1)
# Tuples are immutable: concatenation (+) works, item assignment does not
tub1 = (1, 2, 3, 4, 5, 6, 7)
tub2 = ("a", "b", "c", "d", "e", "f", "g")
# tub = tub1+tub2
# tub1[0]=888
# print(tub)
# zip pairs the tuples element-wise: [(1, "a"), (2, "b"), (3, "c"), ...]
list1 = [item for item in zip(tub1, tub2)]
print(list1)
# Reformatted duplicate of the preceding snippet (the notes kept both copies).
'''
Multi-line string literal used as a block comment in the notes.
'''
wolf_str = "我是一只来自北方的狼"  # renamed: the original shadowed the builtin `str`
bnum = True
# Complex number literal
com = 2 + 1j
print(type(com))
if isinstance(com, complex):
    print("我是复数来着")
# Lists may mix element types freely
list1 = [1, 3, 4, True, com, wolf_str]
for item in list1:
    print(item, "***********************\n")
list1.append("哈哈哈 我是卖报的小行家")
# Delete the first element
del list1[0]
print(list1)
# Tuples are immutable: concatenation (+) works, item assignment does not
tub1 = (1, 2, 3, 4, 5, 6, 7)
tub2 = ("a", "b", "c", "d", "e", "f", "g")
# tub = tub1+tub2
# tub1[0]=888
# print(tub)
# zip pairs the tuples element-wise: [(1, "a"), (2, "b"), (3, "c"), ...]
list1 = [item for item in zip(tub1, tub2)]
print(list1)
list1 = [item for item in zip(tub1, tub2)]  # tub1/tub2 are the tuples defined above
# Sorting exercise: order the (number, letter) pairs by the number
list2 = sorted(list1, key=lambda x: x[0], reverse=False)
print(list2)
# Dict: key/value pairs, comparable to Java's Map
dict1 = {
    "name": "古他那黑暗之神赵四",
    "age": 20,
    "hobby": "亚洲舞王之炸雷"
}
print(list(dict1.keys()))
print(list(dict1.values()))
# A list of records to be written out as CSV rows
lis3 = [{
    'name': '尼古拉嘶赵四',
    'age': 60,
    "hobby": "亚洲舞王之炸雷"
}, {
    "name": "最强最强妖王谢广坤",
    "age": 62,
    "hobby": "作妖",
}, {
    "name": "迈克尔刘能",
    "age": 60,
    "hobby": "烫头"
}]
import csv
# File modes: "w" overwrites (wb = bytes), "a" appends to the end without
# overwriting, "r" reads (rb = bytes)
with open("infor.csv", "w", newline="", encoding="utf-8") as f:
    # DictWriter maps each dict onto the given column order (no header row written)
    write = csv.DictWriter(f, fieldnames=["name", "age", "hobby"])
    write.writerows(lis3)
with open("infor.csv", "r", encoding="utf-8") as f:
    # Explicit fieldnames again, because the file has no header row
    fread = csv.DictReader(f, fieldnames=["name", "age", "hobby"])
    print("*****************************************")
    for item in fread:
        print(item)
第二个代码
def difNum(list1):
    """Build two-digit strings from every ordered pair of unequal values.

    For each ordered pair (a, b) in *list1* with a != b, the string "ab" is
    formed; duplicates are removed while preserving first-seen order.
    The result is printed and returned.

    Fixed: the original mutated module-level globals `list2`/`list3`;
    this version is self-contained and returns the result.
    """
    pairs = []  # intermediate two-digit strings (may contain duplicates)
    result = []  # final de-duplicated list
    for item1 in list1:
        for item2 in list1:
            if item1 != item2:
                pairs.append("%d%d" % (item1, item2))
    for combo in pairs:
        if combo not in result:
            result.append(combo)
    print(result)
    return result


# Exercise: build two-digit numbers whose digits differ, with no duplicate
# numbers in the result.
if __name__ == "__main__":
    list1 = [1, 2, 3, 4, 6, 7, 8, 8, 9, 9, 5, 3]
    difNum(sorted(list1))
code3 类继承
class Person:
    """Base class demonstrating class attributes, __init__, and __del__."""

    # Class attributes: shared defaults for all instances
    name = "人类"
    age = 20
    # Leading double underscore triggers name mangling (_Person__priMoney),
    # so subclasses do not see it as __priMoney
    __priMoney = 5000

    def __init__(self):
        # Instance attributes shadow the class attributes of the same name
        self.name = "小明"
        self.age = 40

    def MyPrint(self):
        """Print a short description and return a message string."""
        print(self.name, f"{self.age}了,大龄剩男",)
        return "搞钱吧,别想别的,你已经不是充钱的少年了"

    def __del__(self):
        # Destructor: runs when the instance is garbage-collected
        print("end_Person_class")
class Mother:
    """Second base class, used to demonstrate multiple inheritance."""

    def fly(self):
        # Prints only; returns None
        print("会飞翔")
class Child(Person, Mother):
    """Subclass via multiple inheritance; overrides attributes and MyPrint.

    Note: Person.__init__ is inherited, so instances get name="小明"/age=40
    despite the class attributes below.
    """

    name = "小强"
    age = 18
    # Name-mangled to _Child__priMoney — a separate attribute from
    # Person's _Person__priMoney, which subclasses cannot inherit by name
    __priMoney = 1000

    def SayHello(self):
        """Print a greeting using the instance's name and age."""
        print("你好,我是{0},我今年{1}岁了".format(self.name, self.age))

    def MyPrint(self):
        """Override of Person.MyPrint with a different message."""
        print("我是子类的输出函数")
        print(self.name, "{}了,人够男的".format(self.age))
        return "愉快吧,别想别的,你已经不是充钱的少年了"

    def priv_money(self):
        # Reads the mangled Child copy (_Child__priMoney == 1000)
        print("私房钱剩余", self.__priMoney)


if __name__ == "__main__":
    cc = Child()
    cc.SayHello()
    cc.MyPrint()
    cc.fly()
    cc.priv_money()
code4 豆瓣爬虫
镜像源
下载速度快,但 PyCharm 对它的支持似乎不太好,使用时会有卡顿。
中科大(USTC)镜像源:https://mirrors.ustc.edu.cn/pypi/web/simple
或者直接通过pip install requests beautifulsoup4 bs4 -i https://mirrors.ustc.edu.cn/pypi/web/simple
安装
代码
# Send an HTTP request to the remote server and fetch the page
import requests
# Web pages are nested tags; BeautifulSoup builds a tag tree so attributes
# and text can be located with its query methods
from bs4 import BeautifulSoup
import lxml
import time

url = 'https://movie.douban.com/subject/36208094/comments?status=P'
# Browser-like headers so the request is not rejected as a bot
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Host": "movie.douban.com",
    "Cookie": "ll='108302'; bid=UNCukvvPqR8; __pk_id.100001.4cf6=ba0528c5b4d17da0.1704161561.; __yadk_uid=NAiBkhl9crL0ghh6yzHiXUPk"
}
# Fetch the Douban comments page
resp = requests.get(url=url, headers=headers)
print(resp.status_code)
# print(resp.text)
# Parse the HTML source
bs = BeautifulSoup(resp.content, "lxml")
# Each comment lives in a <div class="comment-item">
divs = bs.find_all("div", attrs={"class": "comment-item"})
# print(divs)
for item in divs:
    # item is one commenter's block: text, nickname, optional rating
    review = item.find("span", attrs={"class": "short"}).text
    print("*********************************")
    print(review)
    nickname = item.find("span", attrs={"class": "comment-info"}).find("a").text
    # Print the nickname
    print(nickname)
    # The rating span is absent when the user did not rate the movie
    score_tag = item.find("span", attrs={"class": "rating"})
    if score_tag:
        score = score_tag['title']
        print(score)
    else:
        print("No rating found")
code5 爬虫爬多页
# Send HTTP requests to the remote server and fetch multiple comment pages
import requests
# BeautifulSoup builds a tag tree so attributes and text can be located
from bs4 import BeautifulSoup
import lxml
import time
import csv

# Browser-like headers so the requests are not rejected as a bot
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Host": "movie.douban.com",
    "Cookie": "ll='108302'; bid=UNCukvvPqR8; __pk_id.100001.4cf6=ba0528c5b4d17da0.1704161561.; __yadk_uid=NAiBkhl9crL0ghh6yzHiXUPk"
}
# All scraped rows (renamed: the original shadowed the builtin `list`)
comments = []
for i in range(10):
    try:
        # Pause between requests to avoid hammering the server
        time.sleep(1)
        url = f"https://movie.douban.com/subject/36208094/comments?start={i * 20}&limit=20&status=P&sort=new_score"
        # Fetch one page of comments
        resp = requests.get(url=url, headers=headers)
        print(resp.status_code)
        # print(resp.text)
        # Parse the HTML source
        bs = BeautifulSoup(resp.content, "lxml")
        # Each comment lives in a <div class="comment-item">
        divs = bs.find_all("div", attrs={"class": "comment-item"})
        # print(divs)
        for item in divs:
            dict1 = {}
            # item is one commenter's block: text, nickname, optional rating
            review = item.find("span", attrs={"class": "short"}).text
            print("*********************************")
            print(review)
            nickname = item.find("span", attrs={"class": "comment-info"}).find("a").text
            # Print the nickname
            print(nickname)
            # The rating span is absent when the user did not rate the movie
            score_tag = item.find("span", attrs={"class": "rating"})
            if score_tag:
                score = score_tag['title']
                print(score)
            else:
                score = "No rating found"
            dict1['nickname'] = nickname
            dict1['review'] = review
            dict1['score'] = score
            comments.append(dict1)
    except Exception as e:
        print("出现了异常了", e)
        # Skip this page and move on to the next one
        continue

print("总数据是", comments)
# Write the scraped rows to 第二十条.csv
with open("第二十条.csv", "w", newline="", encoding="utf-8") as f:
    # DictWriter maps each dict onto the given column order
    fwrite = csv.DictWriter(f, fieldnames=["nickname", "review", "score"])
    fwrite.writerows(comments)
code6 词云1
# Build a word cloud from the scraped review CSV.
# pandas handles the data loading/cleaning
import pandas as pd
from wordcloud import WordCloud
# numpy converts the mask image into a numeric matrix
import numpy as np
# PIL's Image.open loads the picture file into memory
from PIL import Image
# jieba segments Chinese text into words
import jieba
# collections.Counter tallies word frequencies
import collections

# Load the mask image as a pixel array
image = np.array(Image.open("image/111.jpg"))
# print(image)
# The CSV has no header row, so supply the column names explicitly
data = pd.read_csv('第二十条.csv', names=['nickname', 'review', 'score'])
# Collect every review into one list
listCom1 = data['review'].tolist()
# Join the reviews into a single string, then cut it into words
listCom2 = jieba.lcut(",".join(listCom1))
# Keep only words longer than one character
list2 = [item for item in listCom2 if len(item) > 1]
# Count how often each remaining word appears
word_fre = collections.Counter(list2)
print(word_fre)
# Build the word cloud
wc = WordCloud(
    background_color='white',
    mask=image,
    font_path='font/SourceHanSansHWSC-Bold.otf',  # font with CJK glyphs
    max_words=1000,  # maximum number of words shown
    max_font_size=300  # largest font size
)
wc.generate_from_frequencies(word_fre)
wc.to_file('词云.jpg')  # save the rendered image
code7 词云2
去掉语气词
# Word cloud, second version: additionally drops filler/stop words.
# pandas handles the data loading/cleaning
import pandas as pd
from wordcloud import WordCloud
# numpy converts the mask image into a numeric matrix
import numpy as np
# PIL's Image.open loads the picture file into memory
from PIL import Image
# jieba segments Chinese text into words
import jieba
# collections.Counter tallies word frequencies
import collections

# Load the mask image as a pixel array
image = np.array(Image.open("image/111.jpg"))
# print(image)
# The CSV has no header row, so supply the column names explicitly
data = pd.read_csv('第二十条.csv', names=['nickname', 'review', 'score'])
# Collect every review into one list
listCom1 = data['review'].tolist()
# Join the reviews into a single string, then cut it into words
listCom2 = jieba.lcut(",".join(listCom1))
# Keep only words longer than one character
list2 = [item for item in listCom2 if len(item) > 1]
# Stop words / punctuation to exclude from the cloud
filter_word = [',', '的', '。', '了', '是', '我', '很', '在', '', '电影']
# (the len > 1 check is redundant after the filter above, kept for safety)
list3 = [word for word in list2
         if word not in filter_word and len(word) > 1]
# Count how often each remaining word appears
word_fre = collections.Counter(list3)
print(word_fre)
# Build the word cloud
wc = WordCloud(
    background_color='white',
    mask=image,
    font_path='font/SourceHanSansHWSC-Bold.otf',  # font with CJK glyphs
    max_words=1000,  # maximum number of words shown
    max_font_size=300  # largest font size
)
wc.generate_from_frequencies(word_fre)
wc.to_file('第二十条词云.jpg')  # save the rendered image
code 8 flask
from flask import Flask, render_template, request

app = Flask(__name__)


# Handler (route) for the site root
@app.route("/")
def home():
    return "<h1 style='color:red'>今天天气有点热,吃不进饭</h1>"


@app.route("/index")
def index():
    # Renders templates/index.html
    return render_template('index.html')


# Start the Flask development server
if __name__ == "__main__":
    app.run(port=9999, debug=True)
code 9 index.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<!-- Show the generated word-cloud image centered in the viewport -->
<style>
img {
width:600px;
height:500px;
/* absolute positioning with all four offsets at 0 plus margin:auto
   centers the fixed-size image both horizontally and vertically */
position: absolute;
top:0;
right:0;
bottom:0;
left:0;
margin: auto
}
</style>
</head>
<body>
<!-- Image is served by Flask from the static/ directory -->
<img src="../static/img/第二十条词云.jpg"/>
</body>
</html>
code 10 复制下文件
压缩包解压拖过去就可以
app.py
from flask import Flask, render_template, request

app = Flask(__name__)


# Handler (route) for the site root
@app.route("/")
def home():
    return render_template("index.html")


@app.route("/welcome")
def welcome():
    return render_template("welcome.html")


@app.route("/wordcloud")
def wordcloud():
    # templates/wordcloud.html must exist for this route to work
    return render_template("wordcloud.html")


# Start the Flask development server
if __name__ == "__main__":
    app.run(port=9999, debug=True)
fix:
wordcloud.html:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
<!-- Show the generated word-cloud image centered in the viewport -->
<style>
img {
width:600px;
height:500px;
/* absolute positioning with all four offsets at 0 plus margin:auto
   centers the fixed-size image both horizontally and vertically */
position: absolute;
top:0;
right:0;
bottom:0;
left:0;
margin: auto
}
</style>
</head>
<body>
<!-- Image is served by Flask from the static/ directory -->
<img src="../static/img/第二十条词云.jpg"/>
</body>
</html>
code11散点图
app.py新增
from pyecharts.charts import Scatter
import pyecharts.options as opts


@app.route("/scatter")
def scatter():
    """Render a scatter plot of house size vs. total price for Qingdao listings."""
    # Read the listings CSV (GBK-encoded; `pd` is imported at the top of app.py)
    data = pd.read_csv("static/qingdao.csv", encoding='gbk')
    # Drop the exported index column
    data = data.drop("Unnamed: 0", axis=1)
    # Pair up each listing's size with its total price
    area_list = data['houseSize'].to_list()
    price_list = data['total_price'].to_list()
    # Build the chart and render it into templates/ so Flask can serve it
    # (the original bound this chain to an unused variable)
    (
        Scatter()
        .add_xaxis(xaxis_data=area_list)
        .add_yaxis(
            series_name="青岛二手房面积价格散点图",
            y_axis=price_list,
            symbol_size=2,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_series_opts()
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                type_="value", name="面积/m2", splitline_opts=opts.SplitLineOpts(is_show=True)
            ),
            yaxis_opts=opts.AxisOpts(
                type_="value",
                name="总价/万",
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=False),
        )
        .render("templates/scatter.html")
    )
    return render_template("scatter.html")
散点图
code12 map
新增
from pyecharts.charts import Map


# NOTE(review): the view function shadows the builtin `map`; kept as-is
# because renaming it would change the Flask endpoint name.
@app.route("/map")
def map():
    """Render a Qingdao map colored by average unit price per district."""
    data = pd.read_csv("static/qingdao.csv", encoding='gbk')
    data = data.drop("Unnamed: 0", axis=1)
    data = data.drop(index=0)
    # Average unit price per district
    data_grouped = data.groupby('area')['unite_price'].mean().reset_index()
    # [district, price in 万元] pairs
    result = [[value['area'], round(value['unite_price'] / 10000, 1)]
              for index, value in data_grouped.iterrows()]
    # Rewrite district names into the ones pyecharts' Qingdao map expects.
    # NOTE(review): this relies on groupby's ordering of the `area` values —
    # fragile if the CSV contents change; verify against the data.
    result[0][0] = '即墨市'
    result[1][0] = '城阳区'
    result[2][0] = '崂山区'
    result[3][0] = '市北区'
    result[4][0] = '市南区'
    result[5][0] = '平度市'
    result[6][0] = '李沧区'
    result[7][0] = '胶州市'
    result[8][0] = '莱西市'
    result[9][0] = '黄岛区'
    # Build the map and render it into templates/ so Flask can serve it
    (
        Map()
        .add("青岛二手房均价", result, "青岛")
        .set_global_opts(
            title_opts=opts.TitleOpts(title="青岛地图"),
            visualmap_opts=opts.VisualMapOpts(min_=0, max_=4, is_piecewise=True),
        )
        .render("templates/map.html")
    )
    return render_template("map.html")
Code13 Pie
from pyecharts.charts import Pie


@app.route("/pie")
def pie():
    """Render a rose-type pie chart of the 10 most common house layouts."""
    data = pd.read_csv("static/qingdao.csv", encoding='gbk')
    data = data.drop("Unnamed: 0", axis=1)
    # Count listings per house type (any column works for counting; `area` is used)
    temp = data.groupby("houseType")['area'].count().reset_index()
    list1 = [(value['houseType'], value['area']) for index, value in temp.iterrows()]
    # Keep the 10 most frequent types
    list1 = sorted(list1, key=lambda x: x[1], reverse=True)[:10]
    # Build the chart and render it into templates/ so Flask can serve it
    (
        Pie()
        .add("", list1, radius=["30%", "75%"], center=["25%", "50%"], rosetype="radius",
             label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(title_opts=opts.TitleOpts(title="房屋类型数量", pos_top=80))
        .render("templates/pie.html")
    )
    return render_template("pie.html")
Code14 bar
from pyecharts.charts import Scatter, Bar


@app.route("/bar")
def bar():
    """Render a horizontal bar chart of the 10 priciest positions (>= 3 listings)."""
    data = pd.read_csv("static/qingdao.csv", encoding="gbk")
    # Drop the exported index column
    data = data.drop("Unnamed: 0", axis=1)
    # print(data['position'].head(50))
    # Mean unit price and listing count per position
    temp = data.groupby("position")["unite_price"].agg(['mean', 'count']).reset_index()
    # print(temp)
    # Fixed: positions with fewer than 3 listings used to become (0, 0)
    # placeholder tuples that could leak into the top-10; filter them out instead.
    result1 = [(value["position"], round(value['mean'] / 10000, 1))
               for _, value in temp.iterrows() if value['count'] >= 3]
    result1 = sorted(result1, key=lambda x: x[1], reverse=True)[:10]
    # print(result1)
    # Reverse so the highest bar ends up on top after reversal_axis()
    (
        Bar()
        .add_xaxis([item[0] for item in result1][::-1])
        .add_yaxis("二手房均价", [item[1] for item in result1][::-1])
        .reversal_axis()
        .set_series_opts(label_opts=opts.LabelOpts(position="right"))
        .set_global_opts(title_opts=opts.TitleOpts(title="青岛二手房均价最高的几个小区"),
                         tooltip_opts=opts.TooltipOpts(
                             formatter="{b}:{c}万元"
                         )
                         )
        .render("templates/bar.html")
    )
    return render_template("bar.html")