第十周实训

部分内容

:::success
Yes :tada:
:::

第一个代码

# Week-10 demo (unformatted first draft): basic types, lists, and tuples.
# single-line comment
'''
# 单行注释
'''
# NOTE(review): this shadows the builtin `str` type for the rest of the module.
str="我是一只来自北方的狼"
bnum=True
# complex number literal (original comment "复赋" is a typo for 复数 = complex)
com=2+1j
print(type(com))

if type(com)==complex:
    print("我是复数来着")

# lists may hold mixed types
list1 = [1,3,4,True,com,str]
for item in list1:
    print(item,"***********************\n")

list1.append("哈哈哈 我是卖报的小行家")
# delete the first element
del list1[0]
print(list1)
# tuples are immutable: no in-place assignment, but concatenation works
tub1 = (1,2,3,4,5,6,7)
tub2 = ("a","b","c","d","e","f","g")
#tub = tub1+tub2
#tub1[0]=888
#print(tub)
[(1,"a"), (2,"b"), (3,"c")]

# zip pairs the tuples element-wise; the literal above shows the target shape
list1 = [ item  for item in zip(tub1,tub2)]
print(list1)

# Formatted (PEP 8-style) duplicate of the draft script above.
# single-line comment
'''
# 单行注释
'''
# NOTE(review): shadows the builtin `str` type.
str = "我是一只来自北方的狼"
bnum = True
# complex number literal
com = 2 + 1j
print(type(com))

if type(com) == complex:
    print("我是复数来着")

# list of mixed types
list1 = [1, 3, 4, True, com, str]
for item in list1:
    print(item, "***********************\n")

list1.append("哈哈哈 我是卖报的小行家")
# delete the first element
del list1[0]
print(list1)
# tuples are immutable: no in-place assignment, but concatenation works
tub1 = (1, 2, 3, 4, 5, 6, 7)
tub2 = ("a", "b", "c", "d", "e", "f", "g")
# tub = tub1+tub2
# tub1[0]=888
# print(tub)
[(1, "a"), (2, "b"), (3, "c")]

list1 = [item for item in zip(tub1, tub2)]
print(list1)

# Re-pair the two tuples for the sorting exercise below.
list1 = [ item for item in zip(tub1,tub2)]

# sequence homework: sort the pairs by their first element, ascending
list2 = sorted(list1,key=lambda x:x[0],reverse=False)
print(list2)
# dict: key/value mapping, similar to a Java Map
dict1={
    "name":"古他那黑暗之神赵四",
    "age":20,
    "hobby":"亚洲舞王之炸雷"
}

print(list(dict1.keys()))
print(list(dict1.values()))
# list of dicts, one record per person
lis3  =[{
    'name':'尼古拉嘶赵四',
    'age':60,
    "hobby":"亚洲舞王之炸雷"

},{
    "name":"最强最强妖王谢广坤",
    "age":62,
    "hobby":"作妖",
},{
    "name":"迈克尔刘能",
    "age":60,
    "hobby":"烫头"}
]

import csv
# Write the records out as CSV.
# File modes: "w" writes text and OVERWRITES existing data ("wb" writes bytes);
# "a" appends at the end without overwriting; "r" reads text ("rb" reads bytes).
with open("infor.csv","w",newline="",encoding="utf-8") as f:
  # dict-based writer
  # NOTE(review): writeheader() is never called, so the file has no header
  # row; the reader below compensates by passing fieldnames explicitly.
  write = csv.DictWriter(f,fieldnames=["name","age","hobby"])
  write.writerows(lis3)

# Read the file back and print each row as a dict.
with open("infor.csv","r",encoding="utf-8") as f:
  fread = csv.DictReader(f,fieldnames=["name","age","hobby"])
  print("*****************************************")
  for item in fread:
    print(item)

第二个代码

def difNum(list1):
    """Print (and return) every two-digit string formed from an ordered pair
    of unequal values in *list1*, with duplicates removed in first-seen order.

    Fix: the original appended into the module-level lists ``list2``/``list3``,
    so repeated calls accumulated stale results and the O(n) membership test
    made de-duplication quadratic. Locals plus ``dict.fromkeys`` fix both;
    the printed output for a fresh run is unchanged, and the result is now
    also returned for convenience (original returned None).
    """
    # All ordered pairs of unequal elements, rendered as two-digit strings.
    pairs = ["%d%d" % (a, b) for a in list1 for b in list1 if a != b]
    # dict.fromkeys de-duplicates while preserving first-occurrence order.
    unique = list(dict.fromkeys(pairs))
    print(unique)
    return unique


# Build two-digit combinations whose digits differ; the combinations
# themselves must also be unique (difNum prints the result).
list1 = [1, 2, 3, 4, 6, 7, 8, 8, 9, 9, 5, 3]
list2 = []  # scratch list of raw pairs (filled by difNum)
list3 = []  # final de-duplicated result (filled by difNum)
difNum(sorted(list1))

code3 类继承

class Person:
    """Base person with class-level defaults that __init__ overrides
    per instance."""

    # Class-level defaults (shared until shadowed on the instance).
    name = "人类"
    age = 20
    # Name-mangled to _Person__priMoney; not visible to subclasses as-is.
    __priMoney = 5000

    def __init__(self):
        # Instance attributes shadow the class-level defaults above.
        self.name = "小明"
        self.age = 40

    def MyPrint(self):
        """Print a short description and return a motto string."""
        print(self.name, f"{self.age}了,大龄剩男")
        return "搞钱吧,别想别的,你已经不是充钱的少年了"

    def __del__(self):
        # Finalizer: announces when the instance is garbage-collected.
        print("end_Person_class")

class Mother:
    """Mixin contributing a flying ability to subclasses."""

    def fly(self):
        # Side-effect only: announce the ability.
        print("会飞翔")

# Multiple inheritance: attribute lookup checks Person before Mother (MRO).
class Child(Person,Mother):
    # These class-level values shadow Person's, but since Child defines no
    # __init__, Person.__init__ still runs on construction and sets the
    # INSTANCE attributes name="小明"/age=40, which win over these.
    name = "小强"
    age = 18
    # def __init__(self):
    #    self.name = "小量"
    #    self.age = 17
    # Name-mangled to _Child__priMoney — a separate copy from Person's
    # _Person__priMoney (the parent's private attribute is not inherited
    # under its unmangled name).
    __priMoney = 1000
    # Methods defined by the subclass.
    def SayHello(self):
        print("你好,我是{0},我今年{1}岁了".format(self.name, self.age))
    def MyPrint(self):
        # Overrides Person.MyPrint.
        print("我是子类的输出函数")
        print(self.name, "{}了,人够男的".format(self.age))
        return "愉快吧,别想别的,你已经不是充钱的少年了"

    def priv_money(self):
        # __priMoney here resolves to _Child__priMoney (mangled in this class).
        print("私房钱剩余",self.__priMoney)

# Stray module-level statement; has no effect.
pass

# Demo entry point: exercise the Child class (inherits Person and Mother).
if __name__=="__main__":
    cc = Child()
    cc.SayHello()
    cc.MyPrint()    # subclass override runs, not Person.MyPrint
    cc.fly()        # inherited from the Mother mixin
    cc.priv_money()

code4 豆瓣爬虫

镜像源

下载速度很快,但是 PyCharm 使用该镜像源时似乎不太稳定,会有些卡顿。

中科大(USTC)镜像源:https://mirrors.ustc.edu.cn/pypi/web/simple

或者直接通过pip install requests beautifulsoup4 bs4 -i https://mirrors.ustc.edu.cn/pypi/web/simple安装

代码

# Send an HTTP request to the remote server and fetch the page data.
import requests
# Pages are nested tags; BeautifulSoup builds the tag tree so tag attributes
# and text can be looked up through its finder methods.
from bs4 import BeautifulSoup
import lxml
import time

url = 'https://movie.douban.com/subject/36208094/comments?status=P'

# Masquerade as a regular browser.
# NOTE(review): the Cookie is a hard-coded session value and will expire;
# requests will start failing silently once it does.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Host": "movie.douban.com",
    "Cookie": "ll='108302'; bid=UNCukvvPqR8; __pk_id.100001.4cf6=ba0528c5b4d17da0.1704161561.; __yadk_uid=NAiBkhl9crL0ghh6yzHiXUPk"
}

# Fetch Douban's response.
resp = requests.get(url=url, headers=headers)
print(resp.status_code)
# print(resp.text)

# Parse the page source.
bs = BeautifulSoup(resp.content, "lxml")

# Find every div with class 'comment-item'.
divs = bs.find_all("div", attrs={"class": "comment-item"})
# print(divs)

for item in divs:
    # Each item is one user's comment block.
    review = item.find("span", attrs={"class": "short"}).text
    print("*********************************")
    print(review)

    nickname = item.find("span", attrs={"class": "comment-info"}).find("a").text
    # print the nickname
    print(nickname)

    # The rating span is absent when the user did not rate the film.
    score_tag = item.find("span", attrs={"class": "rating"})
    if score_tag:
        score = score_tag['title']
        print(score)
    else:
        print("No rating found")

code5 爬虫爬多页


# Send HTTP requests to the remote server and fetch the page data.
import requests
# Pages are nested tags; BeautifulSoup builds the tag tree so tag attributes
# and text can be looked up through its finder methods.
from bs4 import BeautifulSoup
import lxml
import time

# Masquerade as a regular browser.
# NOTE(review): the Cookie is a hard-coded session value and will expire.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Host": "movie.douban.com",
    "Cookie": "ll='108302'; bid=UNCukvvPqR8; __pk_id.100001.4cf6=ba0528c5b4d17da0.1704161561.; __yadk_uid=NAiBkhl9crL0ghh6yzHiXUPk"
}

# All scraped records. (Fix: this was named `list`, shadowing the builtin
# list type for the rest of the module.)
records = []

for i in range(10):
    try:
        # Be polite to the server: at most one request per second.
        time.sleep(1)
        # Each page shows 20 comments; start is the paging offset.
        url = f"https://movie.douban.com/subject/36208094/comments?start={i * 20}&limit=20&status=P&sort=new_score"
        resp = requests.get(url=url, headers=headers)
        print(resp.status_code)
        # print(resp.text)

        # Parse the page source.
        bs = BeautifulSoup(resp.content, "lxml")
        # Find every div with class 'comment-item'.
        divs = bs.find_all("div", attrs={"class": "comment-item"})
        # print(divs)

        for item in divs:
            dict1 = {}
            # Each item is one user's comment block.
            review = item.find("span", attrs={"class": "short"}).text
            print("*********************************")
            print(review)

            nickname = item.find("span", attrs={"class": "comment-info"}).find("a").text
            print(nickname)

            # The rating span is absent when the user did not rate the film.
            score_tag = item.find("span", attrs={"class": "rating"})
            if score_tag:
                score = score_tag['title']
                print(score)
            else:
                score = "No rating found"

            dict1['nickname'] = nickname
            dict1['review'] = review
            dict1['score'] = score
            records.append(dict1)

    except Exception as e:
        # Best-effort scraping: log the failure and move on to the next page.
        print("出现了异常了", e)
        continue

import csv

print("总数据是", records)

# Write the scraped data to 第二十条.csv.
# NOTE(review): no header row is written, deliberately — the word-cloud
# scripts read this file back with explicit column names (names=[...]).
with open("第二十条.csv", "w", newline="", encoding="utf-8") as f:
    fwrite = csv.DictWriter(f, fieldnames=["nickname", "review", "score"])
    fwrite.writerows(records)

code6 词云1

# pandas for loading/cleaning the scraped data
import pandas as pd
from wordcloud import WordCloud
# numpy turns the mask picture into a matrix
import numpy as np
# Image.open loads the picture into memory
from PIL import Image

# Load the mask image that shapes the word cloud.
image = np.array(Image.open("image/111.jpg"))
# print(image)

# Read the scraped CSV (it has no header row, hence explicit names).
data = pd.read_csv('第二十条.csv', names=['nickname','review','score'])
# Collect every review into one list.
listCom1 = data['review'].tolist()

# jieba segments Chinese text into words.
import jieba
# Join the reviews into a single string, then cut it into tokens.
listCom2 = jieba.lcut(",".join(listCom1))
# Keep only tokens longer than one character (drops punctuation/filler).
# (Fix: was a manual for/append loop — a comprehension says the same thing.)
list2 = [word for word in listCom2 if len(word) > 1]

# Count how often each word occurs.
import collections
word_fre = collections.Counter(list2)
print(word_fre)

# Build and save the word cloud.
wc = WordCloud(
    background_color='white',
    mask=image,
    font_path='font/SourceHanSansHWSC-Bold.otf',  # Chinese-capable font
    max_words=1000,  # cap on displayed words
    max_font_size=300  # largest font size
)
wc.generate_from_frequencies(word_fre)
wc.to_file('词云.jpg')  # save the rendered cloud

code7 词云2

去掉语气词

# Word cloud, second pass: also removes stop/filler words.
# pandas for loading/cleaning the scraped data
import pandas as pd
from wordcloud import WordCloud
# numpy turns the mask picture into a matrix
import numpy as np
# Image.open loads the picture into memory
from PIL import Image

# Load the mask image that shapes the word cloud.
image = np.array(Image.open("image/111.jpg"))
# print(image)

# Read the scraped CSV (no header row, hence explicit names).
data = pd.read_csv('第二十条.csv', names=['nickname','review','score'])
# Collect every review into one list.
listCom1 = data['review'].tolist()

# jieba segments Chinese text into words.
import jieba
# Join the reviews into a single string, then cut it into tokens.
listCom2 = jieba.lcut(",".join(listCom1))

# Count how often each word occurs.
import collections

# Stop words: punctuation and filler terms excluded from the cloud.
filter_word = [',', '的', '。', '了', '是', '我', '很', '在', '', '电影']

# Single filtering pass: drop stop words and one-character tokens.
# (Fix: the original built an intermediate list with a len>1 check, then
# re-checked len>1 again while filtering stop words — one pass suffices.)
list3 = [word for word in listCom2
         if len(word) > 1 and word not in filter_word]
word_fre = collections.Counter(list3)
print(word_fre)

# Build and save the word cloud.
wc = WordCloud(
    background_color='white',
    mask=image,
    font_path='font/SourceHanSansHWSC-Bold.otf',  # Chinese-capable font
    max_words=1000,  # cap on displayed words
    max_font_size=300  # largest font size
)
wc.generate_from_frequencies(word_fre)
wc.to_file('第二十条词云.jpg')  # save the rendered cloud

code 8 flask

from flask import Flask, render_template, request

app = Flask(__name__)

# Handler (route) for the default path.
@app.route("/")
def home():
    # Inline HTML response — no template involved.
    return "<h1 style='color:red'>今天天气有点热,吃不进饭</h1>"

@app.route("/index")
def index():
    # Serves templates/index.html.
    return render_template('index.html')

# Start the Flask development server.
if __name__ == "__main__":
    # debug=True enables the reloader and debugger — development use only.
    app.run(port=9999, debug=True)

code 9 index.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <style>
        /* Center the fixed-size word-cloud image in the viewport:
           absolute positioning with all four offsets at 0 plus margin:auto. */
        img {
            width:600px;
            height:500px;
            position: absolute;
            top:0;
            right:0;
            bottom:0;
            left:0;
            margin: auto
        }
    </style>
</head>
<body>
   <!-- Word-cloud image produced by the Python script, served from static/ -->
   <img  src="../static/img/第二十条词云.jpg"/>
</body>
</html>

code 10 复制下文件

压缩包解压拖过去就可以

app.py

from flask import Flask, render_template, request

app = Flask(__name__)

# Handler (route) for the default path.
@app.route("/")
def home():
    # Serves templates/index.html.
    return render_template("index.html")

@app.route("/welcome")
def welcome():
    # Serves templates/welcome.html.
    return render_template("welcome.html")

@app.route("/wordcloud")
def wordcloud():
    # Assuming wordcloud.html is a valid template
    return render_template("wordcloud.html")

# Start the Flask development server.
if __name__ == "__main__":
    # debug=True enables the reloader and debugger — development use only.
    app.run(port=9999, debug=True)

fix:

wordcloud.html:

<!DOCTYPE html>
<html lang="en">


<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <style>
        /* Center the fixed-size word-cloud image in the viewport:
           absolute positioning with all four offsets at 0 plus margin:auto. */
        img {
            width:600px;
            height:500px;
            position: absolute;
            top:0;
            right:0;
            bottom:0;
            left:0;
            margin: auto
        }
    </style>
</head>
<body>
   <!-- Word-cloud image produced by the Python script, served from static/ -->
   <img  src="../static/img/第二十条词云.jpg"/>
</body>
</html>

code11散点图

app.py新增

from pyecharts.charts import Scatter
import pyecharts.options as opts

@app.route("/scatter")
def scatter():
    """Scatter plot of house size vs. total price for Qingdao listings.

    Reads static/qingdao.csv, writes the chart to templates/scatter.html,
    then serves that template.
    """
    # NOTE(review): assumes `pd` (pandas) is imported at app.py module level.
    data = pd.read_csv("static/qingdao.csv", encoding='gbk')

    # Drop the stray index column left over from the CSV export.
    data = data.drop("Unnamed: 0", axis=1)

    # Size (x) and total price (y) series.
    area_list = data['houseSize'].to_list()
    price_list = data['total_price'].to_list()

    # Build the chart; .render() writes the HTML file. (Fix: its return
    # value — the output path — was stored in an unused variable before.)
    (
        Scatter()
        .add_xaxis(xaxis_data=area_list)
        .add_yaxis(
            series_name="青岛二手房面积价格散点图",
            y_axis=price_list,
            symbol_size=2,
            label_opts=opts.LabelOpts(is_show=False),
        )
        .set_series_opts()
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                type_="value", name="面积/m2", splitline_opts=opts.SplitLineOpts(is_show=True)
            ),
            yaxis_opts=opts.AxisOpts(
                type_="value",
                name="总价/万",
                axistick_opts=opts.AxisTickOpts(is_show=True),
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=False),
        )
        .render("templates/scatter.html")
    )

    return render_template("scatter.html")

散点图

code12 map

新增

from pyecharts.charts import Map
# NOTE(review): the view function name shadows the builtin map(); renaming it
# would also change the Flask endpoint name, so it is flagged rather than fixed.
@app.route("/map")
def map():
    # District-level average unit price drawn on a Qingdao map.
    data = pd.read_csv("static/qingdao.csv",encoding='gbk')
    data = data.drop("Unnamed: 0", axis=1)
    data = data.drop(index=0)

    # Mean unit price per district, converted to 万元 (10k yuan) below.
    data_grouped = data.groupby('area')['unite_price'].mean().reset_index()
    result = [[value['area'], round(value['unite_price'] / 10000, 1)] for index, value in data_grouped.iterrows()]

    # Replace district labels with the names pyecharts' Qingdao map expects.
    # NOTE(review): this relies on groupby's sort order of the original
    # labels — fragile if the CSV's set of districts ever changes.
    result[0][0] = '即墨市'
    result[1][0] = '城阳区'
    result[2][0] = '崂山区'
    result[3][0] = '市北区'
    result[4][0] = '市南区'
    result[5][0] = '平度市'
    result[6][0] = '李沧区'
    result[7][0] = '胶州市'
    result[8][0] = '莱西市'
    result[9][0] = '黄岛区'

    c = (
        Map()
        .add("青岛二手房均价", result, "青岛")
        .set_global_opts(
            title_opts=opts.TitleOpts(title="青岛地图"),
            # Piecewise legend over the 0-4 万元/m² range.
            visualmap_opts=opts.VisualMapOpts(min_=0, max_=4, is_piecewise=True),
        )
        # render() writes templates/map.html; the stored path is unused.
        .render("templates/map.html")
    )

    return render_template("map.html")

Code13 Pie

from pyecharts.charts import Pie
@app.route("/pie")
def pie():
    """Rose-style pie chart of the ten most common house layouts."""
    # NOTE(review): assumes `pd` (pandas) is imported at app.py module level.
    data = pd.read_csv("static/qingdao.csv",encoding='gbk')
    data = data.drop("Unnamed: 0", axis=1)

    # Number of listings per layout type.
    counts = data.groupby("houseType")['area'].count().reset_index()
    pairs = [(row['houseType'], row['area']) for _, row in counts.iterrows()]

    # Keep only the ten most frequent layouts.
    pairs.sort(key=lambda entry: entry[1], reverse=True)
    top_pairs = pairs[:10]

    # Build the chart and write it to templates/pie.html.
    chart = Pie()
    chart.add("", top_pairs, radius=["30%", "75%"], center=["25%", "50%"],
              rosetype="radius", label_opts=opts.LabelOpts(is_show=False))
    chart.set_global_opts(title_opts=opts.TitleOpts(title="房屋类型数量", pos_top=80))
    chart.render("templates/pie.html")

    return render_template("pie.html")

Code14 bar

from pyecharts.charts import Scatter, Bar
@app.route("/bar")
def bar():
    # Horizontal bar chart: the ten positions with the highest average price.
    data =pd.read_csv("static/qingdao.csv",encoding="gbk")
    # drop the stray index column
    data=data.drop("Unnamed: 0",axis=1)
    #print(data['position'].head(50))
    # Per-position mean unit price and listing count.
    temp = data.groupby("position")["unite_price"].agg(['mean','count']).reset_index()
    #print(temp)

    # Positions with fewer than 3 listings become (0, 0) placeholders so
    # they sort to the bottom; prices are converted to 万元 (10k yuan).
    # NOTE(review): a (0, 0) placeholder could still reach the top-10 slice
    # if fewer than ten positions qualify — confirm against the data.
    result1=[ (value["position"],round(value['mean']/10000,1)) if value['count']>=3 else (0,0)  for _,value in temp.iterrows()]
    result1=sorted(result1,key=lambda x:x[1],reverse=True)[:10]
    #print(result1)
    c = (
        Bar()
        # Reversed so the highest price ends up at the top after the flip.
        .add_xaxis([ item[0] for item in result1][::-1])
        .add_yaxis("二手房均价", [item[1] for item in result1][::-1])
        # Flip to horizontal bars.
        .reversal_axis()
        .set_series_opts(label_opts=opts.LabelOpts(position="right"))
        .set_global_opts(title_opts=opts.TitleOpts(title="青岛二手房均价最高的几个小区"),
                        tooltip_opts=opts.TooltipOpts(
                            formatter="{b}:{c}万元"
                        )
                        )
        # render() writes templates/bar.html; the stored path is unused.
        .render("templates/bar.html")
    )
    return render_template("bar.html")

最后修改:2024 年 04 月 30 日
如果觉得我的文章对你有用,请随意赞赏