您的位置：首页 > 编程语言 > Python开发

python学习笔记之文件操作及词频统计

2019-07-19 18:59 417 查看

原文链接：http://www.cnblogs.com/XuChengNotes/p/11215227.html

文件处理
文件词频统计

上节课回顾

函数
```
def func(num):
    """Return the constant 1 regardless of ``num`` (minimal function-definition demo)."""
    return 1
```
[/code]

集合

# set() with no arguments builds an empty set.
s = set()
print(type(s))
# Set operators -- intersection: &, union: |, difference: -, symmetric difference: ^

[/code]

序列类型：字符串/元组/列表,因为他们都有索引

# Build the list already sorted in ascending order and show it.
lt = sorted([1, 3, 4, 5, 2, 100, 154])
print(lt)

# Descending order is the ascending list read backwards.
lt = lt[::-1]
print(lt)

[/code]

文件处理

写方式打开文件

# Open the file for writing; the `with` block closes the handle even if
# the write raises, so the resource can no longer leak.
with open(r"E:\python_Program\day06\hamlet.txt", "w") as fd:
    data = fd.write('234234234')  # write() returns the number of characters written
print(data)

[/code]

读方式打开文件

# Open the file for reading and decode it as UTF-8; the `with` block
# closes the handle even if reading or decoding raises.
with open(r"E:\python_Program\day06\threekingdoms.txt", "r", encoding="utf8") as fd:
    data = fd.read()
print(data)

[/code]

文件词频统计

英文词频统计

# Count word frequencies in hamlet.txt and print the ten most frequent words.
with open(r"E:\python_Program\day06\hamlet.txt", "r", encoding="UTF-8") as fd:
    data = fd.read().lower()  # lower-case so "The" and "the" are one word

# Replace punctuation with spaces so it does not stick to neighbouring words.
# The double quote and the backtick were missing from the original list; the
# apostrophe is deliberately left alone so contractions stay in one piece.
for ch in "!\"#$%&()*+,-./:;<=>?@[\\]^_‘`{|}~":
    data = data.replace(ch, " ")

# split() with no argument splits on any run of whitespace (including
# newlines) and never yields empty strings, unlike split(" ").
words = data.split()

count_dict = {}  # word -> number of occurrences
for word in words:
    count_dict[word] = count_dict.get(word, 0) + 1

lt = list(count_dict.items())  # (word, count) pairs


def func(i):
    """Sort key: the count stored at index 1 of a (word, count) pair."""
    return i[1]


# Highest count first; words with equal counts keep first-seen order.
lt.sort(key=func, reverse=True)
for i in lt[:10]:
    print(f"[{i[0]:^5},{i[1]:^5}]")

[/code]

中文词频统计

# Count word frequencies in threekingdoms.txt and print the ten most frequent words.
import jieba  # third-party Chinese word-segmentation library

with open(r"E:\python_Program\day06\threekingdoms.txt", "r", encoding="UTF-8") as f:
    data = f.read()

data_dict = {}  # word -> number of occurrences
# Noise words that are excluded from the statistics.
badword = {"军马","大喜","丞相","却说","不可","二人","如此","商议","如何","主公","将军","不能","荆州","军士","左右","引兵","次日"}
words = jieba.lcut(data)  # segment the text into a list of words
for word in words:
    # Strip "曰" first so that e.g. "孔明曰" is counted as "孔明"; doing it
    # before the filter keeps single-character or stop-word leftovers out.
    word = word.replace("曰", "")
    if len(word) <= 1 or word in badword:
        continue
    data_dict[word] = data_dict.get(word, 0) + 1

list_data = list(data_dict.items())  # (word, count) pairs


def func(i):
    """Sort key: the count stored at index 1 of a (word, count) pair."""
    return i[1]


# Highest count first; words with equal counts keep first-seen order.
list_data.sort(key=func, reverse=True)

for i in list_data[:10]:  # the ten most frequent words
    print(f"{i[0]: >3},{i[1]: <5}")

[/code]

作业

# 作业
# 1.定义一个函数，该函数可以实现在内部输入一个信息，如果该信息不能转换为正整数，则重新输入，直到能转换为正整数，则对外返回转换的正整数
# 2.定义一个函数，该函数可以实现在内部输入一个信息，如果该信息不能转换为负整数，则重新输入，直到能转换为负整数，则对外返回转换的负整数
# 3.定义一个函数，实现外界传入一个信息，内部判断是否能被转换为正整数，返回True | False信息
# 4.定义一个函数，实现外界传入一个信息，内部判断是否能被转换为负整数，返回True | False信息
# 5.定义一个函数，实现传入一个整型数字，判断并直接打印该数字是否是奇数还是偶数
# 6.写函数，检查传入列表的长度，如果大于2，那么仅保留前两个长度的内容，并将新内容返回给调用者。
# 7.写函数，检查获取传入列表或元组对象的所有奇数位索引对应的元素，并将其作为新列表返回给调用者。
# 8.定义一个函数，只要传入 "k1:v1,...,kn:vn" 这样固定格式的字符串，都可以将其转换为 {'k1':'v1',...,'kn':'vn'}这样的字典并返回
# 9.简单购物车,要求如下 (可以用函数也可以不用)：
# 要求： 实现打印商品详细信息，用户输入商品名和购买个数，则将商品名，价格，购买个数加入购物列表，如果输入为空或其他非法输入则要求用户重新输入,购买成功后打印添加到购物车里的信息.

# 作业1：
def input_positiveinteger():
    """Prompt until the user enters a value convertible to a positive integer.

    Whole numbers are parsed with ``int``; decimals such as "3.7" are
    truncated toward zero. Input that is not numeric, is malformed (leading
    or trailing "."), or converts to zero or a negative number is rejected
    and the prompt is shown again.

    Returns:
        int: the converted value, always greater than zero.
    """
    while True:
        text = input("输入一个值：").strip()
        # Reject ".5" / "5." explicitly; float() would otherwise accept them.
        if text.startswith(".") or text.endswith("."):
            continue
        try:
            # int() first for whole numbers so large values keep full precision.
            value = int(float(text)) if "." in text else int(text)
        except (ValueError, OverflowError):
            # Not a number at all, or a float too large to convert.
            continue
        if value > 0:
            return value

# 作业2：
def input_negativeinteger():
    """Prompt until the user enters a value convertible to a negative integer.

    Whole numbers are parsed with ``int``; decimals such as "-3.7" are
    truncated toward zero. Input that is not numeric, is malformed (leading
    or trailing "."), or converts to zero or a positive number is rejected
    and the prompt is shown again.

    Returns:
        int: the converted value, always less than zero.
    """
    while True:
        text = input("输入一个值：").strip()
        # Reject ".5" / "5." explicitly; float() would otherwise accept them.
        if text.startswith(".") or text.endswith("."):
            continue
        try:
            # int() first for whole numbers so large values keep full precision.
            value = int(float(text)) if "." in text else int(text)
        except (ValueError, OverflowError):
            # Not a number at all, or a float too large to convert.
            continue
        if value < 0:
            return value

# 作业3：
def is_positiveinteger(num):
    """Report whether ``num`` can be converted to a positive integer.

    ``num`` is converted with ``str`` first, so numbers and strings are both
    accepted. Decimals are truncated toward zero before the sign check, so
    "3.7" qualifies and "0.5" does not.

    Returns:
        str: the string "True" or "False" (strings, not booleans, as in the
        original implementation). Both are truthy, so callers must compare
        against "True" explicitly.
    """
    text = str(num).strip()
    # Reject ".5" / "5." explicitly; float() would otherwise accept them.
    if text.startswith(".") or text.endswith("."):
        return "False"
    try:
        # int() first for whole numbers so large values keep full precision.
        value = int(float(text)) if "." in text else int(text)
    except (ValueError, OverflowError):
        # Not a number at all, or a float too large to convert.
        return "False"
    return "True" if value > 0 else "False"

# 作业4：
def is_negativeinteger(num):
    """Report whether ``num`` can be converted to a negative integer.

    ``num`` is converted with ``str`` first, so numbers and strings are both
    accepted. Decimals are truncated toward zero before the sign check, so
    "-3.7" qualifies and "-0.5" does not.

    Returns:
        str: the string "True" or "False" (strings, not booleans, as in the
        original implementation). Both are truthy, so callers must compare
        against "True" explicitly.
    """
    text = str(num).strip()
    # Reject ".5" / "5." explicitly; float() would otherwise accept them.
    if text.startswith(".") or text.endswith("."):
        return "False"
    try:
        # int() first for whole numbers so large values keep full precision.
        value = int(float(text)) if "." in text else int(text)
    except (ValueError, OverflowError):
        # Not a number at all, or a float too large to convert.
        return "False"
    return "True" if value < 0 else "False"

# 作业5：
def is_oddeven(num):
    """Print "偶数" (even) or "奇数" (odd) for ``num``.

    ``num`` is passed through ``int`` before the parity check, so anything
    ``int`` accepts is allowed. Nothing is returned.
    """
    print("偶数" if int(num) % 2 == 0 else "奇数")

# 作业6：
def set_list(list):
    """Return at most the first two elements of ``list``.

    A sequence longer than two elements is sliced, producing a new object
    holding its first two items; a shorter one is returned unchanged (the
    very same object, not a copy).

    The parameter name shadows the ``list`` builtin; it is kept so existing
    keyword callers continue to work.
    """
    if len(list) <= 2:
        return list
    return list[:2]

# 作业7：
def set_oddlist(list):
    """Return a new list holding the elements at odd indexes (1, 3, 5, ...).

    Works for lists and tuples alike; the result is always a list.

    The parameter name shadows the ``list`` builtin; it is kept so existing
    keyword callers continue to work.
    """
    return [item for index, item in enumerate(list) if index % 2 == 1]

# 作业8：
def set_dict(nums):
    """Convert a "k1:v1,...,kn:vn" string into ``{'k1': 'v1', ..., 'kn': 'vn'}``.

    Items are separated by "," and each item is split on its first ":" only,
    so a value may itself contain colons. Empty items (an empty input string,
    a trailing comma) are skipped instead of raising.

    Raises:
        IndexError: if a non-empty item contains no ":".
    """
    dict_count = {}
    for item in str(nums).split(","):
        if not item:
            continue
        temp = item.split(":", 1)  # split on the first colon only
        dict_count[temp[0]] = temp[1]
    return dict_count

# 作业9：
def shopping_list():
    """Run a simple interactive shopping cart.

    The product catalogue is printed, then the user is asked for a product
    name and a quantity. The pair is re-requested until the name is in the
    catalogue and the quantity is a positive integer. Each accepted purchase
    is stored as a ``(name, price, quantity)`` tuple and the whole cart is
    printed. Typing "quit" at either prompt ends the session.

    Returns:
        list: the ``(name, price, quantity)`` tuples collected so far.
    """
    msg_dic = {
        'apple': 10,
        'tesla': 100000,
        'mac': 3000,
        'lenovo': 30000,
        'chicken': 10,
    }
    list_count = []
    while True:
        for product, price in msg_dic.items():
            print(f"name:{product} price:{price}")
        while True:
            name = input("商品>>:")
            if name == "quit":
                return list_count
            number = input("个数>>:")
            if number == "quit":
                return list_count
            if name not in msg_dic:
                continue  # unknown or empty product name: ask again
            try:
                count = int(number)
            except ValueError:
                continue  # quantity is not an integer: ask again
            if count > 0:
                break
        list_count.append((name, msg_dic[name], count))
        print(list_count)

[/code]

转载于:https://www.cnblogs.com/XuChengNotes/p/11215227.html

3ff7

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航