您的位置:首页 > 其它

06.批量修改文件夹下的所有pdf文件名

2020-06-06 05:17 781 查看

批量修改文件夹下的所有PDF文件名

按照关键字把pdf文件名修改为文章内的标题。该程序用于修改基金官网披露的定期报告(定报)pdf的文件名:因为每一个文件的标题都带有"报告"两个字,所以可以使用正则表达式进行匹配与修改。下面提供一个简单版与一个完整版。完整版只适用于基金官网披露的定报pdf,考虑的情况比较多,处理比较完善;简单版只需修改关键字即可适用于大多数情况,不过会有一些文件修改不正确,这属于正常情况——特殊情况需要特殊分析,这里只是提供一种修改思路。

  1. 简单版
import pdfplumber
import re
import os

# Folder whose PDF files are renamed.  The backslash before '证' is now
# escaped explicitly: same path value, no invalid-escape warning.
Project = 'D:\\证监会基金\\'

# Every file is renamed to the text of its first page, cut right after the
# first '报告' ("report"), which is where the report title ends.
for dirpath, _dirnames, filenames in os.walk(Project):
    for name in filenames:
        # Join with the folder os.walk is currently visiting so that files
        # found in sub-folders get a path that actually exists.
        path = os.path.join(dirpath, name)
        print(path)
        # Failures are handled per file, so one unreadable PDF no longer
        # silently aborts the rest of the batch.
        try:
            with pdfplumber.open(path) as pdf:
                text = pdf.pages[0].extract_text()   # first page only
        except Exception as exc:
            print('skipped, cannot read {}: {!r}'.format(path, exc))
            continue
        if not text:
            # Nothing extractable on the first page (e.g. an image-only PDF).
            print('skipped, no text on first page: {}'.format(path))
            continue
        mm_0 = re.sub(r'\n', '', text)
        print(mm_0)
        mm_1 = re.sub(r'报告.*', '报告', mm_0)
        # Keep the renamed file in its own folder; a bare file name would be
        # resolved against the current working directory instead.
        end = os.path.join(dirpath, mm_1 + '.pdf')
        print(end)
        print('*' * 40)
        try:
            # The `with` block has already closed the PDF at this point.
            os.rename(path, end)
        except OSError as exc:
            print('rename failed for {}: {!r}'.format(path, exc))
  2. 完整版
"""
时间:2020-04-10
功能:批量修改基金定报pdf文件标题
"""
import pdfplumber
from itertools import groupby
import re
import os
from colorama import Fore, Back, Style, init

def routine(str1):
    """Return *str1* cut after '报告' and stripped of all whitespace.

    On each line, whatever follows the first '报告' is discarded; afterwards
    every whitespace character in the result is removed.
    """
    return re.sub(r'\s', '', re.sub(r'报告.*', '报告', str1))

def changlist(list1, str1):
    """Swap the first two items of *list1* in place and append them to *str1*.

    Returns *str1* followed by all items of the reordered list, joined
    without a separator.  The caller's list is modified.
    """
    head, second = list1[0], list1[1]
    list1[0] = second
    list1[1] = head
    return str1 + ''.join(list1)

def have_forlord(list1, str1, str2):
    """Build the title for a fund whose name carries a bracketed type tag.

    Args:
        list1: Whitespace-separated tokens of the truncated title; only the
            last token is inspected.
        str1: Tag text (the caller passes values such as '(LOF)') that
            replaces everything from the first opening parenthesis of
            *str2* onwards.
        str2: Title text extracted from the PDF.

    Returns:
        The rebuilt title string.
    """
    # The former pattern r'(.*' is an unterminated group, so re.sub raised
    # re.error on every call.  A character class matches a literal opening
    # parenthesis, half- or full-width.  The callable replacement inserts
    # str1 verbatim, without backslash or group-reference processing.
    str3 = re.sub(r'[(（].*', lambda _match: str1, str2)

    # Split the last token into alternating digit / non-digit runs,
    # e.g. '年年度报告2019' -> ['年年度报告', '2019'].
    last_token = list1[-1] if list1 else ''
    runs = [''.join(g) for _, g in groupby(last_token, key=str.isdigit)]
    if len(runs) <= 1:
        # Nothing to reorder: fall back to the generic clean-up.
        str3 = routine(str2)
    else:
        # Put the second run in front of the first and append both to the
        # tagged name.
        str3 = changlist(runs, str3)

    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation; confirm the `else` belongs to the inner `if`.
    tokens = str3.split()
    if len(tokens) > 1:
        runs = [''.join(g) for _, g in groupby(tokens[-2], key=str.isdigit)]
        if len(runs) > 1:
            str3 = changlist(runs, tokens[0]) + tokens[-1]
        else:
            # Only the separating whitespace has to go.
            str3 = ''.join(tokens)
    return str3

def _digit_runs(text):
    """Split *text* into alternating runs of digit and non-digit characters."""
    return [''.join(g) for _, g in groupby(text, key=str.isdigit)]


def _rotate_left(items):
    """Return *items* with its first element moved to the end."""
    return items[1:] + items[:1]


def _derive_title(mm_2):
    """Turn the cleaned first-page text *mm_2* into the new file title."""
    # Several '度报告' hits, or no space to split on: generic clean-up.
    if len(re.findall('度报告', mm_2)) > 1 or ' ' not in mm_2:
        return routine(mm_2)

    year = re.findall(r'\d{4}', mm_2)
    if len(year) < 2:
        # Previously year[1] raised IndexError here and the file was
        # reported as an unknown failure; use the generic clean-up instead.
        return routine(mm_2)

    # Drop everything after the first occurrence of the second 4-digit group.
    mm_21 = re.sub(year[1] + '.*', year[1], mm_2)
    list1 = mm_21.split()

    # Fund-type letters present in the truncated title select the tag.
    has_l = 'L' in mm_21 or 'l' in mm_21
    if 'Q' in mm_21 and has_l:
        return have_forlord(list1, '(QDII-LOF)', mm_2)
    if has_l:
        return have_forlord(list1, '(LOF)', mm_2)
    if 'F' in mm_21 or 'f' in mm_21:
        return have_forlord(list1, '(FOF)', mm_2)
    if 'Q' in mm_21 or 'q' in mm_21:
        return have_forlord(list1, '(QDII)', mm_2)

    if len(list1) >= 3:
        second_runs = _digit_runs(list1[1])
        last_runs = _digit_runs(list1[-1])
        if len(second_runs) == 1 and len(last_runs) == 1:
            return routine(mm_2)
        # In a token that mixes digits and text, move its first run to the
        # end, then glue all tokens together.
        if len(second_runs) > 1:
            list1[1] = ''.join(_rotate_left(second_runs))
        if len(last_runs) > 1:
            list1[-1] = ''.join(_rotate_left(last_runs))
        return ''.join(list1)

    if len(list1) == 2:
        runs = _digit_runs(list1[1])
        if len(runs) > 1:
            return changlist(runs, list1[0])
    return routine(mm_2)


def main():
    """Rename every PDF under the project folder after its first-page title.

    For each file the first page's text is extracted, everything from
    '基金管理人' onwards is dropped, and the remainder is converted into a
    title that becomes the new file name.  Progress and failures are printed
    in colour; after a failure the program waits for the user before moving
    on to the next file.
    """
    init(autoreset=True)
    project = 'D:\\证监会基金\\'   # same path value, backslash escaped
    num = 0
    for dirpath, _dirnames, filenames in os.walk(project):
        for name in sorted(filenames):
            # Join with the folder being visited so that files inside
            # sub-folders resolve to a path that exists.
            path = os.path.join(dirpath, name)
            print(path)
            try:
                with pdfplumber.open(path) as pdf:
                    text = pdf.pages[0].extract_text()   # first page only
                if not text:
                    # No extractable text: report it as an image-format
                    # file explicitly instead of relying on a TypeError.
                    print(Fore.BLACK + Back.RED + "修改失败,该文件为图片格式,请手动修改,按任意键继续执行。")
                    print(Fore.CYAN + Style.BRIGHT + '*' * 79)
                    input()
                    continue
                mm_0 = re.sub(r'\n', '', text)
                mm_1 = re.sub(r'基金管理人.*', '', mm_0)
                mm_3 = _derive_title(mm_1.strip())
                end = os.path.join(dirpath, mm_3 + '.pdf')
                # The PDF is closed by now, so the rename is safe.
                os.rename(path, end)
            except Exception as exc:
                # Exception rather than a bare except, so Ctrl-C and
                # SystemExit still terminate the program.
                print(Fore.BLACK + Back.RED + "该文件改名出现错误,请将该文件发送给李渊,以供优化程序,按任意键继续执行。")
                print(repr(exc))
                print(Fore.CYAN + Style.BRIGHT + '*' * 79)
                input()
                continue
            # Count and announce success only after the rename has happened.
            num += 1
            print(end)
            print(Fore.RED + Style.BRIGHT + '成功修改第{}个文件'.format(num))
            print(Fore.CYAN + Style.BRIGHT + '*' * 79)
    print(Back.CYAN + Fore.BLACK + Style.BRIGHT+"程序执行结束,按任意键退出。")
    input()

# Run the batch rename only when this file is executed as a script.
if __name__ == '__main__':
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: