您的位置:首页 > 其它

记录当程序猿的日子

2020-06-01 04:36 86 查看
import json
import os
import pickle
import shutil
from os import remove
from xml.etree.ElementTree import tostring

import cv2 as cv
import jieba
import matplotlib.pyplot as plt  # plt is used to display images
import numpy as np
from keras_preprocessing.image import load_img, img_to_array
from PIL import Image
from skimage import io
from tqdm import tqdm

from config import *

def read_directory(directory_name, output_dir="D://wangyang//face1"):
    """Copy every readable image in a folder into another folder via OpenCV.

    Each entry of ``directory_name`` is loaded with ``cv.imread`` and written
    under the same file name into ``output_dir`` with ``cv.imwrite``.

    Args:
        directory_name: Folder whose entries are read.
        output_dir: Folder the images are written to. Defaults to the
            location that used to be hard-coded in this function.
    """
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(directory_name):
        img = cv.imread(os.path.join(directory_name, filename))

        # cv.imread returns None instead of raising when an entry cannot be
        # decoded (sub-folder, non-image file, corrupt image); handing that
        # None to cv.imwrite would abort the whole run.
        if img is None:
            continue

        cv.imwrite(os.path.join(output_dir, filename), img)

class SaveJson(object):
    """Append JSON-serialised records to a text file, one record per line."""

    def save_file(self, path, item):
        """Serialise ``item`` to JSON and append it to the file at ``path``.

        Every record is written as the JSON text followed by a comma and a
        newline. The file is created when it does not exist yet.

        Args:
            path: Destination file path.
            item: JSON-serialisable object (a dict in this script).

        Write failures are reported on stdout and not re-raised, so a caller
        looping over many records keeps going (best-effort behaviour).
        """
        # Convert the object to a string first; this stays outside the try
        # block so that serialisation errors are not reported as write errors.
        record = json.dumps(item)

        try:
            # Append mode creates a missing file, so no separate
            # "does the file exist?" branch with mode "w" is needed.
            with open(path, "a", encoding='utf-8') as f:
                f.write(record + ",\n")
                print("^_^ write success")
        except Exception as e:
            print("write error==>", e)
if __name__ == '__main__':
    # Purpose: shrink the AI Challenger caption training set by exporting a
    # small number of images together with their annotation records.

    # Output locations: annotation records and the folder for copied images.
    path = "F:/data_image/train_image_caption.json"
    path_image = "F:/data_image"
    s = SaveJson()

    # Exporting stops once the pair counter exceeds this value.
    max_pairs = 50

    # Forward slashes only: the backslashes previously mixed into this path
    # formed invalid escape sequences ("\d", "\c").
    train_folder = 'F:/PyCharm/WorkSpace/Image-Captioning-master/data/ai_challenger_caption_train_20170902/caption_train_images_20170902'
    train_annotations_filename = 'F:/PyCharm/WorkSpace/Image-Captioning-master/data/ai_challenger_caption_train_20170902/caption_train_annotations_20170902.json'

    names = [f for f in os.listdir(train_folder) if f.endswith('.jpg')]

    # Load the annotations. The encoding is explicit so that the platform
    # default code page is never used to decode the file.
    with open(train_annotations_filename, 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    # Index the annotations by image id once, instead of rescanning the whole
    # annotation list for every image.
    annotations_by_image = {}
    for a in annotations:
        annotations_by_image.setdefault(a["image_id"], []).append(a)

    os.makedirs(path_image, exist_ok=True)

    i = 0
    for image in tqdm(names):
        if i > max_pairs:
            break

        matches = annotations_by_image.get(image)
        if not matches:
            print("配对不成功")
            continue

        for a in matches:
            trian_json = {"url": a["url"], "image_id": a["image_id"], "caption": a["caption"]}
            # Store the annotation record.
            s.save_file(path, trian_json)
            i += 1

        # Store the image. The destination must include the file name, not
        # just the folder. A byte-for-byte copy is used rather than decoding
        # and re-encoding: load_img yields RGB data while cv.imwrite expects
        # BGR, so the earlier approach swapped the red and blue channels and
        # also recompressed the JPEG.
        shutil.copyfile(os.path.join(train_folder, image),
                        os.path.join(path_image, image))
        print("配对成功")

写这个程序的目的是:我做深度学习用的数据集太大了,需要从中抽取一部分来缩小数据集的规模。程序本身很简单,但是在图片保存这一步卡了我好几天。

# NOTE: earlier, faulty version of the image-saving step, shown to illustrate the mistake.
filename = os.path.join(train_folder, image)
# #img = cv.imread(filename)
img = load_img(filename)  # path of the image to load
img_array = img_to_array(img)  # convert the image to array (matrix) form
# BUG: path_image is only the output folder, so cv.imwrite is given no file name to write to.
cv.imwrite(path_image ,img_array)

问题就出在 cv.imwrite(path_image ,img_array) 这儿,我记得我当时写的就是
这样的。可是这样写的话,只是给出了图片的存储文件夹,没有告知图片要存储的文件名,这是最大的失误,以后看程序一定要十分小心啊。
然后在这里回顾一下 json 的筛选、导出以及保存:

class SaveJson(object):
    """Append JSON-serialised records to a text file, one record per line."""

    def save_file(self, path, item):
        """Serialise ``item`` to JSON and append it to the file at ``path``.

        Every record is written as the JSON text followed by a comma and a
        newline. The file is created when it does not exist yet.

        Args:
            path: Destination file path.
            item: JSON-serialisable object (a dict in this script).

        Write failures are reported on stdout and not re-raised, so a caller
        looping over many records keeps going (best-effort behaviour).
        """
        # Convert the object to a string first; this stays outside the try
        # block so that serialisation errors are not reported as write errors.
        record = json.dumps(item)

        try:
            # Append mode creates a missing file, so no separate
            # "does the file exist?" branch with mode "w" is needed.
            with open(path, "a", encoding='utf-8') as f:
                f.write(record + ",\n")
                print("^_^ write success")
        except Exception as e:
            print("write error==>", e)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: