(TensorFlow 1.0) Implementing a Game AI with Reinforcement Learning (Demo_1)
2017-02-17 18:14
http://blog.topspeedsnail.com/archives/10459
After working through that article, I plan to progress step by step toward a Tetris AI and a Dou Dizhu (Fight the Landlord) AI, and eventually to move beyond DQN and use adversarial neural networks to build an even stronger AI.
The code below implements the program from the blog post above. Along the way I hit a gotcha introduced in TensorFlow 1.0: tf.mul was renamed to tf.multiply — the abbreviated operation names were changed to their fully spelled-out forms.
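For reference, the fix is a one-for-one substitution (tf.sub and tf.neg were renamed to tf.subtract and tf.negative in the same release):

```python
import tensorflow as tf

a = tf.constant([1.0, 2.0])
b = tf.constant([3.0, 4.0])

# c = tf.mul(a, b)      # TensorFlow < 1.0: removed in 1.0
c = tf.multiply(a, b)   # TensorFlow >= 1.0: same element-wise semantics
```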
```python
# -*- coding: utf-8 -*-
import pygame
from pygame.locals import *
import sys
import tensorflow as tf
import cv2
import random
import numpy as np
from collections import deque

print(tf.__version__)

BLACK = (0, 0, 0)
WHITE = (255, 255, 255)

SCREEN_SIZE = [320, 400]
BAR_SIZE = [20, 5]
BALL_SIZE = [15, 15]

# one-hot encoding of the three actions
MOVE_STAY = [1, 0, 0]
MOVE_LEFT = [0, 1, 0]
MOVE_RIGHT = [0, 0, 1]

LEARN_RATE = 0.99        # discount factor (gamma) for future rewards
INIT_EPSILON = 1.0       # initial exploration rate
FINAL_EPSILON = 0.05     # final exploration rate
EXPLORE = 50000          # steps over which epsilon decays
OBSERVE = 5000           # steps of pure observation before training starts
REPLAY_MEMORY = 500000   # capacity of the experience replay buffer
BATCH = 100              # minibatch size


class Game(object):
    def __init__(self):
        pygame.init()
        self.clock = pygame.time.Clock()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        pygame.display.set_caption('Simple Game')

        self.ball_pos_x = SCREEN_SIZE[0] // 2 - BALL_SIZE[0] // 2
        self.ball_pos_y = SCREEN_SIZE[1] // 2 - BALL_SIZE[1] // 2
        # ball movement direction
        self.ball_dir_x = -1  # -1 = left, 1 = right
        self.ball_dir_y = -1  # -1 = up, 1 = down
        self.ball_pos = pygame.Rect(self.ball_pos_x, self.ball_pos_y,
                                    BALL_SIZE[0], BALL_SIZE[1])

        self.score = 0
        self.bar_pos_x = SCREEN_SIZE[0] // 2 - BAR_SIZE[0] // 2
        self.bar_pos = pygame.Rect(self.bar_pos_x, SCREEN_SIZE[1] - BAR_SIZE[1],
                                   BAR_SIZE[0], BAR_SIZE[1])

    def bar_move_left(self):
        self.bar_pos_x = self.bar_pos_x - 2

    def bar_move_right(self):
        self.bar_pos_x = self.bar_pos_x + 2

    def run(self, action):
        """Advance the game by one frame; return (reward, screen pixels)."""
        if action == MOVE_LEFT:
            self.bar_move_left()
        elif action == MOVE_RIGHT:
            self.bar_move_right()

        # keep the bar inside the screen
        if self.bar_pos_x < 0:
            self.bar_pos_x = 0
        if self.bar_pos_x > SCREEN_SIZE[0] - BAR_SIZE[0]:
            self.bar_pos_x = SCREEN_SIZE[0] - BAR_SIZE[0]

        self.screen.fill(BLACK)
        self.bar_pos.left = self.bar_pos_x
        pygame.draw.rect(self.screen, WHITE, self.bar_pos)

        self.ball_pos.left += self.ball_dir_x * 2
        self.ball_pos.bottom += self.ball_dir_y * 3
        pygame.draw.rect(self.screen, WHITE, self.ball_pos)

        # bounce off the bar line at the bottom, the top, and the side walls
        if self.ball_pos.top <= 0 or self.ball_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball_dir_y = self.ball_dir_y * -1
        if self.ball_pos.left <= 0 or self.ball_pos.right >= SCREEN_SIZE[0]:
            self.ball_dir_x = self.ball_dir_x * -1

        reward = 0
        if self.bar_pos.top <= self.ball_pos.bottom and \
                (self.bar_pos.left < self.ball_pos.right and self.bar_pos.right > self.ball_pos.left):
            self.score += 1
            reward = 1   # the bar caught the ball
            print("Score: ", self.score, end='\r')
        elif self.bar_pos.top <= self.ball_pos.bottom and \
                (self.bar_pos.left > self.ball_pos.right or self.bar_pos.right < self.ball_pos.left):
            self.score = 0
            reward = -1  # the bar missed the ball
            print("Game Over: ", self.score)

        pygame.display.update()
        self.clock.tick(60)
        MyGame_image = pygame.surfarray.array3d(pygame.display.get_surface())
        return reward, MyGame_image


output = 3  # number of actions
input_image = tf.placeholder("float", [None, 80, 100, 4])  # stack of the last 4 frames
action = tf.placeholder("float", [None, 3])


def convolutional_neural_network(input_image):
    # small random init instead of tf.zeros: all-zero weights keep the units
    # symmetric, so the network would never learn
    weights = {'w_conv1': tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.01)),
               'w_conv2': tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.01)),
               'w_conv3': tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.01)),
               'w_fc4': tf.Variable(tf.truncated_normal([3456, 784], stddev=0.01)),
               'w_out': tf.Variable(tf.truncated_normal([784, output], stddev=0.01))}
    biases = {'b_conv1': tf.Variable(tf.zeros([32])),
              'b_conv2': tf.Variable(tf.zeros([64])),
              'b_conv3': tf.Variable(tf.zeros([64])),
              'b_fc4': tf.Variable(tf.zeros([784])),
              'b_out': tf.Variable(tf.zeros([output]))}

    conv1 = tf.nn.relu(tf.nn.conv2d(input_image, weights['w_conv1'],
                                    strides=[1, 4, 4, 1], padding="VALID") + biases['b_conv1'])
    conv2 = tf.nn.relu(tf.nn.conv2d(conv1, weights['w_conv2'],
                                    strides=[1, 2, 2, 1], padding="VALID") + biases['b_conv2'])
    conv3 = tf.nn.relu(tf.nn.conv2d(conv2, weights['w_conv3'],
                                    strides=[1, 1, 1, 1], padding="VALID") + biases['b_conv3'])
    conv3_flat = tf.reshape(conv3, [-1, 3456])  # 6 * 9 * 64 = 3456 after the three VALID convs
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])
    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']  # Q value per action
    return output_layer


def train_neural_network(input_image):
    predict_action = convolutional_neural_network(input_image)

    argmax = tf.placeholder("float", [None, output])  # one-hot action taken
    gt = tf.placeholder("float", [None])              # Q-learning target
    # tf.mul was renamed tf.multiply in TensorFlow 1.0
    action_q = tf.reduce_sum(tf.multiply(predict_action, argmax), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(action_q - gt))
    optimizer = tf.train.AdadeltaOptimizer(1e-6).minimize(cost)

    game = Game()
    D = deque()  # experience replay buffer

    _, image = game.run(MOVE_STAY)
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    input_image_data = np.stack((image, image, image, image), axis=2)  # initial 4-frame stack

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated in 1.0
        saver = tf.train.Saver()

        n = 0
        epsilon = INIT_EPSILON
        while True:
            action_t = predict_action.eval(feed_dict={input_image: [input_image_data]})[0]

            argmax_t = np.zeros([output], dtype=np.int)
            # epsilon-greedy: compare against the decaying epsilon
            # (comparing against INIT_EPSILON would explore forever)
            if random.random() <= epsilon:
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)  # np.armax was a typo
            argmax_t[maxIndex] = 1
            if epsilon > FINAL_EPSILON:
                epsilon -= (INIT_EPSILON - FINAL_EPSILON) / EXPLORE

            reward, image = game.run(list(argmax_t))

            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            image = np.reshape(image, (80, 100, 1))
            # drop the oldest frame and push the newest one onto the stack
            input_image_data1 = np.append(image, input_image_data[:, :, 0:3], axis=2)

            D.append((input_image_data, argmax_t, reward, input_image_data1))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                input_image_data1_batch = [d[3] for d in minibatch]

                gt_batch = []
                out_batch = predict_action.eval(feed_dict={input_image: input_image_data1_batch})
                # Bellman target: r + gamma * max_a' Q(s', a')
                for i in range(0, len(minibatch)):
                    gt_batch.append(reward_batch[i] + LEARN_RATE * np.max(out_batch[i]))

                optimizer.run(feed_dict={gt: gt_batch, argmax: argmax_batch,
                                         input_image: input_image_data_batch})

            input_image_data = input_image_data1
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, 'C:\\Users\\hasee\\game.cpk', global_step=n)

            print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward: ", reward)


train_neural_network(input_image)
```
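Once training has written a checkpoint, the network can be used to play without exploration. Below is a minimal sketch under stated assumptions: it runs in a fresh process that defines the same Game class, convolutional_neural_network, input_image, output, MOVE_STAY, and preprocessing as above, and checkpoints were saved to the path used in train_neural_network. The play function itself is my own illustrative addition, not part of the original program:

```python
# Minimal inference sketch (assumption: Game, convolutional_neural_network,
# input_image, output and MOVE_STAY are defined exactly as in the script above).
import cv2
import numpy as np
import tensorflow as tf

def play():
    game = Game()
    predict_action = convolutional_neural_network(input_image)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # restore the newest checkpoint written by train_neural_network
        ckpt = tf.train.latest_checkpoint('C:\\Users\\hasee')
        saver.restore(sess, ckpt)

        _, image = game.run(MOVE_STAY)
        image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
        _, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
        state = np.stack((image, image, image, image), axis=2)

        while True:
            q_values = predict_action.eval(feed_dict={input_image: [state]})[0]
            onehot = np.zeros([output], dtype=np.int)
            onehot[np.argmax(q_values)] = 1  # epsilon = 0: always the greedy action
            _, image = game.run(list(onehot))
            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            _, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            state = np.append(np.reshape(image, (80, 100, 1)), state[:, :, 0:3], axis=2)

play()
```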