
(TensorFlow 1.0) Implementing a Game AI with Reinforcement Learning (Demo_1)

2017-02-17 18:14

http://blog.topspeedsnail.com/archives/10459

After working through the article above, I plan to move step by step toward a Tetris AI and a Dou Dizhu (Chinese card game) AI, and eventually to go beyond DQN and use adversarial networks to build a stronger AI.

The code below implements the program from the blog post above. Along the way I hit a gotcha introduced in TensorFlow 1.0: tf.mul was renamed to tf.multiply; the abbreviated operator names were changed to their fully spelled-out forms.
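As a minimal before/after sketch of that rename (q_values and action_mask are hypothetical placeholder tensors, not names from the program below):

import tensorflow as tf

q_values = tf.placeholder("float", [None, 3])     # hypothetical Q-value tensor
action_mask = tf.placeholder("float", [None, 3])  # hypothetical one-hot action tensor

# TensorFlow 0.x (pre-1.0): tf.mul -- removed in 1.0, raises AttributeError now
# q = tf.reduce_sum(tf.mul(q_values, action_mask), reduction_indices=1)

# TensorFlow 1.0+: the renamed, fully spelled-out tf.multiply
q = tf.reduce_sum(tf.multiply(q_values, action_mask), reduction_indices=1)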



# -*- coding: utf-8 -*-

import pygame
from pygame.locals import *
import sys
import tensorflow as tf
import cv2
import random
import numpy as np
from collections import deque

print(tf.__version__)

BLACK = (0, 0, 0)
WHITE = (255, 255, 255)

SCREEN_SIZE = [320, 400]
BAR_SIZE = [20, 5]
BALL_SIZE = [15, 15]

# one-hot action encoding
MOVE_STAY = [1, 0, 0]
MOVE_LEFT = [0, 1, 0]
MOVE_RIGHT = [0, 0, 1]

LEARN_RATE = 0.99       # discount factor (gamma) in the Q-learning target
INIT_EPSILON = 1.0      # initial exploration rate for epsilon-greedy
FINAL_EPSILON = 0.05    # final exploration rate
EXPLORE = 50000         # steps over which epsilon is annealed
OBSERVE = 5000          # steps of pure experience collection before training starts
REPLAY_MEMORY = 500000  # maximum size of the replay buffer
BATCH = 100             # minibatch size
class Game(object):
    def __init__(self):
        pygame.init()
        self.clock = pygame.time.Clock()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        pygame.display.set_caption('Simple Game')

        self.ball_pos_x = SCREEN_SIZE[0]//2 - BALL_SIZE[0]//2
        self.ball_pos_y = SCREEN_SIZE[1]//2 - BALL_SIZE[1]//2
        # ball movement direction
        self.ball_dir_x = -1  # -1 = left, 1 = right
        self.ball_dir_y = -1  # -1 = up,   1 = down
        self.ball_pos = pygame.Rect(self.ball_pos_x, self.ball_pos_y, BALL_SIZE[0], BALL_SIZE[1])

        self.score = 0
        self.bar_pos_x = SCREEN_SIZE[0]//2 - BAR_SIZE[0]//2
        self.bar_pos = pygame.Rect(self.bar_pos_x, SCREEN_SIZE[1]-BAR_SIZE[1], BAR_SIZE[0], BAR_SIZE[1])

    def bar_move_left(self):
        self.bar_pos_x = self.bar_pos_x - 2

    def bar_move_right(self):
        self.bar_pos_x = self.bar_pos_x + 2

    # advance the game by one frame and return (reward, screen image)
    def run(self, action):
        # pygame.mouse.set_visible(0)  # make cursor invisible

        # The original mouse-driven controls, kept for reference:
        # for event in pygame.event.get():
        #     if event.type == QUIT:
        #         pygame.quit()
        #         sys.exit()
        #     elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:  # left button pressed (move left)
        #         bar_move_left = True
        #     elif event.type == pygame.MOUSEBUTTONUP and event.button == 1:    # left button released
        #         bar_move_left = False
        #     elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3:  # right button
        #         bar_move_right = True
        #     elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
        #         bar_move_right = False

        if action == MOVE_LEFT:
            self.bar_move_left()
        elif action == MOVE_RIGHT:
            self.bar_move_right()
        else:
            pass  # MOVE_STAY

        # keep the bar inside the screen
        if self.bar_pos_x < 0:
            self.bar_pos_x = 0
        if self.bar_pos_x > SCREEN_SIZE[0] - BAR_SIZE[0]:
            self.bar_pos_x = SCREEN_SIZE[0] - BAR_SIZE[0]

        self.screen.fill(BLACK)

        self.bar_pos.left = self.bar_pos_x
        pygame.draw.rect(self.screen, WHITE, self.bar_pos)

        self.ball_pos.left += self.ball_dir_x * 2
        self.ball_pos.bottom += self.ball_dir_y * 3
        pygame.draw.rect(self.screen, WHITE, self.ball_pos)

        # bounce off the top, the bar line, and the side walls
        if self.ball_pos.top <= 0 or self.ball_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball_dir_y = self.ball_dir_y * -1
        if self.ball_pos.left <= 0 or self.ball_pos.right >= (SCREEN_SIZE[0]):
            self.ball_dir_x = self.ball_dir_x * -1

        reward = 0
        if self.bar_pos.top <= self.ball_pos.bottom and (self.bar_pos.left < self.ball_pos.right and self.bar_pos.right > self.ball_pos.left):
            self.score += 1
            reward = 1       # the bar caught the ball
            print("Score: ", self.score, end='\r')
        elif self.bar_pos.top <= self.ball_pos.bottom and (self.bar_pos.left > self.ball_pos.right or self.bar_pos.right < self.ball_pos.left):
            self.score = 0
            print("Game Over: ", self.score)
            reward = -1      # the bar missed the ball

        pygame.display.update()
        self.clock.tick(60)

        MyGame_image = pygame.surfarray.array3d(pygame.display.get_surface())
        return reward, MyGame_image

output = 3  # number of actions: stay / left / right
input_image = tf.placeholder("float", [None, 80, 100, 4])  # four stacked game frames

action = tf.placeholder("float", [None, 3])  # unused; train_neural_network builds its own action-value op

def convolutional_neural_network(input_image):
    # All-zero weights would leave the ReLU network unable to learn (every unit
    # starts identical with zero output), so small random weights are used instead.
    weights = {'w_conv1': tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.01)),
               'w_conv2': tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.01)),
               'w_conv3': tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.01)),
               'w_fc4':   tf.Variable(tf.truncated_normal([3456, 784], stddev=0.01)),
               'w_out':   tf.Variable(tf.truncated_normal([784, output], stddev=0.01))}

    biases = {'b_conv1': tf.Variable(tf.zeros([32])),
              'b_conv2': tf.Variable(tf.zeros([64])),
              'b_conv3': tf.Variable(tf.zeros([64])),
              'b_fc4':   tf.Variable(tf.zeros([784])),
              'b_out':   tf.Variable(tf.zeros([output]))}

    # shapes: 80x100x4 -> 19x24x32 -> 8x11x64 -> 6x9x64 (= 3456 values flattened)
    conv1 = tf.nn.relu(tf.nn.conv2d(input_image, weights['w_conv1'], strides=[1, 4, 4, 1], padding="VALID") + biases['b_conv1'])
    conv2 = tf.nn.relu(tf.nn.conv2d(conv1, weights['w_conv2'], strides=[1, 2, 2, 1], padding="VALID") + biases['b_conv2'])
    conv3 = tf.nn.relu(tf.nn.conv2d(conv2, weights['w_conv3'], strides=[1, 1, 1, 1], padding="VALID") + biases['b_conv3'])
    conv3_flat = tf.reshape(conv3, [-1, 3456])
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])

    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']  # one Q-value per action
    return output_layer

def train_neural_network(input_image):
    predict_action = convolutional_neural_network(input_image)

    argmax = tf.placeholder("float", [None, output])  # one-hot action taken
    gt = tf.placeholder("float", [None])              # Q-learning target value

    # Q-value of the action that was actually taken
    # (tf.mul was renamed to tf.multiply in TensorFlow 1.0)
    readout_action = tf.reduce_sum(tf.multiply(predict_action, argmax), reduction_indices=1)

    cost = tf.reduce_mean(tf.square(readout_action - gt))
    # note: Adadelta with a 1e-6 learning rate trains extremely slowly;
    # Adam(1e-6) is the more common choice for this kind of DQN demo
    optimizer = tf.train.AdadeltaOptimizer(1e-6).minimize(cost)

    game = Game()
    D = deque()  # replay memory

    # initial state: one frame, grayscaled, binarized, and stacked four times
    _, image = game.run(MOVE_STAY)
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    input_image_data = np.stack((image, image, image, image), axis=2)

    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated as of TensorFlow 1.0
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        n = 0
        epsilon = INIT_EPSILON
        while True:
            action_t = predict_action.eval(feed_dict={input_image: [input_image_data]})[0]

            argmax_t = np.zeros([output], dtype=np.int)
            # epsilon-greedy: explore with probability epsilon
            # (the original compared against the constant INIT_ESPTION, so the
            # annealed epsilon never took effect; also fixes the np.armax typo)
            if random.random() <= epsilon:
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)
            argmax_t[maxIndex] = 1

            # anneal epsilon linearly over EXPLORE steps
            if epsilon > FINAL_EPSILON:
                epsilon -= (INIT_EPSILON - FINAL_EPSILON) / EXPLORE

            reward, image = game.run(list(argmax_t))
            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            image = np.reshape(image, (80, 100, 1))

            # next state: the newest frame plus the three most recent old frames
            input_image_data1 = np.append(image, input_image_data[:, :, 0:3], axis=2)

            D.append((input_image_data, argmax_t, reward, input_image_data1))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                input_image_data1_batch = [d[3] for d in minibatch]

                gt_batch = []
                out_batch = predict_action.eval(feed_dict={input_image: input_image_data1_batch})
                # Q-learning target: r + gamma * max_a' Q(s', a')
                for i in range(0, len(minibatch)):
                    gt_batch.append(reward_batch[i] + LEARN_RATE * np.max(out_batch[i]))

                optimizer.run(feed_dict={gt: gt_batch, argmax: argmax_batch, input_image: input_image_data_batch})

            input_image_data = input_image_data1
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, 'C:\\Users\\hasee\\game.cpk', global_step=n)

            print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward: ", reward)

train_neural_network(input_image)
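
The training loop checkpoints the weights every 10,000 steps via tf.train.Saver. As a usage note, here is a minimal restore-and-play sketch; it assumes a separate run in which the graph-building code above has executed (so predict_action, input_image, output, Game, etc. exist) but train_neural_network has not been called, and that checkpoints were written to the directory used in saver.save. None of this is from the original post:

# minimal sketch: restore the latest checkpoint and act greedily (no exploration)
with tf.Session() as sess:
    saver = tf.train.Saver()
    ckpt = tf.train.latest_checkpoint('C:\\Users\\hasee')  # directory saver.save wrote to
    saver.restore(sess, ckpt)

    game = Game()
    _, image = game.run(MOVE_STAY)
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    state = np.stack((image, image, image, image), axis=2)

    while True:
        q = predict_action.eval(feed_dict={input_image: [state]})[0]
        move = np.zeros([output], dtype=np.int)
        move[np.argmax(q)] = 1            # always take the highest-Q action
        _, image = game.run(list(move))
        image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
        ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
        state = np.append(np.reshape(image, (80, 100, 1)), state[:, :, 0:3], axis=2)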