问题描述
EDIT 30/03/2021:问题确实措辞不当,重新表述一下
我用 Python 实现了一个 Alpha-Beta 剪枝算法,想知道它不选择最快的获胜路线是否正常(有时它本可以在 1 步内获胜,却用了 2 步才获胜)。
import math
from collections import Counter
from copy import copy,deepcopy
""" Board Class DeFinition """
class Board:
    """Tic-tac-toe board stored as a flat list of nine tiles.

    Each tile holds "x", "o" or "." (empty) and is addressed by an index
    in the range 0-8.
    """

    # The eight three-tile alignments that decide a game. Shared by
    # get_winner() and get_heuristic_value() so both always agree.
    _LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """Create an empty board (nine "." tiles)."""
        self.data = ["." for _ in range(9)]

    @staticmethod
    def copy(board):
        """Return an independent deep copy of ``board``."""
        return deepcopy(board)

    def play_at(self, position, color):
        """Place ``color`` on tile ``position`` if that tile is empty.

        Returns True when the move was made, False when the tile was
        already occupied (the board is then left unchanged).
        """
        if self.data[position] != ".":
            return False
        self.data[position] = color
        return True

    def get_playable_coord(self):
        """Return the indices of all empty tiles, in ascending order."""
        return [i for i, tile in enumerate(self.data) if tile == "."]

    def is_full(self):
        """Return True when all nine tiles hold either "x" or "o"."""
        tally = Counter(self.data)
        return tally["x"] + tally["o"] == 9

    def get_winner(self):
        """Return the outcome of the board.

        Returns "x" or "o" when that player owns a complete line, "draw"
        when the board is full without a winner, and None while the game
        is still open.
        """
        for line in self._LINES:
            tally = Counter(self.data[j] for j in line)
            if tally["x"] == 3:
                return "x"
            if tally["o"] == 3:
                return "o"
        # No completed line: a full board is a draw, otherwise play goes on.
        if self.is_full():
            return "draw"
        return None

    def get_heuristic_value(self, reverse):
        """Score the board from the point of view of "x".

        Each line contributes +100 / +10 / +1 when it holds three, two or
        one "x" and is otherwise empty, and the mirrored negative amounts
        for "o". Lines containing both colors contribute nothing.

        When ``reverse`` is true the sign is flipped, giving the score
        from the point of view of "o".
        """
        value = 0
        for line in self._LINES:
            tally = Counter(self.data[j] for j in line)
            if tally["x"] == 3:
                value += 100
            elif tally["x"] == 2 and tally["."] == 1:
                value += 10
            elif tally["x"] == 1 and tally["."] == 2:
                value += 1
            elif tally["o"] == 3:
                value -= 100
            elif tally["o"] == 2 and tally["."] == 1:
                value -= 10
            elif tally["o"] == 1 and tally["."] == 2:
                value -= 1
        return -value if reverse else value
""" Model Class DeFinition """
class Model:
    """Game state for one human player against an alpha-beta AI.

    ``color`` is the human's mark ("x" or "o"); the AI plays the other
    mark, stored in ``other``. "x" always moves first, so when the human
    chooses "o" the AI opens the game from the constructor.
    """

    def __init__(self, color):
        """Set up an empty board and let the AI open if it plays "x"."""
        self.color = color
        self.other = self.get_opponent(color)
        self.board = Board()
        self.winner = None
        # 'x' plays first
        if self.other == "x":
            self.make_ai_move()

    def get_opponent(self, player):
        """Return "o" for "x", and "x" for anything else."""
        if player == "x":
            return "o"
        return "x"

    def make_player_move(self, pos):
        """Play the human's mark at ``pos`` and, if the game goes on, reply.

        Does nothing once a winner (or draw) has been recorded. When the
        tile is occupied the move is rejected and the AI does not reply.
        """
        if self.winner is not None:
            return
        played = self.board.play_at(pos, self.color)
        self.winner = self.board.get_winner()
        if played and self.winner is None:
            self.make_ai_move()

    def make_ai_move(self):
        """Play the AI move with the highest alpha-beta value.

        Every empty tile is tried on a copy of the board and scored with
        ``alphabeta``; the first tile reaching the best score is played.
        """
        best, best_value = None, -math.inf
        for pos in self.board.get_playable_coord():
            child = Board.copy(self.board)
            child.play_at(pos, self.other)
            # The AI has just moved on ``child``, so the opponent is next.
            value = self.alphabeta(child, 10, -math.inf, math.inf, False)
            if value > best_value:
                best, best_value = pos, value
        # With no empty tile there is nothing to play; avoid indexing
        # the board with None.
        if best is None:
            return
        self.board.play_at(best, self.other)
        self.winner = self.board.get_winner()

    def alphabeta(self, node, depth, alpha, beta, maximizingPlayer):
        """Return the minimax value of ``node`` using alpha-beta pruning.

        ``node`` is a board, ``depth`` the remaining search depth, and
        ``alpha`` / ``beta`` the current pruning window. The AI
        (``self.other``) is the maximizing player, the human
        (``self.color``) the minimizing one.

        NOTE: leaf scores come from the board alone and carry no depth
        term, so nothing here explicitly favours a quicker win over a
        slower one.
        """
        # Leaf: depth exhausted or game decided. The heuristic is written
        # for "x", so flip it when the AI plays "o".
        if depth == 0 or node.get_winner() is not None:
            return node.get_heuristic_value(self.other == "o")

        if maximizingPlayer:
            value = -math.inf
            for pos in node.get_playable_coord():
                child = Board.copy(node)
                child.play_at(pos, self.other)
                value = max(
                    value,
                    self.alphabeta(child, depth - 1, alpha, beta, False),
                )
                alpha = max(alpha, value)
                if alpha >= beta:
                    break  # beta cut-off
            return value

        value = math.inf
        for pos in node.get_playable_coord():
            child = Board.copy(node)
            child.play_at(pos, self.color)
            value = min(
                value,
                self.alphabeta(child, depth - 1, alpha, beta, True),
            )
            beta = min(beta, value)
            if beta <= alpha:
                break  # alpha cut-off
        return value
我对这个问题的结论:
Alpha-Beta 剪枝是一种深度优先搜索算法,而不是广度优先搜索算法,所以我认为它会选择找到的第一条获胜路线,而不考虑该路线的深度,并不会专门去寻找最快的路线……
解决方法
我知道这不是问题的答案,但我想为井字棋(tic-tac-toe)AI 玩家建议一种更简单的方法:预先计算每个局面是必胜还是必败。这需要考虑游戏中任何时刻可能出现的所有有效局面,但由于棋盘是 3x3,有效局面的数量少于 3^9 = 19683(每个格子要么是 'x'、'o',要么为空)。这并不是一个严格的上限,因为从游戏规则的角度来看,其中很多局面都是无效的。我建议你从这里开始,因为你所说的算法主要用于无法进行完整搜索的更复杂的游戏。
因此,您需要做的就是在程序启动后为每个局面计算一次赢/输标记,之后就可以在 O(1) 时间内做出决定。这对于 3x3 的棋盘是可以接受的,但对更大的棋盘可能就不可行了。
此处描述了一般方法:https://cp-algorithms.com/game_theory/games_on_graphs.html。简而言之,您构建一棵包含所有可能走法的树,将叶子节点标记为赢或输,然后通过考虑每个局面的所有后继走法向上推导(例如,如果一个局面的每一种走法都导向对方必胜的局面,则该局面是必败局面)。
如果您懂俄语,这里是原始页面的链接:http://e-maxx.ru/algo/games_on_graphs
附言:过去我也玩过这个游戏,并实现过这种方法。这是我的 repo,以防您想了解一下:https://github.com/yuuurchyk/cpp_tic_tac_toe。公平警告:它是用 C++ 编写的,代码有点丑。