问题描述
#UPDATE from 13.05.2021
在空闲时间,我已经添加了程序的全部代码(现在还包括其他功能)。我还用我写的带有结果的代码补充了这篇文章(请参阅文章末尾)
我目前正在努力在 tic-tac-toe 游戏中实现 Minmax 算法(没有 Alpa-Beta 修剪)。我目前的算法效果不佳 - 选择的动作远非最佳(至少在算法的意义上)。此外,我无法解决我必须实现的所述算法的哪个元素是错误的 - 我相信逻辑问题与我使用递归有关 - 但是我没有看到逻辑漏洞。如果您能帮我突出显示我的逻辑分解的地方 - 我将非常感激。我在下面插入我的 Python 代码。
X = "X"
O = "O"
EMPTY = None
# List terminal states contain all possible combination of terminal states
that Could occur in a game
# of tic-tac-toe.
terminal_states = [[(0,2),(1,1),(2,0)],[(0,0),2)],# victory
on the diagonal
[(0,(0,[(1,1)],[(2,# horizontal victory
[(0,2)] # vertical victory
]
def initial_state():
# Returns starting state of the board.
return [[EMPTY,EMPTY,EMPTY],[EMPTY,EMPTY]]
def player(board):
# Returns player who has the next turn on a board.
x_amount = sum([list.count(X) for list in board])
y_amount = sum([list.count(O) for list in board])
if x_amount <= y_amount:
return X
else:
return O
def actions(board):
# Returns set of all possible actions (i,j) available on the board.
set_of_actions = []
for i in range(len(board)):
for j in range(len(board[i])):
if board[i][j] == EMPTY:
action = (i,j)
set_of_actions.append(action)
return set_of_actions
def result(board,action):
# Returns the board that results from making move (i,j) on the board.
player_turn = player(board)
deep_board = copy.deepcopy(board)
row = action[0]
column = action[1]
if deep_board[row][column] != EMPTY:
raise ValueError("Not a valid action!")
else:
deep_board[row][column] = player_turn
return deep_board
def terminal(board):
# Returns True if game is over,False otherwise.
board_counter = 0
for row in board:
if EMPTY not in row:
board_counter += 1
if board_counter == 3:
return True
if terminal_state(board) == 1 or terminal_state(board) == 2:
return True
return False
def utility(board):
# Returns 1 if X has won the game,-1 if O has won,0 otherwise.
if terminal_state(board) == 1:
return 1
elif terminal_state(board) == 2:
return -1
else:
return 0
def terminal_state(board):
for possibility in terminal_states:
termination = 0
for state in possibility:
if board[state[0]][state[1]] == X:
termination += 1
if termination == 3:
return 1
for possibility in terminal_states:
termination = 0
for state in possibility:
if board[state[0]][state[1]] == O:
termination += 1
if termination == 3:
return 2
return 0
def minimax(board):
# Returns the optimal action for the current player on the board.
def Max_Value(board,move):
v = -2
if terminal(board):
score = utility(board)
ultimate = {move: score}
return ultimate
else:
moves = {}
for action in actions(board):
m = Min_Value(result(board,action),action)
for key,value in m.items():
moves[key] = value
for key,value in moves.items():
if value >= v:
v = value
ultimate = {key: value}
return ultimate
def Min_Value(board,move):
v = 2
if terminal(board):
score = utility(board)
ultimate = {move: score}
return ultimate
else:
moves = {}
for action in actions(board):
m = Max_Value(result(board,value in m.items():
moves[key] = value
for key,value in moves.items():
if value <= v:
v = value
ultimate = {key: value}
return ultimate
turn = player(board)
move = None
if turn == X:
ultimate = Max_Value(board,move)
for keys in ultimate.keys():
return keys
else:
ultimate = Min_Value(board,move)
for keys in ultimate.keys():
return keys
上述测试:
def test_minimax(self):
board_empty = [[EMPTY,EMPTY]
]
n = 1
while terminal(board_empty) != True:
action = minimax(board_empty)
board_empty = result(board_empty,action)
print("The {0} turn:".format(n))
print("{0}\n".format(board_empty))
n += 1
print("The terminal board is as follows: ")
print(board_empty)
结果:
Process finished with exit code 0
The 1 turn:
[[None,None,None],['X',[None,None]]
The 2 turn:
[[None,'O',None]]
The 3 turn:
[[None,'X',None]]
The 4 turn:
[[None,'O'],None]]
The 5 turn:
[[None,None]]
The 6 turn:
[[None,None]]
The 7 turn:
[['X',None]]
The terminal board is as follows:
[['X',None]]
解决方法
我会确保最大值/最小值永远不会分别为 2/-2。如果可以,请尝试改用 if value <= v:
。