TicTacToe Alpha Beta 剪枝

问题描述

EDIT 30/03/2021：问题确实措辞不当，重新表述一下

我在 Python 中实现了一个 Alpha-Beta 剪枝算法，我想知道它不选择最快的获胜路线是否正常（有时它本可以 1 步获胜，却用了 2 步才获胜）。

import math
from collections import Counter
from copy import copy,deepcopy

class Board:
    """Tic-tac-toe board stored as a flat list of 9 tiles.

    Tiles are indexed 0-8, row by row. Each tile holds "x", "o" or "."
    (empty).
    """

    # The 8 winning lines: 3 rows, 3 columns and 2 diagonals.
    LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """Create an empty board."""
        self.data = ["."] * 9

    @staticmethod
    def copy(board):
        """Return an independent deep copy of *board*."""
        return deepcopy(board)

    def play_at(self, position, color):
        """Place *color* on tile *position* if that tile is empty.

        Returns True when the move was made, False when the tile was
        already occupied (the board is left untouched in that case).
        """
        if self.data[position] != ".":
            return False
        self.data[position] = color
        return True

    def get_playable_coord(self):
        """Return the indices of all empty tiles, in ascending order."""
        return [i for i, tile in enumerate(self.data) if tile == "."]

    def is_full(self):
        """Return True when all 9 tiles hold an "x" or an "o"."""
        return self.data.count("x") + self.data.count("o") == 9

    def get_winner(self):
        """Return the game result for the current position.

        Returns "x" or "o" when that player owns a complete line, "draw"
        when the board is full without a complete line, and None while
        the game is still in progress.
        """
        for line in self.LINES:
            tiles = Counter(self.data[j] for j in line)
            if tiles["x"] == 3:
                return "x"
            if tiles["o"] == 3:
                return "o"

        # no complete line: a full board is a draw
        if self.is_full():
            return "draw"
        return None

    def get_heuristic_value(self, reverse):
        """Score the board from the point of view of "x".

        Each of the 8 lines contributes +100 / +10 / +1 when it holds
        three / two / one "x" and is otherwise empty, and the same
        amounts negated for "o". Lines containing both marks score 0.

        When *reverse* is true the sign is flipped, giving the score from
        the point of view of "o".
        """
        value = 0
        for line in self.LINES:
            tiles = Counter(self.data[j] for j in line)

            if tiles["x"] == 3:
                value += 100
            elif tiles["x"] == 2 and tiles["."] == 1:
                value += 10
            elif tiles["x"] == 1 and tiles["."] == 2:
                value += 1
            elif tiles["o"] == 3:
                value -= 100
            elif tiles["o"] == 2 and tiles["."] == 1:
                value -= 10
            elif tiles["o"] == 1 and tiles["."] == 2:
                value -= 1

        return -value if reverse else value



class Model:
    """Game state for one human player against an alpha-beta AI.

    ``color`` is the human player's mark and ``other`` is the AI's mark.
    ``winner`` is None while the game is running, otherwise the value
    returned by ``Board.get_winner`` ("x", "o" or "draw").
    """

    # search depth handed to alphabeta for every candidate AI move
    SEARCH_DEPTH = 10

    def __init__(self, color):
        """Start a new game in which the human plays *color*."""
        self.color = color
        self.other = self.get_opponent(color)
        self.board = Board()
        self.winner = None

        # 'x' plays first, so the AI opens when it holds "x"
        if self.other == "x":
            self.make_ai_move()

    def get_opponent(self, player):
        """Return "o" for "x", and "x" for anything else."""
        if player == "x":
            return "o"
        return "x"

    def make_player_move(self, pos):
        """Play the human's mark at *pos*, then let the AI answer.

        Does nothing once the game has a result. The AI only replies
        when the human move was legal and did not end the game.
        """
        if self.winner is not None:
            return

        played = self.board.play_at(pos, self.color)
        self.winner = self.board.get_winner()

        if played and self.winner is None:
            self.make_ai_move()

    def make_ai_move(self):
        """Play the AI move with the highest alpha-beta value.

        Every empty tile is tried on a copy of the board and scored with
        ``alphabeta``; the first tile reaching the best score is played.
        """
        best, best_value = None, -math.inf

        for pos in self.board.get_playable_coord():
            child = Board.copy(self.board)
            child.play_at(pos, self.other)

            # the opponent moves next, hence maximizingPlayer=False
            value = self.alphabeta(
                child, self.SEARCH_DEPTH, -math.inf, math.inf, False
            )
            if value > best_value:
                best, best_value = pos, value

        # nothing to play on a board without empty tiles
        if best is not None:
            self.board.play_at(best, self.other)

        self.winner = self.board.get_winner()

    def alphabeta(self, node, depth, alpha, beta, maximizingPlayer):
        """Return the minimax value of *node* using alpha-beta pruning.

        *node* is a board, *depth* the remaining search depth, and
        *alpha* / *beta* the current search window. The AI (``other``)
        is the maximizing player; the human (``color``) minimizes.

        NOTE: the evaluation does not take *depth* into account, so the
        search has no reason to prefer a quicker win over a slower one.
        """
        if depth == 0 or node.get_winner() is not None:
            # score from the AI's point of view: flip when the AI is "o"
            return node.get_heuristic_value(self.other == "o")

        if maximizingPlayer:
            value = -math.inf
            for pos in node.get_playable_coord():
                child = Board.copy(node)
                child.play_at(pos, self.other)
                value = max(
                    value,
                    self.alphabeta(child, depth - 1, alpha, beta, False),
                )

                alpha = max(alpha, value)
                if alpha >= beta:
                    break  # beta cut-off
            return value

        value = math.inf
        for pos in node.get_playable_coord():
            child = Board.copy(node)
            child.play_at(pos, self.color)
            value = min(
                value,
                self.alphabeta(child, depth - 1, alpha, beta, True),
            )

            beta = min(beta, value)
            if beta <= alpha:
                break  # alpha cut-off
        return value

我对这个问题的结论：

Alpha-Beta Pruning 是一种深度优先搜索算法，而不是广度优先搜索算法，所以我认为无论深度如何，它都会选择它找到的第一条路线，而不是搜索最快的路线...

解决方法

我知道这不是问题的答案，但我想为 AI tic-tac-toe 玩家建议一种更简单的方法：预先计算每个局面是必胜还是必败。这需要考虑游戏中任何时刻可能出现的所有有效局面，但由于棋盘是 3x3，有效局面的数量少于 3^9 = 19683（每个格子要么是 'x'、要么是 'o'，要么为空）。这只是一个宽松的上界，因为按照游戏规则，其中很多局面都是无效的。我建议你从这里开始，因为你所说的算法主要用于无法进行完全搜索的更复杂的游戏。

因此，您需要做的就是在程序启动后为每个局面计算一次赢/输指标，之后就能以 O(1) 的时间做出决定。这对于 3x3 的棋盘是可以接受的，但对更大的棋盘可能就不行了。

此处描述了一般方法：https://cp-algorithms.com/game_theory/games_on_graphs.html。简而言之，您构建一棵包含所有可能走法的树，将叶子标记为赢或输，然后通过考虑每个节点的所有子转换来逐层确定其余局面的结果（例如，如果每个转换都导致对方玩家获胜，则该局面为必败局面）。

如果您懂俄语，这里是原始页面的链接：http://e-maxx.ru/algo/games_on_graphs

附言：过去我也做过这个游戏，并实现了这种方法。这是我的 repo，以防您想研究一下：https://github.com/yuuurchyk/cpp_tic_tac_toe。公平警告：它是用 C++ 编写的，代码有点丑

alpha-beta-pruning minimax python