TicTacToe Alpha Beta 剪枝

问题描述

EDIT 30/03/2021:问题确实措辞不当,重新表述一下

我在 Python 中实现了一个 Alpha-Beta 剪枝算法,想知道它不选择最快的获胜路线是否正常(有时它本可以在 1 步内获胜,却用了 2 步才获胜)。

import math
from collections import Counter
from copy import copy,deepcopy

""" Board Class Definition """
class Board:
    """Tic-tac-toe board stored as a flat list of nine cells.

    Cells are indexed 0-8 in row-major order; each holds "x", "o" or "."
    (empty).
    """

    # Every winning alignment: three rows, three columns, two diagonals.
    # Shared by get_winner and get_heuristic_value so both always agree.
    WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """Create an empty board."""
        self.data = ["."] * 9

    @staticmethod
    def copy(board):
        """Return an independent deep copy of ``board``."""
        return deepcopy(board)

    def play_at(self, position, color):
        """Place ``color`` on cell ``position`` if that cell is empty.

        Returns True when the move was made, False when the cell was
        already occupied (the board is then left untouched).
        """
        if self.data[position] != ".":
            return False
        self.data[position] = color
        return True

    def get_playable_coord(self):
        """Return the indices of all empty cells, in ascending order."""
        return [i for i, cell in enumerate(self.data) if cell == "."]

    def is_full(self):
        """Return True when every cell holds an "x" or an "o"."""
        occupied = self.data.count("x") + self.data.count("o")
        return occupied == len(self.data)

    def get_winner(self):
        """Return "x" or "o" for a completed line, "draw" for a full board
        without a winner, and None while the game is still open.
        """
        for line in self.WIN_LINES:
            cells = [self.data[j] for j in line]
            for player in ("x", "o"):
                if cells.count(player) == 3:
                    return player

        if self.is_full():
            return "draw"
        return None

    def get_heuristic_value(self, reverse):
        """Score the position from the point of view of "x".

        Each winning line contributes +100 / +10 / +1 when it holds three,
        two or one "x" and is otherwise empty, and the mirrored negative
        amounts for "o". Lines containing both marks contribute nothing.

        With ``reverse`` truthy the sign is flipped, giving the score from
        the point of view of "o".
        """
        value = 0

        for line in self.WIN_LINES:
            c = Counter(self.data[j] for j in line)

            if c["x"] == 3:
                value += 100
            elif c["x"] == 2 and c["."] == 1:
                value += 10
            elif c["x"] == 1 and c["."] == 2:
                value += 1
            elif c["o"] == 3:
                value -= 100
            elif c["o"] == 2 and c["."] == 1:
                value -= 10
            elif c["o"] == 1 and c["."] == 2:
                value -= 1

        return -value if reverse else value



""" Model Class Definition """
class Model:
    """Game state for one human player against an alpha-beta AI.

    ``color`` is the human's mark ("x" or "o"); the AI plays the other
    mark, stored in ``other``. ``winner`` mirrors Board.get_winner().
    """

    # Plies explored by the search; a tic-tac-toe game has at most nine.
    SEARCH_DEPTH = 10

    # Base score of a decided game. It must exceed any value the board
    # heuristic can produce so that a real win/loss always dominates.
    WIN_SCORE = 1000

    def __init__(self, color):
        """Start a new game; the AI opens when it holds "x"."""
        self.color = color
        self.other = self.get_opponent(color)
        self.board = Board()
        self.winner = None

        # "x" always plays first.
        if self.other == "x":
            self.make_ai_move()

    def get_opponent(self, player):
        """Return "o" for "x", and "x" for anything else."""
        if player == "x":
            return "o"
        return "x"

    def make_player_move(self, pos):
        """Play the human's mark at ``pos``, then let the AI answer.

        Nothing happens once the game is decided. If the cell is already
        taken the move is rejected and the AI does not reply.
        """
        if self.winner is not None:
            return

        played = self.board.play_at(pos, self.color)
        self.winner = self.board.get_winner()

        if played and self.winner is None:
            self.make_ai_move()

    def make_ai_move(self):
        """Play the AI move with the best alpha-beta value and refresh
        ``winner``. Ties keep the first candidate found.
        """
        best, best_value = None, -math.inf

        for pos in self.board.get_playable_coord():
            child = Board.copy(self.board)
            child.play_at(pos, self.other)

            # The AI has just moved in ``child``, so the human (the
            # minimizing side) is next to play.
            value = self.alphabeta(
                child, self.SEARCH_DEPTH, -math.inf, math.inf, False
            )
            if value > best_value:
                best, best_value = pos, value

        # No empty cell means there is nothing to play; avoid indexing the
        # board with None.
        if best is not None:
            self.board.play_at(best, self.other)

        self.winner = self.board.get_winner()

    def alphabeta(self, node, depth, alpha, beta, maximizingPlayer):
        """Return the minimax value of ``node`` for the AI, with alpha-beta
        pruning.

        ``depth`` is the number of plies still allowed. ``alpha`` / ``beta``
        are the current search window. ``maximizingPlayer`` is True when
        the AI is to move in ``node`` and False when the human is.

        Decided games are scored with ``WIN_SCORE + depth`` (negated for a
        loss): a result reached earlier leaves more remaining depth, so the
        search prefers the fastest win and the slowest loss instead of
        treating all wins as equal.
        """
        winner = node.get_winner()
        if winner == self.other:
            return self.WIN_SCORE + depth
        if winner == self.color:
            return -(self.WIN_SCORE + depth)
        if winner is not None or depth == 0:
            # Draw, or search horizon reached: fall back to the heuristic,
            # oriented toward the AI's mark.
            return node.get_heuristic_value(self.other == "o")

        if maximizingPlayer:
            value = -math.inf
            for pos in node.get_playable_coord():
                child = Board.copy(node)
                child.play_at(pos, self.other)
                value = max(
                    value,
                    self.alphabeta(child, depth - 1, alpha, beta, False),
                )

                alpha = max(alpha, value)
                if alpha >= beta:
                    break  # beta cut-off
            return value

        value = math.inf
        for pos in node.get_playable_coord():
            child = Board.copy(node)
            child.play_at(pos, self.color)
            value = min(
                value,
                self.alphabeta(child, depth - 1, alpha, beta, True),
            )

            beta = min(beta, value)
            if beta <= alpha:
                break  # alpha cut-off
        return value

我对这个问题的结论:

Alpha-Beta 剪枝是一种深度优先搜索算法,而不是广度优先搜索算法,所以我认为无论深度如何,它都会选择它找到的第一条获胜路线,而不是去搜索最快的路线。(补充:更准确地说,原因在于评估函数对不同深度的胜利给出了相同的分值,而比较时使用的是严格的 `>`,因此会保留最先找到的那条路线;如果在终局得分中加入剩余深度,搜索就会优先选择最快的胜利。)

解决方法

暂未找到可以解决该程序问题的有效方法,小编正在努力寻找和整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)