连接 4 的 Minimax 算法产生一个失败的移动

问题描述

当深度设置为 4 时，该算法似乎会产生正确的移动，但是当我将其增加到 5 时，它会出乎意料地变得更糟。在这种特殊情况下，当我认为第 3 列是时，建议第 0 列是下一个最佳移动。我很可能没有完全理解 minimax 算法，所以我请求你的帮助来解决这个问题，因为我已经尝试了好几天但没有成功。此外，我们将不胜感激任何提高代码可读性的建议。

这是游戏的链接：http://connect4.getforge.io/ - 原谅糟糕的用户界面 (wip)。默认为4层深度，请注意增加AI_DEPTH时的玩法差异。

这是网格，现在轮到 AI 扮演 G（最大化玩家）了。

 ''  '' ''   ''  ''  '' ''
 ''  '' ''   ''  ''  '' ''
 ''  '' ''   ''  ''  '' ''
 ''  '' 'G'  'R' 'G' '' 'G'
 'G' '' 'R'  'R' 'R' '' 'R'
 'G' '' 'R'  'R' 'G' '' 'G'

这是从我的项目中提取的代码：

const GRID_ROW_COUNT = 6;
const GRID_COL_COUNT = 7;
const GRID_ROW_MID = 3;
const GRID_COL_MID = 3;

const WIN_score = 1000;

const rotateGrid = grid => grid.reduce((newGrid,gridRow) => {
  return newGrid.map((column,i) => column.concat(gridRow[i]));
},[...Array(grid[0].length)].map(_ => []));

function* getValidMoves(grid,player) {
  for(let col = 0; col < grid[0].length; coL++){
    for(let row = grid.length; row > 0; row--){
      if(!grid[row - 1][col]){
        const tempGrid = JSON.parse(JSON.stringify(grid));
        tempGrid[row - 1][col] = player;
        yield [tempGrid,col];
        break;
      }
    }
  }
}

const isDrawn = function(grid){
  for(let row = GRID_ROW_COUNT; row > 0; row--){
    if(grid[row - 1].filter(Boolean).length < GRID_COL_COUNT){
      return false;
    }
  }
  return true;
}

const countInRow = (target,row,index,count) => {
  if(count == 0 || !row[index] || row[index] != target){
    return index;
  }
  return countInRow(target,index - 1,count - 1);
}

const countInDiagonal = (target,grid,col,count) => {
  const colModulus = Math.abs(col);
  if(count == 0 || row < 0 || !grid[row][colModulus] || grid[row][colModulus] != target){
    return row;
  }
  return countInDiagonal( target,row - 1,col - 1,count - 1 );
};

const countInCounterDiagonal = (target,count) => countInDiagonal(target,-col,count); 

function scoreGridPosition(grid,player,count = 4){
  let score = 0; 
  
  function checkWinOnHorizontals(grid,count){
    const GRID_ROW_COUNT = grid.length;
    const GRID_COL_COUNT = grid[0].length;    
    const GRID_COL_MID = player ? 0 : Math.floor(GRID_COL_COUNT/2);

    for(let row = GRID_ROW_COUNT - 1; row >= 0; row--){
      for(let col = GRID_COL_COUNT - 1; col >= GRID_COL_MID; col--){
        const cell = grid[row][col];
        if(!cell){ continue; }

        const colIndex = countInRow(cell,grid[row],count - 1);
        
        if(col - colIndex == count){ return WIN_score; }

        if(player){
          const weight = col - 1 - colIndex;
          if(cell == player){
            score += weight;
          } else {
            score -= weight * 2;         
          }
        } 

        col = colIndex + 1;
      }
    }
    return 0;
  }

  const checkWinOnVerticals = (grid,count) => checkWinOnHorizontals(rotateGrid(grid),count);

  function checkWinOnDiagonals(grid,count){
    const _GRID_ROW_MID = player ? 0 : GRID_ROW_MID;

    for(let row = GRID_ROW_COUNT - 1; row >= _GRID_ROW_MID; row--){
      for(let col = GRID_COL_COUNT - 1; col >= 0; col--){
        const cell = grid[row][col];
        if(!cell){ continue; }
        
        let rowIndexL = row,rowIndexR = row;
        
        if(col >= GRID_COL_MID){
          rowIndexL = countInDiagonal(cell,count - 1);
        }
        if(col <= GRID_COL_MID){
          rowIndexR = countInCounterDiagonal(cell,col + 1,count - 1);
        }
        
        if(row - rowIndexL == count || row - rowIndexR == count){ return WIN_score; }

        if(player){
          const weight = (row - rowIndexL) + (row - rowIndexR);
          if(cell == player){
            score += weight
          } else {
            score -= weight;
          }
        }
      }
    }
    return 0;
  }
    
  return [
    checkWinOnHorizontals(grid,count) ||
    checkWinOnVerticals(grid,count) ||
    checkWinOnDiagonals(grid,count),score
  ];
}

const alphaBetaAI = (grid,depth = 5,alpha = -Infinity,beta = Infinity,isMaxPlayer = true) => {
  let value = isMaxPlayer ? -Infinity : Infinity;
  let move = null;

  if(isDrawn(grid)){ return [0,move]; }

  const player = isMaxPlayer ? 'G' : 'R';
  const [terminalscore,score] = scoreGridPosition(grid,player);
  
  if(terminalscore){
                       // -1000            1000  
    return [isMaxPlayer ? -terminalscore : terminalscore,move]
  }
  if(depth == 0){ return [score,move]; }

  if(isMaxPlayer){
    for(let [newGrid,column] of getValidMoves(grid,player)){
      let [tempVal] = alphaBetaAI(newGrid,depth - 1,alpha,beta,!isMaxPlayer);
      if(tempVal > value){
        value = tempVal;
        move = column;
      }
      alpha = Math.max(value,alpha);
      if(beta <= alpha){ break; }
    }
  } else {
    for(let [newGrid,!isMaxPlayer);
      if(tempVal < value){
        value = tempVal;
        move = column;
      }
      beta = Math.min(value,beta);
      if(beta <= alpha){ break; }
    }
  }
  return [value,move];
}

// here is the grid
let g = [
  ['','',''],['','G','R','G'],['G','R'],'G']
];

console.log('Move: ',alphaBetaAI(g)[1]); // 0 - I was expecting 3

解决方法

正如 Ouroborus 指出的那样，在深度 5 时，它认为无论走什么棋都会输。所以现在它选择了你的可能动作列表中的第一个动作，因为所有结果都返回 -1000。

如果您希望它始终找到最长的输路路线，那么您需要在输时返回 -1000 + depth，如果赢则需要返回 1000 - depth。那么你的 AI 将始终选择最长的失败路线（如果有超过 1 种获胜方式，则选择最快的获胜路线）。

alpha-beta-pruning artificial-intelligence javascript minimax