Bot plays Tic-Tac-Toe using N-step lookahead
Good to see you! In this article we will design a bot which can play Tic-Tac-Toe using N-Step Lookahead, a.k.a minimax. In the previous article, we created a bot which played using one-step lookahead. However, this time our bot will look ahead much further into the future rather than just one-step.
Import packages
import numpy as np
import random
from util import *
Create the battlefield
ttt = np.zeros((3,3), np.uint16)
ttt
array([[0, 0, 0],
[0, 0, 0],
[0, 0, 0]], dtype=uint16)
Code functions
def get_valid_moves(grid):
valid_moves = []
for row in range(3):
for col in range(3):
if grid[row,col] == 0:
valid_moves.append((row,col))
return valid_moves
def check_win_draw(ttt):
grid = ttt.copy()
draw = True
for row in grid:
if list(row).count(0) > 0:
draw = False
break
# check rows
for row in range(3):
if list(ttt[row,:]).count(1) == 3:
return 1
elif list(ttt[row,:]).count(2) == 3:
return 2
# check columns
for col in range(3):
if list(ttt[:,col]).count(1) == 3:
return 1
elif list(ttt[:,col]).count(2) == 3:
return 2
# Check diagonals
if list(ttt[range(3),range(3)]).count(1) == 3 or list(ttt[range(3),range(2,-1,-1)]).count(1) == 3:
return 1
elif list(ttt[range(3),range(3)]).count(2) == 3 or list(ttt[range(3),range(2,-1,-1)]).count(2) == 3:
return 2
return 0 if draw else -1
N-Step Lookahead
In one-step lookahead, get_heuristic
did not check for opponent’s three tiles
in a sequence. Since it was never a possiblity for play to resume once either of
the players placed three tiles in sequence. In n-step lookahead, however, we
try to anticipate opponent’s moves by playing any possible moves on a test board.
Thus we will modify get_heuristic
function to take this into account.
Scoring Scheme
(for player)
- 3-in-sequence: 100,000
- 2-in-sequence: 1
(for opponent)
- 2-in-sequence: -100
- 3-in-sequence: -1,000
def get_heuristic(grid, player): """ Evaluates move and assigns a score. Parameters: =========== grid: numpy.ndarray --> 3x3 Tic-Tac-Toe grid player: int --> 1(X) or 2(O) to denote the player Returns: ======== score: int --> score the game board received """ num_three = count_windows(grid, 3, player) num_twos = count_windows(grid, 2, player) num_twos_opp = count_windows(grid, 2, player%2 + 1) num_three_opp = count_windows(grid, 3, player%2 + 1) score = 1e5*num_three + num_twos - 1e2*num_twos_opp - 1e3*num_three_opp return score
The next function is the core of our n-step lookahead algorithm. According to, our common friend, wikipedia:
A minimax algorithm is a recursive algorithm for choosing the next move in an n-player game, usually a two-player game. A value is associated with each position or state of the game. This value is computed by means of a position evaluation function and it indicates how good it would be for a player to reach that position. The player then makes the move that maximizes the minimum value of the position resulting from the opponent’s possible following moves. If it is A’s turn to move, A gives a value to each of their legal moves.
def minimax(grid, depth, player, maximising):
if depth == 0 or check_win_draw(grid) != -1:
heuristic = get_heuristic(grid,player)
return heuristic
ttt = grid.copy()
if maximising:
value = -0b1<<20
for valid_move in get_valid_moves(grid):
ttt[valid_move] = player
value = max(value, minimax(ttt,depth-1,player%2+1,False))
ttt[valid_move] = 0
return value
else:
value = 0b1<<20
for valid_move in get_valid_moves(grid):
value = min(value, minimax(ttt,depth-1,player%2+1,True))
ttt[valid_move] = 0
return value
def score_move(grid, depth, move, player):
"""
Helper function for `get_optimal_move` to score each valid_move.
"""
ttt = grid.copy()
ttt[move] = player
value = minimax(ttt, depth-1, player, False)
return value
def get_optimal_move(grid, depth, player):
"""
Uses minimax to find the best possible move.
Parameters:
===========
grid: numpy.ndarray
--> 3x3 Tic-Tac-Toe grid
depth: int
--> denotes the value of n in n-step lookahead. The max depth of tree
--> in minimax algorithm.
player: int
--> 1(X) or 2(O) to represent the player.
Returns:
========
: tuple
--> Return (row, col) pair to represent the move in the grid
"""
scores = []
valid_moves = get_valid_moves(grid)
for valid_move in valid_moves:
scores.append(score_move(grid, depth, valid_move, player))
max_score = max(scores)
max_score_indices = [index for index,score in enumerate(scores) if score == max_score]
return valid_moves[random.choice(max_score_indices)]
Time to battle!
Against self
agent1 = {'piece':1}
agent2 = {'piece':2}
result = {'total_games':0, 'agent1':0, 'agent2':0, 'draws':0}
ngames = 100
depth = 3
for _ in range(ngames):
grid = np.zeros((3,3), np.uint16)
result['total_games'] += 1
for iteration in range(9):
if iteration%2 == 0:
move = get_optimal_move(grid,depth,agent1['piece'])
grid[move] = agent1['piece']
else:
move = get_optimal_move(grid,depth,agent2['piece'])
grid[move] = agent2['piece']
if check_win_draw(grid) != -1:
game_result = check_win_draw(grid)
if game_result == 1:
result['agent1'] += 1
elif game_result == 2:
result['agent2'] += 1
else:
result['draws'] += 1
break
Stats
--------------------------------------------------
Total Games:100 Agent1:40 Agent2:19 Draws:41
Against Random bot
agent1 = {'piece':1}
agent2 = {'piece':2}
result = {'total_games':0, 'agent1':0, 'agent2':0, 'draws':0}
ngames = 100
depth = 3
for _ in range(ngames):
grid = np.zeros((3,3), np.uint16)
result['total_games'] += 1
for iteration in range(9):
if iteration%2 == 0:
move = get_optimal_move(grid,depth,agent1['piece'])
grid[move] = agent1['piece']
else:
move = get_random_tile(grid)
grid[move] = agent2['piece']
if check_win_draw(grid) != -1:
game_result = check_win_draw(grid)
if game_result == 1:
result['agent1'] += 1
elif game_result == 2:
result['agent2'] += 1
else:
result['draws'] += 1
break
Stats
--------------------------------------------------
Total Games:100 Agent1:100 Agent2:0 Draws:0