rollout evaluation function

このコミットが含まれているのは:
woosh 2023-11-16 07:31:24 +00:00
コミット 499a404139
1個のファイルの変更34行の追加14行の削除

48
uttt.c
ファイルの表示

@ -786,8 +786,8 @@ int ut_boardValue(const char *tiles, int offset, int stride)
value += (T(r, c) == 'X') ? 1 : (T(r, c) == 'O') ? -1 : 0;
return value;
}
#define UT_VALUEMAX 81 * 2
#define UT_VALUEMIN (-81 * 2)
#define UT_VALUEMAX 81
#define UT_VALUEMIN (-81)
int ut_value(const struct ut_state *state)
{
int value = 0;
@ -796,12 +796,12 @@ int ut_value(const struct ut_state *state)
char winner = ut_winner(state);
if(winner != '\0')
{
return (winner == 'X') ? 81 * 2 : (winner == 'O') ? -81 * 2 : 0;
return (winner == 'X') ? UT_VALUEMAX : (winner == 'O') ? UT_VALUEMIN : 0;
}
// weight each board: 2x if taken, 2x if blank but would lead to a 2-of-3
// line (for X or O), 1x otherwise
char importance[3][3] = {1};
/*char importance[3][3] = {1};
for(int r = 0; r < 3; r++) {
for(int c = 0; c < 3; c++) {
char board = state->boards[r][c];
@ -834,7 +834,7 @@ int ut_value(const struct ut_state *state)
else if (board == 'X') importance[r][c] = 2;
else if (board == 'O') importance[r][c] = 2;
}
}
}*/
// game is in progress
for(int r = 0; r < 3; r++) {
@ -848,19 +848,19 @@ int ut_value(const struct ut_state *state)
// board is won
board_value = (board == 'X') ? 9 : (board == 'O') ? -9 : 0;
}
value += board_value * importance[r][c];
value += board_value/* * importance[r][c]*/;
}
}
return value;
}
int ut_alphabetaq(const struct ut_state *state, struct ut_move move, int depth, int a, int b)
int ut_alphabetaq(int (*value)(const struct ut_state *state), const struct ut_state *state, struct ut_move move, int depth, int a, int b)
{
struct ut_state next;
if(ut_move(&next, state, move)) {return (state->player == 'X') ? UT_VALUEMIN - 1 : UT_VALUEMAX + 1;}
int q = ut_value(&next);
int q = value(&next);
if(depth <= 0 || q <= UT_VALUEMIN || q >= UT_VALUEMAX) {return q;}
@ -871,7 +871,7 @@ int ut_alphabetaq(const struct ut_state *state, struct ut_move move, int depth,
for(int c = 0; c < 9; c++)
{
struct ut_move next_move = (struct ut_move){r, c};
int next_q = ut_alphabetaq(&next, next_move, depth - 1, a, b);
int next_q = ut_alphabetaq(value, &next, next_move, depth - 1, a, b);
if(next_q > q) {q = next_q;}
if(q > b) {return q;}
a = q > a ? q : a;
@ -884,7 +884,7 @@ int ut_alphabetaq(const struct ut_state *state, struct ut_move move, int depth,
for(int c = 0; c < 9; c++)
{
struct ut_move next_move = (struct ut_move){r, c};
int next_q = ut_alphabetaq(&next, next_move, depth - 1, a, b);
int next_q = ut_alphabetaq(value, &next, next_move, depth - 1, a, b);
if(next_q < q) {q = next_q;}
if(q < a) {return q;}
b = q < b ? q : b;
@ -892,8 +892,8 @@ int ut_alphabetaq(const struct ut_state *state, struct ut_move move, int depth,
}
return q;
}
#define UT_DEPTH 6
struct ut_move ut_minimax(const struct ut_state *state)
struct ut_move ut_minimax(int (*value)(const struct ut_state *state), const struct ut_state *state, int depth)
{
struct ut_move best_move = {-1, -1};
int best_q = (state->player == 'X') ? UT_VALUEMIN - 1 : UT_VALUEMAX + 1;
@ -901,7 +901,7 @@ struct ut_move ut_minimax(const struct ut_state *state)
for(int c = 0; c < 9; c++)
{
struct ut_move move = (struct ut_move){r, c};
int q = ut_alphabetaq(state, move, UT_DEPTH - 1, UT_VALUEMIN - 1, UT_VALUEMAX + 1);
int q = ut_alphabetaq(value, state, move, depth - 1, UT_VALUEMIN - 1, UT_VALUEMAX + 1);
if((state->player == 'X') ? q > best_q : q < best_q)
{
best_q = q;
@ -911,8 +911,28 @@ struct ut_move ut_minimax(const struct ut_state *state)
return best_move;
}
#define UT_RDEPTH 1
int ut_rollout(const struct ut_state *state)
{
struct ut_state rstate = *state;
int moves = 0;
char winner;
while((winner = ut_winner(&rstate)) == '\0')
{
if(ut_move(&rstate, &rstate,
ut_minimax(ut_value, &rstate, UT_RDEPTH)))
{
return 0;
}
moves++;
}
return (winner == 'X') ? (UT_VALUEMAX - moves) : (winner == 'O') ? (UT_VALUEMIN + moves) : 0;
}
#define UT_DEPTH 5
/*
 * Agent entry point: selects a move for `state` and stores it in *move.
 *
 * state - position to move from; passed straight to ut_minimax.
 * game  - not referenced anywhere in this body.
 * move  - output; overwritten with the move returned by ut_minimax.
 *
 * Returns nonzero when the stored move has r or c outside 0..8,
 * and 0 when both coordinates are within 0..8.
 */
int ut_agentgetmove(const struct ut_state *state, const struct ut_game *game, struct ut_move *move)
{
// NOTE(review): this listing is a diff view with the +/- markers stripped; the
// first assignment below appears to be the removed one-argument call and the
// second its replacement -- confirm against the actual file.
*move = ut_minimax(state);
// Search UT_DEPTH deep, scoring positions with ut_rollout.
*move = ut_minimax(ut_rollout, state, UT_DEPTH);
// Range check on both coordinates; any out-of-range value yields nonzero.
return (move->r < 0 || move->r >= 9 || move->c < 0 || move->c >= 9);
}