// checkers.cpp : Defines the entry point for the console application.
// by N.A. Bozinis @ 13-Apr-20 7:50:37 PM

// teaching computer to play checkers with RL
// game rules: https://www.ducksters.com/games/checkers_rules.php
// it is interesting because I am complete rubbish with the game!
// someone solved this game analytically and perfectly, google for Chinook 2007

// there are no intermediate rewards just final states for win/draw/lose [1 0 -1]
// TAKE1: use plain linear feature value function

// TODO: @@@
// combined tree search during learning (TD-leaf? -> needs good starting weights)
// possible to avoid "turn" input by flipping the board? see tavli PhD by N.Papahristou


#include "stdafx.h"

//#define NN_USEFLOAT /* define this if you use single precision */
#define SYMMETRIC_SIGMOID // range [-1,1], else default [0,1]
#include "fann/include/xfan.h"

//#define NN_NUMF double
#include "random.h"

// simple encapsulation of running average
struct RunningAvg {
	RunningAvg(BOOL doVariance = 0) {
		bDoVar = doVariance;
		Reset();
	}

	void Reset()
	{
		avg = 0;
		cnt = 0;
		m2 = 0;
	}

	void Add1(NN_NUMF val) { // new sample
		cnt++;
		NN_NUMF delta = val - avg;
		avg += delta/cnt;
		if(bDoVar)
			// wexford's algorithm for running variance, mathematically stable
			//http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
			m2 += delta * (val - avg);
	}

	NN_NUMF GetVariance() // sqrt to get standard deviation
	{
		ATLASSERT(bDoVar && cnt);
		NN_NUMF variance = m2/cnt; 
		return variance;
	}

	NN_NUMF avg; // mean
	int cnt, bDoVar; // # of samples
	NN_NUMF m2; // optional variance calculation 
};

#define CHEKERS 12 /* how many pieces each */
#define BOARD 32 /* white spots, half of chess board */
#define MOVE_BRK 40 /* marker; must be larger than any board index */
// size of a move descriptor buffer: per piece 1 cell + up to 4 directions + 1 terminator,
// for 12 pieces plus one spare group (leaves room for the final terminator).
// Fully parenthesized so the macro is safe inside any expression (e.g. x / MAX_SUCC).
#define MAX_SUCC ((1+4+1)*13)
// if nothing happens for these many moves, it's considered a draw (cycling?)
#define MAX_IDLE 50

// learning parameters (used by both the linear and the ANN trainer below)
#define FEATURES 6     /* board features in value function */
#define NN_LRATE 0.03f /* weights learning rate */
//#define NN_RNDMAX 0.5f /* absolute range for weight initialization */
#define DISCNT 0.99f   /* discount rate, promotes quicker play */ 
#define RND_EXPLORE 15 /* percent non-greedy moves */
#define RND_EXPLORE_MIN 1 // ramping for long training (only referenced in commented-out code here)
#define TRAINING 15000 /* training games for self-play */
// rewards only at the very end (from X perspective)
#define REWARD_WIN  1
#define REWARD_LOSE -1 /* symmetric sigmoid */
#define REWARD_DRAW 0
#define REWARD_MAX 1000 /* for rollouts, clear winner */

// neural network specifics
// representation inspired by tesauro backgammon, keeping things scaled and feature rich
// each position has 2 bits per player, 1 for normal pawn, 1 for king, all 0 if empty, and one bit for turn
//		actually 2 bits per position (see below) is exactly the same, and uses half the inputs
// augment ANN with features? at least threatened pieces and piece/king difference
// NN_INPUT layout: 2 inputs per playable square, 1 input for the side to move,
// and (with AUGMENT_NN) 3 extra hand-crafted difference features
#define AUGMENT_NN
#ifdef AUGMENT_NN
#define NN_INPUT (BOARD*2 + 1 + 3) 
#else
#define NN_INPUT (BOARD*2 + 1 + 0) 
#endif
#define NN_OUT 1 /* @@@ or have separate probabilities=2 for win/lose? */
#define NN_HID 30

//#define NN_MOMENTUM 0.01f 
//#define FANN_NO_SEED /*if you want identical rnd runs*/
#define STEEPNESS 1.3f // 0.25 matches backprop.h at 0.5 -- he uses exp(-2*x*steepness)
#define NN_TRAINING 15000 /* longer training w/wo ramping wasn't helpful */
// no need to use clipping (TARGET_HIGH etc) because of discount and scaled rewards


// representation of the checkers board and its operations
class Checkers 
{
//protected:
public:
	// checkers is played on half of chessboard, so 32 blocks are enough
	// but the XBOARD classic representation is easy for moves (empty slots 0,9,18,27)
	char m_board[36]; // 0=empty, 1=X, 2=X-king, -1=O, -2=O-king
	char m_mvbuf[MAX_SUCC]; // storage for moves
	int m_cycle; // player activity sensor

	// Calculate a byte-wise checksum using Newcomer's algorithm: each byte is XOR-ed
	// with the high byte of a rolling 16-bit key, the key is advanced from the ciphered
	// byte, and the ciphered bytes are summed. Unlike a plain sum of bytes this is
	// order sensitive (a simple addition can't tell 0-1 from 1-0).
	//   in    - buffer to fingerprint; asserts and returns 0 when null
	//   dwLen - number of bytes; 0 means sizeof(m_board), i.e. a fingerprint of the board
	// returns the sum of the ciphered bytes
	DWORD UGHChecksumBuf(const char* in, DWORD dwLen=0) const
	{
		const BYTE* buf = (const BYTE*)in;
		ATLASSERT(buf);
		if(!buf)
			return 0;

		if(!dwLen)
			dwLen = sizeof(m_board); // fingerprint of board

		WORD r = 55665;
		DWORD sum = 0;

		const BYTE* last = buf + dwLen;
		while(buf < last) {
			BYTE value = *buf++;
			BYTE cipher = (BYTE)(value ^ (r >> 8));
			// The key update is done in unsigned arithmetic: with the default int
			// promotion (cipher + r) * 52845 can exceed INT_MAX, which is signed
			// overflow. The low 16 bits kept in r are the same as the wrapped result.
			r = (WORD)(((DWORD)cipher + r) * 52845u + 22719u); // magic constants
			sum += cipher;
		}

		return sum;
	}

public:
	// Default constructor: lays down the opening position, 12 men for each side.
	// X (+1) occupies cells 1..13 and O (-1) cells 23..35; the auxiliary slots
	// 9 and 27 inside those ranges, and everything in between, stay empty.
	// m_mvbuf is left uninitialized.
	Checkers()
	{
		memset(m_board, 0, sizeof(m_board));
		m_cycle = 0;

		for(int cell=1; cell<=35; cell++) {
			if(9==cell || 27==cell)
				continue; // auxiliary slots, never hold a piece
			if(cell <= 13)
				m_board[cell] = 1;
			else if(cell >= 23)
				m_board[cell] = -1;
		}

#ifdef _DEBUG
		// sanity check: both sides start with exactly CHEKERS men
		int kings_;
		const int menX_ = CountCheckers(1, kings_);
		ATLASSERT(CHEKERS==menX_);
		const int menO_ = CountCheckers(0, kings_);
		ATLASSERT(CHEKERS==menO_);
#endif
	}

	Checkers(const Checkers& other) // copy constructor
	{
		memcpy(m_board, other.m_board, sizeof(m_board));
		m_cycle = 0;
	}

	// Fill this board with the position of 'orig' seen from the other side:
	// cell i receives the negated content of cell (36 - i), so pieces swap ends
	// and owners. Slot 0 of this board is not written. 'orig' must be another object.
	void FlipBoard(const Checkers& orig)
	{
		ATLASSERT(this != &orig);

		const int top = ARRAYSIZE(m_board) - 1; // highest cell index
		// walk the destination downwards while the source goes upwards
		for(int dst = top, src = 1; dst >= 1; dst--, src++)
			m_board[dst] = -orig.m_board[src]; // @@@ there's an asymmetry here

		ATLASSERT(IsValid());
	}

	// Debug consistency check of the board array; failures are reported through
	// ATLASSERT only, the function itself always returns TRUE.
	BOOL IsValid() const
	{
		// slots 0, 9, 18 and 27 are auxiliary and must never hold a piece
		for(int aux=0; aux<=27; aux+=9)
			ATLASSERT(!m_board[aux]);

		// every slot holds one of -2, -1, 0, 1, 2
		int cell = ARRAYSIZE(m_board);
		while(cell-- > 0)
			ATLASSERT(m_board[cell] >= -2 && m_board[cell] <= 2);

		return TRUE; // unless assertions blow
	}

	// Terminal state test.
	// returns REWARD_LOSE when X has no pieces left, REWARD_WIN when O has none,
	//         -2 for a draw (m_cycle reached MAX_IDLE), 0 when the game goes on.
	// A side that still has pieces but cannot move is NOT detected here
	// (the player who can't play loses).
	int GameEnded() const
	{
		ATLASSERT(IsValid());

		// kings or men, any piece will do; stop scanning once both sides were seen
		bool seenX = false, seenO = false;
		for(int cell=1; cell<ARRAYSIZE(m_board) && !(seenX && seenO); cell++) {
			const int piece = (int)m_board[cell];
			if(piece > 0)
				seenX = true;
			else if(piece < 0)
				seenO = true;
		}

		if(!seenX)
			return REWARD_LOSE; // X lost
		if(!seenO)
			return REWARD_WIN; // X won

		return (m_cycle >= MAX_IDLE) ? -2 /*draw*/ : 0 /*don't know yet*/;
	}

	// plain checkers move only forward 1 place (unless jumping)
	// if they reach the end, they are transformed into kings
	// a king still moves one square at a time, but can go back and forth
	//
	// Count the pieces of one side.
	//   forX  - non-zero counts X (positive cells), zero counts O (negative cells)
	//   kings - receives the number of kings of that side
	// returns the number of plain men (kings are not included)
	int CountCheckers(BOOL forX, int& kings) const
	{
		ATLASSERT(IsValid());

		const int man = forX ? 1 : -1; // board code of a plain man of this side
		int men = 0;
		kings = 0;

		for(int cell=1; cell<ARRAYSIZE(m_board); cell++) {
			const int piece = (int)m_board[cell];
			const bool mine = forX ? (piece > 0) : (piece < 0);
			if(!mine)
				continue;

			if(piece == man)
				men++;
			else
				kings++;
		}

		ATLASSERT(men+kings <= CHEKERS);
		return men;
	}

	// TRUE when cell c holds a king of either side (value beyond +-1)
	BOOL IsKing(int c) const
	{
		const int piece = m_board[c];
		return piece < -1 || piece > 1;
	}

	// Is c the index of a playable cell? Returns FALSE for anything outside
	// 1..35, otherwise c % 9, which is 0 for the auxiliary slots 9-18-27 and
	// non-zero (not necessarily 1) for real cells.
	BOOL IsValidSlot(int c) const
	{
		const bool inside = c > 0 && c < ARRAYSIZE(m_board);
		return inside ? (c % 9) : FALSE;
	}

	// ANN input vector mapping: encode this board into fin[NN_INPUT].
	//   fin  - output array of NN_INPUT values, cleared first
	//   forX - non-zero when X is to move; stored as the single "turn" input
	// Layout: per playable cell a group of inputs (2 in the compact encoding that is
	// active while NN_INPUT <= 100, 4 otherwise), then the turn input, then with
	// AUGMENT_NN three difference features (men, kings, pieces able to capture).
	void FillNetInputs(NN_NUMF* fin/*[NN_INPUT]*/, BOOL forX) const
	{
#ifdef _DEBUG
		NN_NUMF* f_ = fin; // start of buffer, for the size assertion at the end
#endif
		memset(fin, 0, sizeof(NN_NUMF)*NN_INPUT);

		int i;
		// small-magnitude inputs, so scaling isn't an issue (0/1 in the 4-input
		// encoding, the signed piece code -2..2 in the compact one)
		for(i=1; i<ARRAYSIZE(m_board); i++) {
			if( (i%9)==0 )
				continue; // auxiliary slot, has no inputs
			char aux = m_board[i], k = IsKing(i);
			// NOTE: only the inner if/else depends on 'aux'; advancing fin happens for every cell
			if(aux)
#if NN_INPUT > 100
				if(aux < 0)
					fin[2+k] = 1;
				else
					fin[k] = 1;

			fin += 4;
#else // optional halve the inputs using -1 for other player on same spot?
				if(k)
					fin[1] = aux; // king slot, +-2
				else
					fin[0] = aux; // man slot, +-1
			fin += 2;
#endif
		}

		*fin++ = forX != 0; // turn @@@ other bit for player2?

#ifdef AUGMENT_NN
		// extended features, not quite unsupervised learning!
		int px = 0, kx = 0, po = 0, ko = 0, x=0,o=0;  // threatened (or rather attacking)
		for(i=1; i<ARRAYSIZE(m_board); i++) {
			char aux = m_board[i];
			if(!aux)
				continue;

			// men/kings per side
			if(aux < 0) {
				if(-1==aux)
					po++;
				else
					ko++;
			}
			else {
				if(1==aux)
					px++;
				else
					kx++;
			}

			// pieces that have at least one capture available
			if(CanJump(i)) {
				// order of play not taken into account
				if(aux > 0)
					x++;
				else
					o++;
			}
		}

		*fin++ = 0.5*(px - po); // just differences
		*fin++ = 0.5*(kx - ko);
		*fin++ = (x - o);
#endif

		ATLASSERT(fin - f_ == NN_INPUT); // exactly NN_INPUT values were written
	}

	// Worth of this board from X's perspective.
	// For a finished game the actual outcome is returned (REWARD_DRAW for a draw,
	// otherwise the GameEnded() result); else the network's estimate.
	//   net     - value network (NN_INPUT inputs, NN_OUT outputs)
	//   forComp - passed to FillNetInputs as the side-to-move flag
	NN_NUMF GetValueEstimate(/*const*/fann* net, BOOL forComp) const // estimated or actual worth
	{
		const int outcome = GameEnded();
		if(-2==outcome)
			return REWARD_DRAW;
		if(outcome)
			return outcome; // win/lose reward as is

		NN_NUMF inputs[NN_INPUT];
		FillNetInputs(inputs, forComp);
		ATLASSERT(NN_INPUT==net->num_input);

		const NN_NUMF* result = fann_run(net, inputs);
		ATLASSERT(NN_OUT==net->num_output);
		ATLASSERT(1==NN_OUT);
		return result[0];
	}

	// extract linear features: pieces, kings, threatened
	// @@@ add more position relative stuff in the future
	//   fbuf - receives FEATURES values, in order: 0.5*X men, X kings, 0.5*O men,
	//          O kings, X pieces able to capture, O pieces able to capture
	//          (plus the optional extras compiled in when FEATURES > 6 / > 8)
	//   forX - side to move; only stored when FEATURES > 8, otherwise unused
	void ExtractFeatures(NN_NUMF* fbuf/*[FEATURES]*/, BOOL forX) const
	{
#ifdef _DEBUG
		NN_NUMF* s_ = fbuf; // start of buffer, for the size assertion at the end
#endif

		int p,k;
		p = CountCheckers(1, k);
		*fbuf++ = 0.5*p; // scaling a bit
		*fbuf++ = k;
		int nx = p+k; // total X pieces (only used by the optional features)
		p = CountCheckers(0, k);
		*fbuf++ = 0.5*p;
		*fbuf++ = k;
		int no = p+k; // total O pieces (only used by the optional features)

		// from here on p/k are reused as counters: X / O pieces that can capture
		p = 0; k = 0; // threatened (or rather attacking)
		int avX =0, avO=0;
		for(int i=1; i<ARRAYSIZE(m_board); i++) {
			if(!m_board[i])
				continue;

#if FEATURES > 6
			if(m_board[i] > 0)
				avX += i; // some positional info on average
			else
				avO += i;
#endif

			if(CanJump(i)) {
				// order of play not taken into account
				if(m_board[i] > 0)
					p++;
				else
					k++;
			}
		}

		*fbuf++ = p;
		*fbuf++ = k;

// any of these extra features are rubbish, draw down performance against the random player
#if FEATURES > 6
		*fbuf++ = avX/(nx+1); // ensure division works
		*fbuf++ = avO/(no+1);
#endif
#if FEATURES > 8
		*fbuf++ = forX; //makes player worse!
#endif

		ATLASSERT(fbuf-s_ == FEATURES); // exactly FEATURES values were written
	}

	// Linear value function: inner product of the FEATURES weights with a
	// feature vector produced by ExtractFeatures.
	NN_NUMF EstFeatureValue(const NN_NUMF* wt/*FEATURES*/, const NN_NUMF* pfeat) const
	{
		NN_NUMF dot = 0;
		const NN_NUMF* w = wt;
		for(const NN_NUMF* f = pfeat; f != pfeat + FEATURES; ++f, ++w)
			dot += (*f) * (*w);

		return dot;
	}

	// Print the board to the console (uses ANSI escape sequences for colours).
	// Rows are numbered 1..8 top to bottom and columns A..H; row 1 holds cells
	// 35..32, so O starts at the top and X at the bottom. Kings are shown in green.
	//   clear - when set, the screen is cleared first with system("cls")
	void Show1(BOOL clear=TRUE) const
	{
		ATLASSERT(IsValid());
		// to clear screen with console API see KB 99261 (FillConsoleOutputCharacter)
		if(clear)
			system("cls");

		printf("  ABCDEFGH \n");
		printf(" ----------\n");

		int cell = 35; // cells are printed in descending order
		for(int row = 1; row <= 8; row++) {
			const bool oddRow = (row & 1) != 0;
			printf("%d|", row);

			for(int k = 0; k < 4; k++, cell--) {
				// unused (shaded) square comes first on odd rows, last on even rows
				if(oddRow)
					printf("\x1b[100m \x1b[0m");

				const char piece = m_board[cell];
				if(piece > 0)
					printf(IsKing(cell) ? "\x1b[92mX\x1b[0m" /* green to differ */ : "X");
				else if(piece < 0)
					printf(IsKing(cell) ? "\x1b[92mO\x1b[0m" : "O");
				else
					printf(" ");

				if(!oddRow)
					printf("\x1b[100m \x1b[0m");
			}

			// step over the auxiliary slots
			if(9==cell || 18==cell || 27==cell)
				cell--;
			printf("|\n");
		}
		printf(" ----------\n");

#ifdef _DEBUG
		printf("\npress a key\n");
		//getch(); // pause
#endif
	}

	// how many alternative moves are there?
	// change vector has position, +-4 moves then MOVE_BRK, double MOVE_BRK to terminate series
	// note each piece has different number of moves, so the vector isn't equal in parts
	//   forX   - side to move (non-zero = X)
	//   change - output buffer of MAX_SUCC chars receiving the move descriptor
	// returns 0 when the side has no move at all, a NEGATIVE count when captures
	// exist (then only the capturing directions are listed), else the positive
	// number of plain moves. Only the first hop of a capture is listed here.
	int Transitions(BOOL forX, char *change/*[MAX_SUCC]*/) const
	{
		ATLASSERT(!GameEnded());
		
		// scan all pawns for possible movements
		// 'forced' collects capturing moves separately; 'prb' is the write cursor in change
		char forced[MAX_SUCC], *prb = change;
		int i, j, regular, cnt, cf = 0;

#ifdef _DEBUG
		i = CountCheckers(forX, j);
		ATLASSERT(i+j); // still has pawns?
#endif

		for(i=1; i<ARRAYSIZE(m_board); i++) {
			cnt = m_board[i];
			if(0==cnt || (cnt>0) != forX)
				continue; // empty cell or opponent's piece
			// directions are written provisionally at prb+1; prb only advances
			// further down if they are kept as plain moves
			cnt = CanJump(i, prb+1, &regular);
			if(!cnt)
				continue;
			// are there any capturing moves?
			// ('regular' has one bit per plain move, so all-ones means no capture)
			if( (1<<cnt)-1 != regular) {
				forced[cf++] = i; // copy only the jumps
				for(j=0; j<cnt; j++) {
					if( (1<<j) & regular)
						continue;
					forced[cf++] = prb[j+1]; // may have subsequent jumps after this
				}
				forced[cf++] = MOVE_BRK;
			}
			else if(!cf) {
				// plain moves are only worth keeping while no capture was found
				*prb = i;
				prb += cnt+1;
				*prb++ = MOVE_BRK;
			}
		}

		// count transitions, either forced or normal
		ATLASSERT(prb - change < MAX_SUCC);
		if(cf) {
			// captures take over the output, discarding any plain moves
			memcpy(change, forced, cf);
			prb = change+cf;
		}
		else if(prb==change)
			return 0; // no transitions

		*prb = MOVE_BRK; // double terminal
		prb = change;

		// walk the descriptor: skip each cell, count directions up to its MOVE_BRK
		cnt = 0;
		while(1) {
			prb++;
			while(*prb++ != MOVE_BRK)
				cnt++;
			if(MOVE_BRK == *prb)
				break;
		}

		if(cf)
			return -cnt; // hint for forced
		return cnt; // moves from all pawns
	}

	// after you have all transitions, use this one to choose among them
	// balances greedy and random exploration modes
	// when AI moves, we choose the one with maximum utility, else the minimum one
	// after the end of training use rnd100==0 to play the best move
	//   nt      - result of Transitions(): number of moves, negative when captures are forced
	//   moves   - move descriptor filled by Transitions()
	//   weights - linear weights (FEATURES values), or 0 when 'net' is used
	//   rnd100  - percentage (0..100) of calls that pick a random move instead of the best
	//   net     - value network, or 0 when 'weights' is used
	// returns the value of the chosen successor board; the chosen move itself is
	// left in m_mvbuf, ready for ImplementMove()
	NN_NUMF ChooseTransition(int nt, const char *moves, const NN_NUMF* weights,
		int rnd100=RND_EXPLORE, fann* net=0)
	{
		ATLASSERT((net!=0) ^ (weights!=0)); // either linear or ANN supplied

		BOOL forX = m_board[moves[0]] > 0; 
		int ibest = -1;
		NN_NUMF bval = forX ? -REWARD_MAX : REWARD_MAX;
		NN_NUMF state[FEATURES];
		int neq=0, equivalent[CHEKERS*4]; // any identical value moves?

		ATLASSERT(nt);
		BOOL jump = nt < 0; // compulsory jump
		if(jump)
			nt = -nt;
		int i = 0;

		// rand() % 100 is 0..99, so '<' makes exactly rnd100 of 100 outcomes explore
		// (the previous '<=' explored on rnd100+1 percent of the calls)
		if(rnd100 && nt > 1 && (rand() % 100) < rnd100) {
			i = rand() % nt;
			nt = i + 1; // only one move examined
		}
		const int first = i; // first move that gets examined below

		for( ; i<nt; i++) {
			// move vector is somehow "compressed", unwind and load m_mvbuf
			FindMoveInVect(moves, i);
			if(jump)
				ExpandJumps(); // if any

			ATLASSERT( (m_board[m_mvbuf[0]] > 0) == forX); // all for the same player
			Checkers c2(*this);
			c2.ImplementMove(m_mvbuf);
#ifdef _DEBUG
			if(0) 
				c2.Show1(0);
#endif

			NN_NUMF v;
			if(net)
				v = c2.GetValueEstimate(net, !forX);
			else {
				c2.ExtractFeatures(state, !forX); // OPPOSITE since we evaluate next board state
				v = c2.EstFeatureValue(weights, state);
			}

			// 1-step lookahead
			if( (forX && v > bval) || (!forX && v < bval)) { // minimax
				ibest = i;
				bval = v;
				neq = 0;
			}
			else if(!jump) {
				// idle moves are many times the same, so pick among them at random
				if(v == bval)
					equivalent[neq++] = i;
			}
		}

		// no value beat the +-REWARD_MAX sentinel (e.g. a NaN estimate):
		// fall back to the first examined move instead of indexing with -1
		if(ibest < 0)
			ibest = first;

		if(neq) {
			ATLASSERT(!net); // very unusual to have equalities here?

			// @@@ i don't understand why this on leads to X wins all the time even against itself?
			//		if i turn this off, naturally against itself it's always draw
			equivalent[neq++] = ibest;
			ibest = equivalent[rand() % neq];
		}

		// optimal move in m_mvbuf
		if(nt > 1) {
			FindMoveInVect(moves, ibest);
			if(jump)
				ExpandJumps(); // if any
		}
		else
			ATLASSERT(ibest==0); // single candidate, m_mvbuf still holds it

		return bval; 
	}

	// play-time (no learning) lookahead for improved performance
	// lookahead length (horizon) is how many full moves ahead (both players)
	//   nt      - result of Transitions(): number of moves, negative when captures are forced
	//   moves   - move descriptor filled by Transitions()
	//   weights - linear weights, or 0 when 'net' is used
	//   net     - value network, or 0 when 'weights' is used
	//   horz    - horizon in full moves
	// returns the value attached to the chosen move (REWARD_WIN for an immediate win);
	// the chosen move itself is left in m_mvbuf, ready for ImplementMove()
	NN_NUMF ChooseRollout(int nt, const char *moves, const NN_NUMF* weights, fann* net, int horz=1)
	{
		ATLASSERT((net!=0) ^ (weights!=0)); // either linear or ANN supplied

		if(1==nt || -1==nt)
			return ChooseTransition(nt, moves, weights, 0, net);

		horz *= 2; // counts plies now

		BOOL jump = nt < 0; // compulsory jump
		if(jump)
			nt = -nt;

		BOOL forX = m_board[moves[0]] > 0; 
		ATLASSERT(forX); // proof of concept only, for AI(x) play
		ATLASSERT(horz >= 1);

		// evaluate all moves, keep the top X
		int i, j;
		NN_NUMF state[FEATURES], val[CHEKERS*4];
		ATLASSERT(nt <= ARRAYSIZE(val));
		for(i=0; i<nt; i++) {
			FindMoveInVect(moves, i);
			if(jump)
				ExpandJumps(); // if any

			Checkers c2(*this);
			c2.ImplementMove(m_mvbuf);
			if(jump) {
				j = c2.GameEnded();
				ATLASSERT(j >= 0);
				if(j > 0)
					return REWARD_WIN; // m_mvbuf filled ok
			}
			else
				ATLASSERT(!c2.GameEnded()); // we have 2+ options, so GOv unlikely

			if(net)
				val[i] = c2.GetValueEstimate(net, !forX);
			else {
				c2.ExtractFeatures(state, !forX); // OPPOSITE since we evaluate next board state -> NOP since turn isn't used for features!
				val[i] = c2.EstFeatureValue(weights, state);
			}
		}

		// examine top moves in terms of future performance
#define ROLLOUTS 5
		int ibest[ROLLOUTS];
		int rmax = nt > ROLLOUTS ? ROLLOUTS : nt;
		if(rmax < nt) {
			// selection of the rmax highest values; a picked entry is overwritten
			// with -REWARD_MAX so the next pass skips it
			for(i=0; i<rmax; i++) {
				NN_NUMF top = -REWARD_MAX;
				for(j=0; j<nt; j++) {
					if (val[j] > top) { // only max considered
						top = val[j];
						ibest[i] = j;
						ATLASSERT(top < REWARD_MAX);
					}
				}

				val[ibest[i]] = -REWARD_MAX; // won't be picked again
			}
		}
		else for(i=0; i<rmax; i++)
			ibest[i] = i;

		// keep checksums to avoid cycling to previously examined boards
		// there's less of a problem with ANN estimator, but still...
		DWORD chks[1024], nCheck = 1;
		chks[0] = UGHChecksumBuf(m_board);

		//unlike backgammon, here we don't have dice, so just straight tree search without averaging
		// whatever value is found best down the road is used to override original choice
		int opt = -1;
		for(i=0; i<rmax; i++) {
			FindMoveInVect(moves, ibest[i]);
			if(jump)
				ExpandJumps(); // if any

			// since there's some equivalence of board values and randomness, do repeated searches & average out
			// (a single pass when the ANN is used, ROLLOUTS passes for the linear player)
			RunningAvg ave;
			j = net ? ROLLOUTS-1 : 0;
			for(; j<ROLLOUTS; j++) {
				Checkers c2(*this);
				c2.ImplementMove(m_mvbuf);
				// NOTE: we don't care revisiting states in separate rollouts, only for a single rollout we track
				nCheck = 1;
				NN_NUMF aux = c2.RolloutCore(weights, net, horz, !forX, chks, nCheck);
				if(aux >= REWARD_MAX) {
					val[ibest[i]] = aux;
					break;
				}
				ave.Add1(aux);
			}
			if(j==ROLLOUTS)
				val[ibest[i]] = ave.avg; // no decisive win found, use the average
			if(-1==opt || val[ibest[i]] > val[opt]) {
				opt = ibest[i];
				if(val[opt] >= REWARD_MAX)
					break; // won!
			}
		}

		FindMoveInVect(moves, opt);
		if(jump)
			ExpandJumps(); // if any
		// return the VALUE of the chosen move, like every other return path here and
		// like ChooseTransition(); previously the move index 'opt' was returned
		return val[opt];
	}

	// Play greedy (no exploration) moves on THIS board, alternating sides, for at
	// most 'horz' plies, and report how the line turned out. The board is modified.
	//   wt/net  - linear weights or value network, forwarded to ChooseTransition
	//   horz    - number of plies to play, must be > 0
	//   forX    - side to move first
	//   visited - checksums of boards already seen; new boards are appended to it
	//   nCheck  - number of valid entries in visited (passed by value, so the
	//             caller's own count is not updated)
	// returns +-REWARD_MAX for a decided game (including a side that cannot move),
	// REWARD_DRAW for a draw, otherwise the value of the last chosen move
	NN_NUMF RolloutCore(const NN_NUMF *wt, fann* net, int horz, BOOL forX, DWORD *visited, DWORD nCheck)
	{
		ATLASSERT(horz > 0);
		// we don't expand full tree, just follow the optimal path
		// it is a bit like monte carlo, only we don't go all the way to the end of game
		// so no further recursion is needed

		char moves[MAX_SUCC];
		NN_NUMF v1 = REWARD_LOSE; // returned if the very first board was already visited
		while(1) {
			DWORD checksum = UGHChecksumBuf(m_board);
			// make sure we don't go back and forth in cycles
			ATLASSERT(nCheck < 1024); // how deep can you go?
			for(int i=0; i<nCheck; i++) {
				if(checksum==visited[i]) {
					//ATLASSERT(!net);
					return v1; // @@@ dubious, last value registered
				}
			}
			visited[nCheck++] = checksum;

			int nt = Transitions(forX, moves);
			if(!nt)
				return forX ? -REWARD_MAX : REWARD_MAX; // side to move is blocked and loses

			v1 = ChooseTransition(nt, moves, wt, 0/*no rnd*/, net);
			ImplementMove(); // plays the move ChooseTransition left in m_mvbuf
#ifdef _DEBUG
			if(0)
				Show1();
#endif

			int go = GameEnded();
			if(go) {
				if(-2==go)
					return REWARD_DRAW;
				return go*REWARD_MAX; // scale +-1 up to a "clear winner" value
			}

			horz--;
			if(!horz) {
				ATLASSERT(forX);
				return v1; // value from deepest node is final
			}
			forX = !forX;
		}
	}

	// Random player: plays a uniformly random plain move, unless a capture is
	// available, in which case the first listed capture (extended with any
	// follow-up jumps) is played.
	// returns FALSE when the side has no move (game lost), non-zero otherwise
	BOOL RandomMove(BOOL forX) 
	{
		char moves[MAX_SUCC];
		const int cnt = Transitions(forX, moves);
		if(0 == cnt)
			return FALSE; // game lost

		const bool mustCapture = cnt < 0; // captures exist
		FindMoveInVect(moves, mustCapture ? 0 : rand() % cnt);
		if(mustCapture)
			ExpandJumps(); // if any

		ImplementMove();
		return cnt;
	}

	// usually 1 slot moved, but could be jumping 2 or more items
	// Apply a move to the board.
	//   moves - sequence of (cell, direction) char pairs ended by MOVE_BRK;
	//           0 (default) means the move prepared in m_mvbuf
	// A step onto an occupied cell is a capture: the piece there is removed and the
	// mover lands one step further. A man reaching the far row (X: cell >= 32,
	// O: cell <= 4) becomes a king. m_cycle is incremented per call and reset to 0
	// by any capture or promotion.
	void ImplementMove(const char* moves = 0)
	{
		if(!moves)
			moves = m_mvbuf;
		ATLASSERT(IsValid());
		ATLASSERT(MOVE_BRK > ARRAYSIZE(m_board)); // marker can't be mistaken for a cell
		if(MOVE_BRK == *moves) {
			ATLASSERT(0); // no moves?
			return;
		}

		m_cycle++;
		BOOL forX = m_board[moves[0]] > 0; // who moves
		BOOL kong = IsKing(moves[0]);
		char target;
#ifdef _DEBUG
		BOOL ate1_ = 0, st_ = moves[0]; // debug only: capture counter and start cell
#endif
		while(1) {
			ATLASSERT(m_board[moves[0]]);
			ATLASSERT((m_board[moves[0]] > 0) == forX);
			ATLASSERT(IsValidSlot(moves[0]));

			// +-5 goes top-left, +-4 goes top-right (or reverse)
			ATLASSERT(abs(moves[1])==4 || abs(moves[1])==5);
			target = moves[0] + moves[1];
			ATLASSERT(IsValidSlot(target));
#ifdef _DEBUG
			if(!kong) { // plain pawns can only go forward
				if(forX)
					ATLASSERT(moves[1] > 0);
				else
					ATLASSERT(moves[1] < 0);
			}
#endif
			if(m_board[target]) { // jump over opponent
				ATLASSERT( (m_board[target] > 0) != forX);
				m_board[target] = 0; // opponent wiped out
				m_cycle = 0; // some action at last!
				ATLASSERT(++ate1_);
				target += moves[1]; // landing cell, one more step in the same direction
				ATLASSERT(IsValidSlot(target));
				ATLASSERT(!m_board[target]);
			}

			m_board[target] = m_board[moves[0]];
			m_board[moves[0]] = 0;
			if(!kong) { // a new king is born?
				if( (forX && target >= 32) || (!forX && target <= 4) ) {
					kong = 1;
					m_board[target] *= 2; // +-1 becomes +-2
					m_cycle = 0;
				}
			}

			moves += 2; // next (cell, direction) pair of a multi-jump
			if(*moves == MOVE_BRK)
				break;
		}

#ifdef _DEBUG
		if(0)
			Show1();
#endif

		// assert end position isn't allowed to jump again
#ifdef _DEBUG
		if(ate1_ && CanJump(target))
			ate1_ = 1;//@@@ this is rare if we end up in starting position
#endif
		//ATLASSERT(!ate1_ || !CanJump(target) || target==st_);
		ATLASSERT(IsValid());
	}

//protected: // internal implementation
	// can we eat opponent's piece from this position?
	// optionally returns the allowable directions of jumps
	//   c           - cell holding the piece to examine (must not be empty)
	//   ps          - optional output: directions (+-4/+-5) of the possible moves
	//   plainMoves2 - optional output (requires ps): bitmap where bit k is set when
	//                 ps[k] is a plain, non-capturing move
	// Without ps: returns TRUE as soon as one capture is possible, else 0.
	// With ps: returns the number of directions written. Captures are always
	// listed; plain moves are listed only when plainMoves2 is supplied.
	BOOL CanJump(int c, char* ps = 0, BOOL* plainMoves2 = 0) const
	{
		ATLASSERT(m_board[c]);
		BOOL forX = m_board[c] > 0; // who moves
		BOOL kong = IsKing(c);

		if(plainMoves2) {
			ATLASSERT(ps);
			*plainMoves2 = 0; // bitmap of plain moves, if any
		}

		// candidate directions: X men use the first two, O men the negated pair
		char shift[4] = {4,5,-4,-5}, n, i;
		if(kong)
			n = 4; // can go anywhere
		else {
			n = 2;
			if(!forX) {
				shift[0] = -4;
				shift[1] = -5;
			}
		}

		BOOL cando = 0; // number of directions found so far
		for(i=0; i<n; i++) {
			int target = c + shift[i];
			if(!IsValidSlot(target))
				continue;
			if(!m_board[target]) {
				// empty neighbour: a plain move, only reported on request
				if(plainMoves2) {
					*plainMoves2 |= 1 << cando++;
					if(ps)
						*ps++ = shift[i];
				}
				continue;
			}
			if( (m_board[target] > 0)==forX)
				continue; // own piece in the way
			// is there an empty slot for the jump?
			target += shift[i];
			if(!IsValidSlot(target))
				continue;
			if(!m_board[target]) {
				if(!ps)
					return TRUE; // details not required
				cando++;
				*ps++ = shift[i];
			}
		}

		return cando; // counts jumps too
	}

	// Locate move number 'num' (counting from 0 across all pieces) in a descriptor
	// produced by Transitions() and load it into m_mvbuf as a single
	// (cell, direction, MOVE_BRK) step understood by ImplementMove.
	// returns m_mvbuf, or 0 (after asserting) when num is beyond the listed moves
	char* FindMoveInVect(const char* moves, int num)
	{
		do {
			ATLASSERT(num >=0);

			// each group is: cell, one or more directions, MOVE_BRK
			int dirs = 0;
			while(moves[dirs+1] != MOVE_BRK)
				dirs++;
			ATLASSERT(dirs > 0);

			if(num < dirs) {
				// the wanted move belongs to this piece
				m_mvbuf[0] = moves[0];
				m_mvbuf[1] = moves[num+1];
				m_mvbuf[2] = MOVE_BRK;
				ATLASSERT(IsValidSlot(m_mvbuf[0]+m_mvbuf[1]));
				return m_mvbuf;
			}

			// skip the cell, its directions and the terminator
			num -= dirs;
			moves += dirs + 2;
		} while(*moves != MOVE_BRK); // a second MOVE_BRK ends the list

		ATLASSERT(0);
		return 0;
	}

	// see if capturing move can be extended
	// Expects a single capturing step in m_mvbuf (cell, direction, MOVE_BRK) and
	// appends follow-up (cell, direction) pairs for as long as the landing cell
	// offers another capture and was not visited earlier in the chain.
	// The board is modified temporarily during the search and restored on exit.
	// returns the number of extra jumps appended
	BOOL ExpandJumps()
	{
		// basic move in mvbuf already
		ATLASSERT(MOVE_BRK == m_mvbuf[2]);
		ATLASSERT(CanJump(m_mvbuf[0]));
#ifdef _DEBUG
		DWORD cks_ = UGHChecksumBuf(m_board);
#endif

		int x = 0, done = 0;
		char *prb = m_mvbuf;
		char pawn = m_board[m_mvbuf[0]]; // who moves
		char save_ini = pawn, start_pos = m_mvbuf[0];
		// king status is taken from the saved piece value: asking IsKing(start_pos)
		// after the cell is cleared below always answered FALSE, so a king landing
		// on the crown row was "promoted" again to the invalid value +-4
		BOOL kong = pawn > 1 || pawn < -1;
		m_board[start_pos] = 0; // for really roundabout jumping
		char cycle[CHEKERS], nc=0; // make sure we don't cycle jump
		while(!done) {
			cycle[nc++] = prb[0];
			char target = prb[0] + prb[1];
			ATLASSERT(IsValidSlot(target));
			char save = m_board[target];
			ATLASSERT(save);
			m_board[target] = 0; // avoid jumping back and forth in case of kings

			char t2 = target + prb[1]; // jump over to empty slot
			if(!kong) { // did it become king just now?
				if( (pawn>0 && t2 >= 32) || (pawn<0 && t2 <= 4) ) {
					kong = 1;
					pawn *= 2; // interim change
				}
			}
			ATLASSERT(IsValidSlot(t2));
			ATLASSERT(!m_board[t2]);
			m_board[t2] = pawn;

			char aux[4], i;
			// make sure we don't jump to a previous location
			for(i=0; i<nc; i++)
				if(t2==cycle[i])
					break;
			// nc < CHEKERS: stop before cycle[] would be overrun (captured pieces are
			// put back after each hop, so the chain length isn't bounded by piece count)
			if(i==nc && nc < CHEKERS && CanJump(t2, aux)) {
				// pick the first one @@@ what if more exist?
				prb += 2;
				prb[0] = t2;
				prb[1] = aux[0];
				prb[2] = MOVE_BRK;
				x++;
			}
			else
				done = 1;

			// undo the temporary changes of this hop
			m_board[target] = save;
			m_board[t2] = 0;
		}

		m_board[start_pos] = save_ini;

		ATLASSERT(prb+3-m_mvbuf <= ARRAYSIZE(m_mvbuf));
		ATLASSERT(UGHChecksumBuf(m_board) == cks_); // unchanged
		return x;
	}
};

// train a neural network for value function estimator
// @@@ option to train against linear player
//   net       - network to train; when null a new NN_INPUT x NN_HID x NN_OUT network
//               is created and returned through this reference, else training continues
//   max_games - number of self-play games
// Each ply trains the network towards the TD(0) target of the position before the
// move: the final reward when the game ended, else DISCNT * value of the chosen successor.
void train_ann(fann*& net, int max_games=NN_TRAINING)
{
	printf("training ANN FN with self-play...\n");
	clock_t tStart = clock();

	if(!net) { // create a new one
		net = fann_create_standard(3, NN_INPUT, NN_HID, NN_OUT);
		fann_set_activation_function_hidden(net, F_ACT);
		
#ifdef STEEPNESS
		fann_set_activation_steepness_output(net, STEEPNESS);
		fann_set_activation_steepness_hidden(net, STEEPNESS);
#endif

		// this is required because our data outputs are -1/1
		ATLASSERT(F_ACT==FANN_SIGMOID_SYMMETRIC);
		fann_set_activation_function_output(net, F_ACT);
		fann_set_training_algorithm(net, /*FANN_TRAIN_RPROP*/FANN_TRAIN_INCREMENTAL); // one by 1, not batch

#ifdef NN_RNDMAX
		//fann_init_weights(net, data); // this is better but relies on data we don't have
		fann_randomize_weights(net, -NN_RNDMAX, NN_RNDMAX); // default weights -0.1-0.1 
#endif
#ifdef NN_LRATE
		fann_set_learning_rate(net, NN_LRATE);
#endif
#ifdef NN_MOMENTUM
		fann_set_learning_momentum(net, NN_MOMENTUM);
#endif
	}
	// else continue training

	RunningAvg avlen;
	// the game counter is declared outside the loop because it is reported after it
	// (using a for-scoped variable past the loop is not valid standard C++)
	int g;
	for(g=1; g<=max_games; g++) {
		int turn = rand() & 1; // who plays first, X=1, O=0
		int i, ply = 0;

		if(g % 10 == 0)
			printf(".");

		Checkers test;
		char moves[MAX_SUCC];
		BOOL done = 0;

		NN_NUMF v, v1 = 0, reward = 0, fin[NN_INPUT];
		// @@@ what if state change includes the opponent move?

		while(1) {
			ply++; // statistic only

			//v = test.GetValueEstimate(net, turn);
			test.FillNetInputs(fin, turn); // inputs of the state BEFORE the move

			// choose e-greedy action for next state
			int cnt = test.Transitions(turn, moves);
			if(!cnt) {
				reward = turn ? REWARD_LOSE : REWARD_WIN; //rewards are from X-perspective
				done = 1; // can't move, you lose!
			}
			else {
				v1 = test.ChooseTransition(cnt, moves, 0, RND_EXPLORE, net);
					// @@@ ramp exploration?
					//(TRAINING-g)*(RND_EXPLORE - RND_EXPLORE_MIN)/TRAINING + RND_EXPLORE_MIN, net);

				test.ImplementMove();
				i = test.GameEnded();
				if(i) {
					if(-2==i)
						reward = REWARD_DRAW;
					else {
						reward = i;
						ATLASSERT( (i>0)==turn);
					}
					done = 1;
				}
				else {
					ATLASSERT(test.GetValueEstimate(net, !turn)==v1);
				}
			}

			// learning rule: Dw = a*(target-prediction)*dP/dw (least squares objective)
			// of course now that's buried in the backpropagation
			// target - output == [R+gam*V(s1) - V(s)] -> both include the same predictor!
			// reward R is zero except for the very end of the game; v1 is 0 for terminal state
			v = done ? reward : DISCNT*v1; // target
			fann_train(net, fin, &v);

			if(done)
				break;

			turn = !turn;
			//v = v1; -> use the new weights
			//memcpy(state, s1, sizeof(state));
		}

		avlen.Add1(ply);
		// every now and then check weight conditioning
		if(g % 1000 == 0)
		{
			printf("\n%d (ply=%d) ", g, (int)avlen.avg);
			fann_stats(net);
			avlen.Reset();
		}
	}

	// g is one past the last game here, so g-1 games were played;
	// the elapsed time is cast because clock_t need not be an int
	printf("training on %d self games complete in %d secs\n", g-1, 
		(int)((clock()-tStart)/CLOCKS_PER_SEC));
}

// use self-play to learn a linear feature rule using TD(0) RL
// using TD(0) means we don't need to wait till the end of the game to assign weights
// @@@ alternatively try monte-carlo and save game states (features)
//   wt            - FEATURES weights, updated in place
//   max_games     - number of self-play games
//   reset_weights - randomize the weights first; only honoured when NN_RNDMAX is
//                   defined, otherwise training continues from the supplied weights
void train_linear(NN_NUMF *wt/*FEATURES*/, int max_games=TRAINING, BOOL reset_weights=1)
{
	printf("training linear FN with self-play...\n");
	clock_t tStart = clock();

	int i;
#ifdef NN_RNDMAX
	ATLASSERT(NN_RNDMAX > 0);
	if(reset_weights) // option to continue earlier learning
		for(i=0; i<FEATURES; i++)
			wt[i] = (((NN_NUMF)rand() / RAND_MAX)*2 - 1.0)*NN_RNDMAX;
#endif

	RunningAvg avlen;
	// the game counter is declared outside the loop because it is reported after it
	// (using a for-scoped variable past the loop is not valid standard C++)
	int g;
	for(g=1; g<=max_games; g++) {
		int turn = rand() & 1; // who plays first, X=1, O=0
		int ply = 0;

		if(g % 10 == 0)
			printf(".");

		Checkers test;
		char moves[MAX_SUCC];
		BOOL done = 0;

		NN_NUMF state[FEATURES], s1[FEATURES], v, v1 = 0, reward = 0;

		while(1) {
			ply++; // statistic only

			// features and value of the state BEFORE the move
			test.ExtractFeatures(state, turn);
			v = test.EstFeatureValue(wt, state);

			// choose e-greedy action for next state
			int cnt = test.Transitions(turn, moves);
			if(!cnt) {
				reward = turn ? REWARD_LOSE : REWARD_WIN; //rewards are from X-perspective
				done = 1; // can't move, you lose!
			}
			else {
				v1 = test.ChooseTransition(cnt, moves, wt);
					// @@@ ramp exploration?
					// ,(TRAINING-g)*(RND_EXPLORE - RND_EXPLORE_MIN)/TRAINING + RND_EXPLORE_MIN

				test.ImplementMove();
				i = test.GameEnded();
				if(i) {
					if(-2==i)
						reward = REWARD_DRAW;
					else {
						reward = i;
						ATLASSERT( (i>0)==turn);
					}
					done = 1;
				}
				else {
#ifdef _DEBUG
					test.ExtractFeatures(s1, !turn); // other guy's turn for state
					ATLASSERT(test.EstFeatureValue(wt, s1)==v1);
#endif
				}
			}

			// learning rule: Dw = a*(target-prediction)*dP/dw (least squares objective)
			//		= a*[R+gam*V(s1) - V(s)]*xf, where xf is the feature that corresponds to weight
			// reward R is zero except for the very end of the game

			if(done) {
				// v1 is 0 for terminal state
				for(i=0; i<FEATURES; i++)
					wt[i] += NN_LRATE*(reward - v)*state[i];
				break;
			}

			for(i=0; i<FEATURES; i++)
				// reward is zero for intermediate states
				wt[i] += NN_LRATE*(/*0 +*/ DISCNT*v1 - v)*state[i];

			turn = !turn;
			//v = v1; -> use the new weights
			//memcpy(state, s1, sizeof(state));
		}

		avlen.Add1(ply);
		// every now and then check weight conditioning
		if(g % 1000 == 0)
		{
			printf("\n%d (ply=%d) ", g, (int)avlen.avg);
			for(i=0; i<FEATURES; i++)
				printf("%.3g ", wt[i]);
			printf("\n");
			avlen.Reset();
		}
	}

	// g is one past the last game here, so g-1 games were played;
	// the elapsed time is cast because clock_t need not be an int
	printf("training on %d self games complete in %d secs\n", g-1, 
		(int)((clock()-tStart)/CLOCKS_PER_SEC));
}

// Convert a board coordinate such as "B3" (or "b3") to its cell number.
// Letters A..H are the columns left to right, digits 1..8 the rows top to bottom.
// returns the cell index (1..35), or 0 when the text is not a playable square
int A2cell(char* token)
{
	char letter = token[0];
	if(letter > 'Z')
		letter -= 'a' - 'A'; // uppercase

	if(letter < 'A' || letter > 'H')
		return 0;
	const int col = letter - 'A' + 1; // 1..8

	const int row = atoi(token + 1);
	if(row < 1 || row > 8)
		return 0;

	// exclude unused cells: column and row of a playable square differ in parity
	if( ((col ^ row) & 1) == 0 )
		return 0;

	// first the rows below this one, including one auxiliary slot per two rows
	const int rowsBelow = 8 - row;
	int cell = rowsBelow*4 + rowsBelow/2;

	// then the position inside the row, counted from the H side (1..4)
	cell += (10 - col) / 2;

	if(0 == cell % 9)
		cell++;

	return cell;
}

// from XBOARD to Ax
// Convert a cell number (1..35, excluding the auxiliary slots 9/18/27) to its board
// coordinate, e.g. 35 -> "B1". Inverse of A2cell for playable cells.
// returns a pointer to a static buffer that is overwritten by the next call;
// anything that is not a playable cell yields "??" (such input used to overflow
// the buffer or to be reported as a neighbouring square)
char* cell2A(int c)
{
	static char buf[3] = {0};

	if(c < 1 || c > 35 || 0 == c % 9) {
		buf[0] = '?';
		buf[1] = '?';
		buf[2] = 0;
		return buf;
	}

	// remove the auxiliary slots from the numbering: 1..32
	const int compact = c - c/9;

	// row counted from the side where cell 1 lives: 4 cells per row
	const int rank = (compact + 3) / 4; // 1..8

	// position inside the row (1..4), stretched over the 8 columns
	int file = compact % 4;
	if(!file)
		file = 4;
	file *= 2;
	if(!(rank & 1))
		file--; // even rows are shifted by one column

	buf[0] = (char)('A' + (8 - file));
	buf[1] = (char)('0' + (9 - rank)); // displayed rows run top to bottom
	buf[2] = 0;
	return buf;
}

// Program entry point.
// Loads the neural net from a file named after the training parameters (or
// calls train_ann when the file is missing), then plays up to 100 games in
// which X (turn != 0) is the AI. O is the human when `manual` is set (a single
// game is played), otherwise the linear player `pwtOpp`, otherwise a random
// mover. An optional first command line argument gives the lookahead horizon
// for the AI (0 = no lookahead).
// Returns 0.
int main(int argc, char* argv[])
{
	InitRnd();

	int horizon = 0;
	if(argc > 1) // lookahead horizon supplied? (0= no lookahead, easier opponent)
		horizon = atoi(argv[1]);

	// 6-feature player percent AI won over rnd player 93 (drawn=3)
	NN_NUMF wt2[FEATURES] = {0.214, 0.149, -0.2, -0.242, 0.0487, -0.0623};
	NN_NUMF wt[FEATURES] = {0.216, 0.183, -0.243, -0.174, 0.0604, -0.0689}; //99% wins?
	NN_NUMF* pwtOpp = wt2; // play one linear against the other
	fann* net = 0;

	char moves[MAX_SUCC];
	char buf[100];
	int num[10]; // [0]=from cell, [1]=to cell, [2]=offset between them

	char filename[MAX_PATH];
	sprintf(filename, "test_%d-%d-%d-%g-%g-%d=%d-%g.net",
		NN_INPUT, NN_HID, NN_OUT,
		NN_LRATE, STEEPNESS, RND_EXPLORE, NN_TRAINING, DISCNT);

	//train_linear(wt); //uncomment this to enable self-training
	net = fann_create_from_file(filename);
	// NOTE(review): net is null here; this only produces a usable net if
	// train_ann takes its argument by reference - confirm
	if(!net)
		train_ann(net); // neural net training

	BOOL manual = 1; // enable for AI vs human play (else AI vs AI2)

	// try learner against itself or random player
	int won_ai = 0, draw = 0;
	// games actually started; used as the denominator of the final statistics
	// (the loop counter is one past the last game once the loop finishes)
	int played = 0;

	for(int g=1; g<=100; g++) {
		played++;
		Checkers test;
		int turn = rand() & 1; // X first? try it both ways

		if(manual) {
			test.Show1();
			printf("YOU play for O against AI(X)\n");
			printf("use coordinates to specify your move (e.g. B3,C4)\n");
			printf("press any key to start (first=%c)\n", turn? 'X' : 'O');
			getch();
		}

		while(1) {
			int ate; // 0 = side to move has no move; later reused for GameEnded() result
			if(turn) {
				ate = test.Transitions(turn, moves);
				if(ate) {
					if(horizon)
						test.ChooseRollout(ate, moves, net ? 0 : wt, net, horizon); // with lookahead
					else
						test.ChooseTransition(ate, moves, net ? 0 : wt, 0/*no rnd*/, net);
					test.ImplementMove();
				}
			}
			else if(manual) { // computer vs human
				ate = test.Transitions(turn, moves);
				if(ate) {
retry:
					printf("Your(O) move: ");
					// bounded read so a long line cannot overrun buf; on end of
					// input give up instead of retrying forever on stale data
					if(scanf("%99s", buf) != 1) { // with space separator it doesn't work!
						printf("\nno more input, game abandoned\n");
						break;
					}

					// strtok returns NULL straight away for input made only of
					// commas; that case falls into the "i != 2" message below
					char* token = strtok( buf, ",");
					int i = 0, j;
					while(token) {
						//ATLASSERT(strlen(token)==2);
						// count every token but keep only from/to, so extra
						// tokens cannot write past the end of num[]
						if(i < 2)
							num[i] = A2cell(token);
						i++;
						// Get next token: (mangles buf in the process)
						token = strtok( NULL, ",");
					}

					BOOL ok = 0;
					if(i != 2)
						printf("please enter from/to coordinates separated by comma e.g. B3,C4\n");
					else if(test.m_board[num[0]] >= 0)
						printf("please choose a cell with O pawn\n");
					else {
#if 0 // troubleshoot this bastard
						for(i=35; i>0; i--) {
							if((i%9)==0)
								continue;
							printf("%d - %s\n", i, cell2A(i));
						}
#endif
						// is it a valid move?
						char jumps[4];
						int regular; // presumably bit k set = jumps[k] is a plain (non capturing) step - confirm
						i = test.CanJump(num[0], jumps, &regular);
						if(!i)
							printf("this pawn cannot move\n");
						else {
							num[2] = num[1] - num[0]; // offset, must be +-4/5
							j = i;
							// search the allowed offsets; i ends at -1 when none matches
							while(i--)
								if(jumps[i]==num[2])
									break;
							if(-1==i) {
								printf("valid destinations for this pawn are: ");
								for(i=0; i<j; i++) {
									ATLASSERT(test.IsValidSlot(num[0] + jumps[i]));
									printf("%s ", cell2A(num[0] + jumps[i]));
								}
								printf("\n");
							}
							else if(ate < 0 && (regular & (1<<i))) {
								printf("there are forced moves to play, e.g. %s,",
									cell2A(moves[0]));
								printf("%s\n", cell2A(moves[0]+moves[1])); // shared static buffer
							}
							else {
								//FindMoveInVect(moves, ibest);
								test.m_mvbuf[0] = num[0];
								test.m_mvbuf[1] = num[2];
								test.m_mvbuf[2] = MOVE_BRK;
								if( (regular & (1<<i)) ==0)
									test.ExpandJumps();

								test.ImplementMove();
								ok = TRUE;
							}
						}
					}

					if(!ok)
						goto retry;
				}
			}
			else if(pwtOpp) { // different opponent
				ATLASSERT(pwtOpp != wt); // if you want linear vs linear
				ate = test.Transitions(turn, moves);
				if(ate) {
					test.ChooseTransition(ate, moves, pwtOpp, 0/*no rnd*/);
					test.ImplementMove();
				}
			}
			else
				ate = test.RandomMove(turn);

			if(manual)
				test.Show1();
			if(!ate) {
				printf("%c cannot move - LOSER!\n", turn ? 'X' : 'O');
				if(!turn) // O is stuck, so X (the AI) wins
					won_ai++;
				break;
			}

			//if(ate < 0) check in all cases in case of cycling
			ate = test.GameEnded();
			if(ate) {
				if(-2==ate) {
					draw++;
					printf("max idle moves, DRAW\n");
				}
				else {
					ATLASSERT((ate > 0)==turn);
					printf("%c WON!\n", turn ? 'X' : 'O');
					if(turn)
						won_ai++;
				}
				break;
			}

			turn = !turn;
		}

		if(!manual)
			test.Show1();
		int p,k;
		p = test.CountCheckers(1, k);
		printf("X left pawns=%d, kings=%d\n", p,k);
		p = test.CountCheckers(0, k);
		printf("O left pawns=%d, kings=%d\n", p,k);

		if(manual)
			break; // just one game
	}

	if(!manual)
		printf("percent AI won over rnd player %d (drawn=%d)\n",100*won_ai/played, 100*draw/played);
	//t4.FlipBoard(test);
	//t4.Show1();

	if(net) {
		fann_save(net, filename);
		fann_destroy(net); /* chucks memory leaks! */
	}
//	TSEKIT_LEAKZ();
	return 0;
}
