diff --git a/Schafkopf.Training/Algos/MDP.cs b/Schafkopf.Training/Algos/MDP.cs
index 49658cd..083be7c 100644
--- a/Schafkopf.Training/Algos/MDP.cs
+++ b/Schafkopf.Training/Algos/MDP.cs
@@ -32,14 +32,13 @@ public void Collect(PPORolloutBuffer buffer)

         for (int gameId = 0; gameId < numGames + 1; gameId++)
         {
-            Console.Write($"\rcollecting ppo training data { gameId+1 } / { numGames+1 } ... ");
+            Console.Write($"\rcollecting data { gameId+1 } / { numGames+1 } ");
             playGame(envs, states, batchesOfTurns);
             prepareRewards(states, rewards);
             fillBuffer(gameId, buffer, states, batchesOfTurns, rewards);

             for (int i = 0; i < states.Length; i++)
                 states[i] = envs[i].Reset();
         }
-        Console.WriteLine();
     }

diff --git a/Schafkopf.Training/Algos/PPOAgent.cs b/Schafkopf.Training/Algos/PPOAgent.cs
index 4e9e791..9ee2809 100644
--- a/Schafkopf.Training/Algos/PPOAgent.cs
+++ b/Schafkopf.Training/Algos/PPOAgent.cs
@@ -38,16 +38,16 @@ public PPOModel Train(PPOTrainingSettings config)

         for (int ep = 0; ep < config.NumTrainings; ep++)
         {
+            Console.WriteLine($"epoch {ep+1}");
             exps.Collect(rollout);
             model.Train(rollout);

-            if ((ep + 1) % 10 == 0)
-            {
-                model.RecompileCache(batchSize: 1);
-                double winRate = benchmark.Benchmark(agent);
-                model.RecompileCache(batchSize: config.BatchSize);
-                Console.WriteLine($"epoch {ep}: win rate vs. random agents is {winRate}");
-            }
+            model.RecompileCache(batchSize: 1);
+            double winRate = benchmark.Benchmark(agent);
+            model.RecompileCache(batchSize: config.BatchSize);
+
+            Console.WriteLine($"win rate vs. random agents: {winRate}");
+            Console.WriteLine("--------------------------------------");
         }

         return model;
@@ -150,7 +150,7 @@ public void Train(PPORolloutBuffer memory)
         int i = 1;
         foreach (var batch in batches)
         {
-            Console.Write($"\rtraining {i++} / {numBatches}");
+            Console.Write($"\rtraining {i++} / {numBatches} ");
             updateModels(batch);
         }
         Console.WriteLine();
diff --git a/Schafkopf.Training/Benchmark/RandomPlayBenchmark.cs b/Schafkopf.Training/Benchmark/RandomPlayBenchmark.cs
index 51c3a08..13afd5b 100644
--- a/Schafkopf.Training/Benchmark/RandomPlayBenchmark.cs
+++ b/Schafkopf.Training/Benchmark/RandomPlayBenchmark.cs
@@ -21,6 +21,7 @@ public double Benchmark(ISchafkopfAIAgent agentToEval, int epochs = 10_000)
         int wins = 0;
         for (int i = 0; i < epochs; i++)
         {
+            Console.Write($"\rbenchmarking {i+1} / {epochs} ");
             var log = session.ProcessGame();

             // info: only evaluate games where cards were played
@@ -31,6 +32,7 @@ public double Benchmark(ISchafkopfAIAgent agentToEval, int epochs = 10_000)
             bool isWin = !eval.DidCallerWin ^ isCaller;
             wins += isWin ? 1 : 0;
         }
+        Console.WriteLine();

         return (double)wins / epochs; // win rate
     }