(Tuto) Update to gegelati 1.4.0

gegelati · Oct 29, 2024 · 5d15f65 · 5d15f65
1 parent bd1bea0
commit 5d15f65
Show file tree

Hide file tree

Showing 5 changed files with 50 additions and 40 deletions.
diff --git a/docs/_pages/index.md b/docs/_pages/index.md
@@ -22,7 +22,7 @@ While being fluent in C++ certainly is an asset to follow this tutorial, bits of
 
 ### C++ Environment:
 This tutorial requires a C++ development environment compatible with the C++17 standard.
-Compatibility of this tutorial was tested with MS Visual Studio Community Edition (MSVC) 2019, and GCC v9.
+Compatibility of this tutorial was tested with MS Visual Studio Community Edition (MSVC) 2022, and GCC v9.
 
 ### Bash environment
 Some scripts embedded in the given files of this tutorial require the availability of a bash environment.
@@ -354,7 +354,7 @@ Their utility will be covered in a more advanced tutorial.
 {% details Solution to #5 (Click to expand) %}
 ```cpp
 /* pendulum_wrapper.cpp */
-void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode)
+void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber)
 {
 	this->pendulum.setAngle(M_PI);
 	this->pendulum.setVelocity(0.0);
@@ -397,7 +397,7 @@ _C++ tip:_ The `double fmod(double, double)` method can be used to compute the m
 
 ```cpp
 /* pendulum_wrapper.cpp */
-void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode)
+void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber)
 {
 	this->pendulum.setAngle(M_PI);
 	this->pendulum.setVelocity(0.0);
@@ -462,11 +462,13 @@ An example of log is presented hereafter:
 Pendulum TPG training.
                       Train
       Gen   NbVert      Min      Avg      Max  T_mutat   T_eval  T_total
-        0      164 -7624.63 -1383.36  -980.33     0.00     1.51     1.51
-        1      167 -4848.23 -1263.68  -980.33     7.28     1.56    10.38
-        2      170 -6862.46 -1242.03  -844.01     2.63     1.53    14.55
-        3      170 -7707.18 -1181.00  -844.01     7.60     1.48    23.65
-        4      168
+        0      162 -6937.82 -1560.49  -844.54     0.00     1.48     1.50
+        1      167 -6912.39 -1316.13  -770.64    17.64     1.80    20.96
+        2      170 -6937.82 -1330.90  -651.14    17.63     2.04    40.69
+        3      167 -6990.10 -1455.24  -651.14    10.82     2.22    53.78
+        4      166 -6862.46 -1302.98  -651.14     8.17     2.40    64.37
+        5      169 -6990.10 -1326.34  -330.62     8.54     2.45    75.40
+        6      167 -6990.10 -1304.40  -194.47     5.67     2.62    83.74
 ```
 
 The generated logs contain a table that can be exported in the CSV format by giving a file path to the `LABasicLogger` constructor.

diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -8,7 +8,7 @@
 # - SDL2_image
 # - SDL2_ttf
 
-set(GEGELATI_VERSION "1.3.1")
+set(GEGELATI_VERSION "1.4.0")
 set(GEGELATI_MSVC_ARCHIVE "gegelatilib-msvc-${GEGELATI_VERSION}.zip")
 set(GEGELATI_MSVC_URL "https://github.com/gegelati/gegelati/releases/download/v${GEGELATI_VERSION}/${GEGELATI_MSVC_ARCHIVE}")
 

diff --git a/params.json b/params.json
@@ -1,67 +1,68 @@
 {
 	// Number of recordings held in the Archive.
 	// "archiveSize" : 50, // Default value
-	"archiveSize" : 2000,
+	"archiveSize": 2000,
 	// Probability of archiving the result of each Program execution.
 	// "archivingProbability" : 0.05, // Default value
-	"archivingProbability" : 0.01,
+	"archivingProbability": 0.01,
 	// Boolean used to activate an evaluation of the surviving roots in validation
 	// mode after the training at each generation.
 	// "doValidation" : false, // Default value
-	"doValidation" : false,
+	"doValidation": false,
 	// Maximum number of actions performed on the learning environment during
 	// each evaluation of a root.
 	// "maxNbActionsPerEval" : 1000, // Default value
-	"maxNbActionsPerEval" : 1500,
+	"maxNbActionsPerEval": 1500,
 	// Maximum number of times a given root is evaluated. After this number is
 	// reached, possibly after several generations, the score of the root will be
 	// fixed, and no further evaluation will be done.
 	// "maxNbEvaluationPerPolicy" : 1000, // Default value
-	"maxNbEvaluationPerPolicy" : 10,
-	"mutation" : 
-	{
-		"prog" : 
-		{
+	"maxNbEvaluationPerPolicy": 10,
+	"mutation": {
+		"prog": {
 			// Maximum constant value possible.
 			// "maxConstValue" : 100, // Default value
-			"maxConstValue" : 10,
+			"maxConstValue": 10,
 			// Maximum number of Line within the Program of the TPG.
 			// "maxProgramSize" : 96, // Default value
-			"maxProgramSize" : 20,
+			"maxProgramSize": 20,
 			// Minimum constant value possible.
 			// "minConstValue" : -10, // Default value
-			"minConstValue" : -10,
+			"minConstValue": -10,
 			// Probability of inserting a line in the Program.
 			// "pAdd" : 0.5, // Default value
-			"pAdd" : 0.5,
+			"pAdd": 0.5,
 			// Probability of each constant to be mutated.
 			// "pConstantMutation" : 0.5, // Default value
-			"pConstantMutation" : 0.5,
+			"pConstantMutation": 0.5,
 			// Probability of deleting a line of the Program.
 			// "pDelete" : 0.5, // Default value
-			"pDelete" : 0.5,
+			"pDelete": 0.5,
 			// Probability of altering a line of the Program.
 			// "pMutate" : 1.0, // Default value
-			"pMutate" : 1.0,
+			"pMutate": 1.0,
+			// Probability of creating a new program.
+			// "pNewProgram" : 0.0, // Default value
+			"pNewProgram": 0.0,
 			// Probability of swapping two lines of the Program.
 			// "pSwap" : 1.0, // Default value
-			"pSwap" : 1.0
+			"pSwap": 1.0
 		},
 		"tpg": {
 			// When a Program is mutated, makes sure its behavior is no longer the same.
 			// "forceProgramBehaviorChangeOnMutation" : false, // Default value
-			"forceProgramBehaviorChangeOnMutation": true,
+			"forceProgramBehaviorChangeOnMutation": false,
+			// Number of root TPGTeams at the initialisation of a TPGGraph.
+			// If 0, it will be initialized to the number of surviving roots.
+			// "initNbRoots" : 0, // Default value
+			"initNbRoots": 0,
 			// Maximum number of TPGEdge connected to each TPGTeam of the TPGGraph when
 			// initialized.
 			// "maxInitOutgoingEdges" : 3, // Default value
 			"maxInitOutgoingEdges": 3,
 			// Maximum number of outgoing edge during TPGGraph mutations.
 			// "maxOutgoingEdges" : 5, // Default value
 			"maxOutgoingEdges": 5,
-			// Number of TPGAction vertex of the initialized TPGGraph.
-			// This parameter is generally automatically set by the LearningEnvironment.
-			// /* "nbActions" : 0,*/ // Commented by default
-			/* "nbActions" : 0,*/
 			// Number of root TPGTeams to maintain when populating the TPGGraph
 			// "nbRoots" : 100, // Default value
 			"nbRoots": 150,
@@ -84,22 +85,22 @@
 	},
 	// Number of generations of the training.
 	// "nbGenerations" : 500, // Default value
-	"nbGenerations" : 1200,
+	"nbGenerations": 1200,
 	// [Only used in AdversarialLearningAgent.]
 	// Number of times each job is evaluated in the learning process.
 	// Each root may belong to several jobs, hence this parameter should be lower
 	// than the nbIterationsPerPolicyEvaluation parameter.
 	// "nbIterationsPerJob" : 1, // Default value
-	"nbIterationsPerJob" : 1,
+	"nbIterationsPerJob": 1,
 	// Number of evaluation of each root per generation.
 	// "nbIterationsPerPolicyEvaluation" : 5, // Default value
-	"nbIterationsPerPolicyEvaluation" : 1,
+	"nbIterationsPerPolicyEvaluation": 1,
 	// Number of Constant available in each Program.
 	// "nbProgramConstant" : 0, // Default value
-	"nbProgramConstant" : 0,
+	"nbProgramConstant": 0,
 	// Number of registers for the Program execution.
 	// "nbRegisters" : 8, // Default value
-	"nbRegisters" : 8,
+	"nbRegisters": 8,
 	// [Only used in ParallelLearningAgent and child classes.]
 	// Number of threads used for the training process.
 	// When undefined in the json file, this parameter is automatically set to the
@@ -108,5 +109,5 @@
 	/* "nbThreads" : 0,*/
 	// Percentage of deleted (and regenerated) root TPGVertex at each generation.
 	// "ratioDeletedRoots" : 0.5, // Default value
-	"ratioDeletedRoots" : 0.5
-}
+	"ratioDeletedRoots": 0.85
+}
diff --git a/src/training/pendulum_wrapper.cpp b/src/training/pendulum_wrapper.cpp
@@ -30,7 +30,7 @@ std::vector<std::reference_wrapper<const Data::DataHandler>> PendulumWrapper::ge
 #endif // SOLUTION
 }
 
-void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode)
+void PendulumWrapper::reset(size_t seed, Learn::LearningMode mode, uint16_t iterationNumber, uint64_t generationNumber)
 {
 #ifdef SOLUTION
 	this->pendulum.setAngle(M_PI);

diff --git a/src/training/pendulum_wrapper.h b/src/training/pendulum_wrapper.h
@@ -76,8 +76,15 @@ class PendulumWrapper : public Learn::LearningEnvironment {
 	 * the LearningEnvironment.
 	 * \param[in] mode LearningMode in which the Environment should be
 	 * reset for the next set of actions.
+	 * \param[in] iterationNumber the integer value to indicate the current
+	 * iteration number when parameter nbIterationsPerPolicyEvaluation > 1
+	 * \param[in] generationNumber the integer value to indicate the
+	 * current generation number
 	 */
-	virtual void reset(size_t, Learn::LearningMode) override;
+	virtual void reset(size_t seed,
+                       Learn::LearningMode mode,
+                       uint16_t iterationNumber = 0,
+                       uint64_t generationNumber = 0) override;
 
 	/**
 	 * \brief Execute an action on the LearningEnvironment.