// mlStrategy.js
//const tf = require('@tensorflow/tfjs');
const tf = require('@tensorflow/tfjs-node'); //back-end version of tensorflow
//import * as tf from '@tensorflow/tfjs';
//import * as tf from '@tensorflow/tfjs-node';
//import os from 'os';
const os = require('os');
// // Strategies for ML
// const COOPERATEML = 0;
// const DEFECTML = 1;
// LSTM (Long Short-Term Memory) Layers:
// In most practical applications of the iterative prisoner's dilemma,
// especially if you're considering strategies that depend on the history of moves,
// an LSTM-based approach would likely be more adept. This allows the model to learn and
// make decisions based on the pattern of previous interactions, which is a key element of
// most successful strategies in the prisoner's dilemma game.
//
// NOTE!!! I WANT TO TRY TO START SIMPLE WITH A DENSE LAYERS APPROACH
// (a sketch of what the LSTM variant could look like lives in CreateLstmModelSketch below)
//
const {
COOPERATE,
DEFECT,
COOPERATEML,
DEFECTML
} = require('./config.js');
const {
logTensor,
logMemory,
heapSizeInMB,
tfMemLog
} = require('./memoryManagement.js');
async function initializeMLStrategy() {
try {
let mlStrategy = await MLStrategy.create();
// the caller can now use the mlStrategy instance
return mlStrategy;
} catch (error) {
console.error("Failed to create MLStrategy instance:", error);
return null;
}
}
class MLStrategy {
constructor(mlMemoryLength, numberOfInputNodes, numberOfOutputNodes, numberOfHiddenNeurons, discountFactor) {
this.possibleActions = [COOPERATE, DEFECT];
this.mlMemoryLength = mlMemoryLength;
this.numberOfInputNodes = numberOfInputNodes; // Last X rounds × 2 players
this.numberOfOutputNodes = numberOfOutputNodes;
this.numberOfHiddenNeurons = numberOfHiddenNeurons;
this.model = null;
this.discountFactor = discountFactor;
this.isTraining = false; // Flag to indicate if training is in progress
this.epsilon = 0;
this.epsilonCount = 0;
this.totalTimeForLast1000Decisions = 0;
this.countDecisionsCoop = 0;
}
static async create() {
const mlMemoryLength = 5; //how many previous decisions we pass to the ML for training
const numberOfInputNodes = 2 * mlMemoryLength; // Last X rounds × 2 players
const numberOfOutputNodes = 2; // Cooperate or Defect
const numberOfHiddenNeurons = 6; // Units are "neurons" for a hidden layer
// LEARN NOTES
// Rule of Thumb: A common starting point is to use a number of units that is somewhere
// between the size of the input layer and the size of the output layer.
// For example, if you have an input layer of size 20 and an output layer of size 2,
// you might start with a hidden layer of 10-15 units.
// Machine learning often involves experimentation. Start with a reasonable guess like 4-5 units,
// train your model, evaluate its performance, and adjust the number of units based on the results.
// If your model is underfitting (performing poorly on training data), you might need more units.
// If it’s overfitting (performing well on training data but poorly on unseen data),
// reducing the number of units might help.
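// With this file's numbers: 10 inputs (2 × mlMemoryLength) and 2 outputs,
// so the 6 hidden neurons chosen above fall inside that suggested range.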
const discountFactor = 0.9; // 0.9 is a common choice, can be adjusted based on specific needs
// The discount factor, usually denoted as γ (gamma), is used to balance immediate and future rewards.
// It represents the importance of future rewards compared to immediate ones.
// Value Range: It ranges between 0 and 1. A value close to 0 makes the agent
// short-sighted by only considering immediate rewards, while a value close to 1 makes
// the agent far-sighted by valuing future rewards more.
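// Worked example with the targetQ update in trainModel below: a reward of 1
// (mutual cooperation) and a hypothetical best future Q of 2 give a target of
// 1 + 0.9 * 2 = 2.8.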
const model = new MLStrategy(mlMemoryLength, numberOfInputNodes, numberOfOutputNodes, numberOfHiddenNeurons, discountFactor);
await model.CreateModel();
return model;
}
async CreateModel() {
// first version
// this.model = tf.sequential({
// layers: [
// tf.layers.dense({ units: this.numberOfUnits, inputShape: [this.numberOfInputNodes], activation: 'relu' }),
// tf.layers.dense({ units: this.numberOfOutputNodes })
// ]
// });
// Non-LSTM layers like Dense layers don't work with the concept of timesteps.
// They just expect [samples, features]. If you were using a Dense layer directly on your data
// without an LSTM, your input shape would indeed be [2 * mlMemoryLength] because you're essentially
// flattening the sequence of moves into a single array.
// second version, tweaking learningRates
// try learningRates = [0.01, 0.001, 0.0001];
// Set a specific learning rate (normally it is not required)
//const learningRate = 0.0001; // Example value, you can change this for each run
// Create an optimizer with the specified learning rate
//const optimizer = tf.train.adam(learningRate);
//this.model.compile({ loss: 'meanSquaredError', optimizer: 'adam' });
//this.model.compile({ loss: 'meanSquaredError', optimizer: optimizer });
// third version adding a 2nd hidden layer
// units originally were suggested 64 and 32, I changed it to this.numberOfUnits * 2 and this.numberOfUnits
this.model = tf.sequential();
this.model.add(tf.layers.dense({
units: this.numberOfHiddenNeurons * 2, // Number of neurons
inputShape: [this.numberOfInputNodes], // Input shape
activation: 'relu',
biasInitializer: 'zeros' // Weight initializer for bias
})); // First hidden layer
//this.model.add(tf.layers.dense({units: this.numberOfHiddenNeurons, activation: 'relu'})); // Second hidden layer
this.model.add(tf.layers.dense({units: this.numberOfOutputNodes})); // Output layer: 2 units for cooperate (0) and defect (1)
this.model.compile({ loss: 'meanSquaredError', optimizer: tf.train.adam() });
// this.model = tf.sequential();
// this.model.add(tf.layers.dense({units: 64, activation: 'relu', inputShape: [inputShape]}));
// this.model.add(tf.layers.dense({units: 2, activation: 'softmax'}));
// this.model.compile({optimizer: 'adam', loss: 'categoricalCrossentropy', metrics: ['accuracy']});
// try {
// // Attempt to load the model
// const modelName = `model-memory-length-${this.mlMemoryLength}`;
// const modelPath = `file://./models/${modelName}/model.json`;
// this.model = await tf.loadLayersModel(modelPath);
// console.log("Model loaded successfully.");
// // Compile the model after loading
// this.model.compile({ loss: 'meanSquaredError', optimizer: 'adam' });
// } catch (error) {
// // If loading fails, create a new model
// console.log("Model not found. Creating a new one.");
// this.model = tf.sequential({
// layers: [
// tf.layers.dense({ units: this.numberOfUnits, inputShape: [this.numberOfInputNodes], activation: 'relu' }),
// tf.layers.dense({ units: this.numberOfOutputNodes })
// ]
// });
// this.model.compile({ loss: 'meanSquaredError', optimizer: 'adam' });
// }
}
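// Sketch of the LSTM alternative described in the note at the top of this file.
// Not wired into the strategy: makeDecision/trainModel would also have to feed
// inputs shaped [batch, mlMemoryLength, 2] (one [myMove, opponentMove] pair per
// timestep) instead of the flat [batch, numberOfInputNodes] the dense model
// expects. Layer sizes mirror the dense version and are assumptions, not tuned values.
CreateLstmModelSketch() {
  const model = tf.sequential();
  model.add(tf.layers.lstm({
    units: this.numberOfHiddenNeurons * 2, // recurrent units, same count as the first dense layer
    inputShape: [this.mlMemoryLength, 2] // timesteps × features per timestep
  }));
  model.add(tf.layers.dense({ units: this.numberOfOutputNodes })); // Q-values for C and D
  model.compile({ loss: 'meanSquaredError', optimizer: tf.train.adam() });
  return model;
}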
async saveModel(model) {
const modelName = `model-memory-length-${this.mlMemoryLength}`;
const savePath = `file://./models/${modelName}`;
await model.save(savePath);
console.log(`Model saved to ${savePath}`);
}
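// Counterpart to saveModel, reconstructed from the commented-out loading code
// in CreateModel above; a sketch only, not called anywhere in this file. Falls
// back to the current in-memory model if no saved file exists.
async loadModel() {
  const modelName = `model-memory-length-${this.mlMemoryLength}`;
  const loadPath = `file://./models/${modelName}/model.json`;
  try {
    this.model = await tf.loadLayersModel(loadPath);
    // a loaded model must be compiled again before further training
    this.model.compile({ loss: 'meanSquaredError', optimizer: tf.train.adam() });
    console.log(`Model loaded from ${loadPath}`);
  } catch (error) {
    console.log(`No saved model at ${loadPath}; keeping the current model.`);
  }
}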
preprocessData (opponentHistory, myHistory, veryVerbose) {
// Convert rawData into a format suitable for training
// Normalize data if necessary
// if (mode === 'strict') { //in this case, I assume it's training and the length is already accurate
// }
// Input validation
if (!Array.isArray(opponentHistory) || !Array.isArray(myHistory)) {
throw new Error('Invalid input: opponentHistory and myHistory must be arrays.');
}
// if (opponentHistory.length === 0 && myHistory.length === 0) {
// let resetStatePattern;
// if (this.denseLayers) {
// // resetStatePattern for dense (with inputShape: [this.numberOfInputNodes])
// resetStatePattern = Array(this.numberOfInputNodes).fill(-1); // Reset state pattern
// }
// if (this.lstmLayers) {
// // resetStatePattern for LSTM (with inputShape: [this.mlMemoryLength, 2])
// resetStatePattern = Array(this.mlMemoryLength).fill([-1, -1]); // Using [-1, -1] for each timestep
// }
// console.log ('[preprocessData] new game - sending resetStatePattern: ${JSON.stringify(resetStatePattern)}');
// return resetStatePattern; //send reset pattern to the model when new game starts
// }
// const isMyHistoryFirst = true;
// const mostRecentMoveFirst = true;
// console.log ('[preprocessData] before trim myHistory: ${myHistory.length} opponentHistory: ${opponentHistory.length}')
const myHistoryRightLength = this.trimAndPadHistory (myHistory);
const opponentHistoryRightLength = this.trimAndPadHistory (opponentHistory);
// console.log ('[preprocessData] after trim myHistory: ${myHistoryRightLength.length} opponentHistory: ${opponentHistoryRightLength.length}')
// Check lengths of trimmed and padded histories
if (myHistoryRightLength.length !== this.mlMemoryLength || opponentHistoryRightLength.length !== this.mlMemoryLength) {
throw new Error(`Error in history lengths: myHistory is ${myHistoryRightLength.length}, opponentHistory is ${opponentHistoryRightLength.length}`);
}
let mappedFeatures;
// we simply concatenate myHistory and opponentHistory
// (the commented-out loop below shows an earlier alternating layout)
const features = myHistoryRightLength.concat(opponentHistoryRightLength);
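// Example (mlMemoryLength = 5, assuming COOPERATE === 'C' and DEFECT === 'D'):
// myHistory ['C','C','D','C','C'] + opponentHistory ['D','C','C','C','C']
// → ['C','C','D','C','C','D','C','C','C','C'], mapped to 0/1 below.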
// for (let i = 0; i < this.mlMemoryLength; i++) {
// const index = mostRecentMoveFirst ? this.mlMemoryLength - 1 - i : i;
// if (isMyHistoryFirst) {
// features.push(myHistoryRightLength[index], opponentHistoryRightLength[index]);
// } else {
// features.push(opponentHistoryRightLength[index], myHistoryRightLength[index]);
// }
// }
// Map COOPERATE to COOPERATEML and DEFECT to DEFECTML
mappedFeatures = features.map(feature => {
if (feature === COOPERATE) {
return COOPERATEML;
} else if (feature === DEFECT) {
return DEFECTML;
} else {
throw new Error('Invalid feature value: ' + feature);
}
});
// Check final array length
if (mappedFeatures.length !== this.numberOfInputNodes) {
throw new Error(`Error: Expected features array of length ${this.numberOfInputNodes}, but got ${mappedFeatures.length}`);
}
const howMuchToPrint = 10;
if (veryVerbose) {
console.log (`[preprocessData] myHistory: ${myHistory.slice(-howMuchToPrint).join("")} opponentHistory: ${opponentHistory.slice(-howMuchToPrint).join("")} mappedFeatures: ${JSON.stringify(mappedFeatures)} `)
}
return mappedFeatures;
}
trimAndPadHistory(history) {
// Set the desired length and padding value internally
const desiredLength = this.mlMemoryLength;
const padValue = COOPERATE;
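// Examples (desiredLength = 5, assuming COOPERATE === 'C'):
// ['C','D'] → ['C','C','C','C','D'] (padded on the left)
// ['D','C','D','C','D','D'] → ['C','D','C','D','D'] (trimmed to the last 5)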
// Pad history if it's shorter than the desired length
const paddingLength = Math.max(desiredLength - history.length, 0);
const paddedHistory = Array(paddingLength).fill(padValue).concat(history);
// Trim the history to the desired length
const historyTrimmedAndPadded = paddedHistory
.slice(-desiredLength); // Ensure the history is not longer than desired length
return historyTrimmedAndPadded;
}
async makeDecision(opponentHistory, myHistory, decisionCount, exploreFlag, veryVerbose) {
const initialEpsilon = 1.0; // Start with 100% exploration
const minEpsilon = 0.01; // Minimum value of epsilon (% of exploration)
if (myHistory.length === 0) {
this.epsilon = initialEpsilon;
if (veryVerbose) {
console.log (`[makeDecision] myHistory empty - start new game - exploreFlag: ${exploreFlag} epsilon: ${this.epsilon}`);
}
// start of a new game
this.epsilonCount = 0;
this.countDecisionsCoop = 0;
//mlStrategy = await MLStrategy.create();
}
const howOftenToLog = 10;
//console.log ('[makeDecision] decisionCount: ${decisionCount} epsilon: ${epsilon}')
logMemory (tf, 'makeDecision', 'before decision', decisionCount, 1000, this.epsilon, false) //false disable logging
// you can introduce randomness in the agent’s actions during the training phase to encourage exploration.
// This is a common practice in reinforcement learning known as epsilon-greedy strategy.
// In the epsilon-greedy strategy, the agent will mostly take the action that it believes
// is the best according to its current knowledge (exploitation), but with a probability of epsilon,
// it will take a random action (exploration). This allows the agent to learn about the consequences
// of different actions in various states.
// this was the original exploration code, but I want to try a different approach:
// calculate the prediction first and then if we randomly decide to explore,
// we return the opposite. This sounds a lot more like real exploration.
// we will still not "start a fight" - that logic won't change
// Exploration: with a probability of epsilon, take a random action
// if (exploreFlag && Math.random() < epsilon) {
// if (opponentHistory.length === 0 || !opponentHistory.includes('D')) {
// if (veryVerbose) {
// console.log(`No reason to attack first - random action: C`);
// }
// return ('C');
// } else {
// // Update epsilon
// if (epsilon > minEpsilon) {
// epsilon = Math.max(minEpsilon, epsilon / 2);
// }
// const nextAction = Math.random() < 0.5 ? 'C' : 'D';
// console.log(`epsilon: ${epsilon} random action: ${nextAction}`);
// return (nextAction);
// }
// }
const startTime = Date.now(); // Start time measurement
const stateReadyForML = this.preprocessData(opponentHistory, myHistory, veryVerbose);
// NOTE!!!! I TRIED TO SWITCH TO A TRY / CATCH approach and instead of having
// on avg 20 tensors at any given time, I ended up with thousands, so I went back to the tf.tidy approach
// but the issue (ever-growing numBytes) appeared with both the tidy and the try / catch approach
// update: I found the bug!!! the issue was I was keeping the size of the input in check in processTrainingQueue
// but not in trainModel, so the size of each training set kept growing and growing
// now numBytes is always small but the heap keeps growing anyway - keep digging!
const prediction = tf.tidy( () => {
// Exploitation: otherwise, take the action that the model predicts to be the best
// Make a decision based on the current model
// stateReadyForML is a single input instance (a flat array representing one sequence of moves),
// so we wrap it in an additional array [] to create a batch with one instance.
const inputTensor = tf.tensor2d([stateReadyForML]);
logTensor (inputTensor, 'makeDecision', 'inputTensor', false); //false disable logging
// registerTensor(inputTensor, 'inputTensor');
// Check tensor shape based on model architecture
if (inputTensor.shape[0] !== 1 || inputTensor.shape[1] !== this.numberOfInputNodes) {
throw new Error(`[Dense Layers] Unexpected tensor shape. Expected [1,${this.numberOfInputNodes}], received ${inputTensor.shape}`);
}
// Perform the prediction
const rawPrediction = this.model.predict(inputTensor);
if (veryVerbose) {
console.log(`[makeDecision] rawPrediction: ${JSON.stringify(rawPrediction)}`);
// console.log(`[makeDecision] rawPrediction: ${rawPrediction.argMax(1)}`);
}
logMemory (tf, 'makeDecision', 'inside tidy', decisionCount, 1000, true) //false disable logging
return rawPrediction.argMax(1);
});
// Extract the data from the tensor
const decision = prediction.dataSync()[0]; // dataSync is synchronous, no await needed
if (veryVerbose) {
console.log(`[makeDecision] expected decision: ${decision}`);
}
// Dispose of the prediction tensor if not already handled by tf.tidy
if (prediction instanceof tf.Tensor) {
// unregisterTensor(inputTensor);
prediction.dispose();
}
// Measure the time taken for this decision
const timeTaken = Date.now() - startTime;
// Keep track of the total time taken for the last 1000 decisions
this.totalTimeForLast1000Decisions = (this.totalTimeForLast1000Decisions || 0) + timeTaken;
// if (this.totalTimeForLast1000Decisions > 0) {
// console.log ('this.totalTimeForLast1000Decisions: ${this.totalTimeForLast1000Decisions}')
// }
logMemory (tf, 'makeDecision', 'after decision', decisionCount, 1000, true) //false disable logging
// Report the average and reset the total time every howOftenToLog decisions
if (veryVerbose && decisionCount % howOftenToLog === 0) {
// Calculate average time with two decimal places
const avgTimePerDecision = parseFloat((this.totalTimeForLast1000Decisions / howOftenToLog).toFixed(2));
console.log(`[makeDecision] Average time per decision (last ${howOftenToLog}): ${avgTimePerDecision} ms`);
this.totalTimeForLast1000Decisions = 0; // Reset for the next decisions
}
if (exploreFlag && decision === COOPERATEML) {
// we check exploreFlag to avoid counting twice when
// machineLearningOne vs machineLearningOne
this.countDecisionsCoop++;
}
//if (decisionCount % howOftenToLog === 0 || decision === 1) {
if (veryVerbose && decisionCount % howOftenToLog === 0) {
//if (decision === 1) {
const howMuchToPrint = 20;
console.log(`[makeDecision] decisionCount: ${decisionCount} ML history: ${myHistory.slice(-howMuchToPrint).join("")} opponent history: ${opponentHistory.slice(-howMuchToPrint).join("")} decision predicted: ${decision}`);
console.log(`[makeDecision] decisionCount: ${decisionCount} stateReadyForML: ${stateReadyForML} decision: ${decision}`);
console.log(`[makeDecision] decisionCount: ${decisionCount} rate of cooperation: ${Math.round((this.countDecisionsCoop / decisionCount)*100)}%`);
}
let nextAction;
if (decision === DEFECTML) {
// console.log(`[makeDecision] rawPrediction is D!`);
nextAction = DEFECT;
}
else {
nextAction = COOPERATE;
}
// console.log(`opponentHistory.length: ${opponentHistory.length}`);
// if (opponentHistory.length > 0) {
// console.log(`opponentHistory.includes(DEFECT): ${opponentHistory.includes(DEFECT)}`);
// }
// if opponent has used 'D' at least once, we can contemplate exploration
if (opponentHistory.length > 0 && opponentHistory.includes(DEFECT)) {
if (veryVerbose) {
console.log(`we could explore - epsilon: ${this.epsilon}`);
}
if (exploreFlag && Math.random() < this.epsilon) {
// so we know we have the "license" to explore, and we randomly decided to explore
// so we will return the opposite value to the prediction
if (nextAction === COOPERATE) {
nextAction = DEFECT;
}
else {
nextAction = COOPERATE;
}
if (veryVerbose) {
console.log(`epsilon: ${this.epsilon} random action: ${nextAction}`);
}
// Update epsilon
if (this.epsilon > minEpsilon) {
this.epsilon = Math.max(minEpsilon, this.epsilon / 2);
}
}
}
return nextAction;
}
translateToML(moveEnglish) {
let result;
if (moveEnglish === COOPERATE) {
result = COOPERATEML;
} else if (moveEnglish === DEFECT) {
result = DEFECTML;
} else {
throw new Error(`Invalid input: ${moveEnglish}. Expected either ${COOPERATE} or ${DEFECT}.`);
}
//console.log ('translateToML input ${moveEnglish} output ${result}');
return result;
}
mapFromMLToText(history) {
// Input validation
if (!Array.isArray(history)) {
throw new Error(`[mapFromMLToText] Invalid input: history must be an array.`);
}
const is2DArray = Array.isArray(history[0]);
if (is2DArray) {
// Handling 2D arrays: [this.mlMemoryLength, 2]
return history.map(subArray =>
subArray.map(value => this.mapValueToText(value))
);
} else {
// Handling 1D arrays: [this.numberOfInputNodes]
return history.map(value => this.mapValueToText(value));
}
}
mapValueToText(value) {
if (value === 0) return 'C';
else if (value === 1) return 'D';
else if (value === -1) return 'R';
else return 'Unknown'; // Handling unexpected values
}
mapScoreToML (myMove, opponentMove) {
// to help train the ML, we remap the results of the game
// when the game gives zero points, this has to be mapped as a negative event
// to help train the model correctly
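// Reward mapping: (C,C) → 1, (C,D) → -1, (D,C) → 2, (D,D) → 0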
if (myMove === COOPERATE && opponentMove === COOPERATE) {
return 1;
}
else if (myMove === COOPERATE && opponentMove === DEFECT) {
return -1;
}
else if (myMove === DEFECT && opponentMove === COOPERATE) {
return 2;
}
else {
return 0; // both DEFECT
}
}
async trainModel(myHistory, opponentHistory, myCurrentMove, opponentCurrentMove, veryVerbose) {
// using a Deep Q-Network (DQN) approach in the context of the Iterated Prisoner's Dilemma
//console.log(`[trainModel] ${opponentHistory.slice(-this.mlMemoryLength)} ${myHistory.slice(-this.mlMemoryLength)} ${myScoreThisRound} ${myLastMove} ${opponentLastMove}`);
const myCurrentMoveML = this.translateToML(myCurrentMove);
const opponentCurrentMoveML = this.translateToML(opponentCurrentMove);
//console.log(`[trainModel] ${opponentHistory} ${myHistory} ${myScoreThisRound} ${myLastMoveML} ${opponentLastMoveML}`);
if (this.isTraining) {
// If the model is already being trained, skip this call or handle it appropriately
// (SO MUCH EASIER APPROACH than building a queue and keeping track!!!!)
console.log(`[trainModel] Model Training is already in progress. Skipping this call.`);
return;
}
// console.log(`[trainModel] Training starts. Switching flag to true.`);
this.isTraining = true; // Set the flag to indicate training is in progress
const thisStateReadyForML = this.preprocessData(opponentHistory, myHistory, veryVerbose);
//const nextStateReadyForML = this.preprocessData([opponentCurrentMove].concat(opponentHistory), [myCurrentMove].concat(myHistory));
const nextStateReadyForML = this.preprocessData(opponentHistory.concat(opponentCurrentMove), myHistory.concat(myCurrentMove), veryVerbose);
if (veryVerbose) {
console.log (`[trainModel] thisStateReadyForML: ${JSON.stringify(thisStateReadyForML)}`);
console.log (`[trainModel] thisStateReadyForML (mapFromMLToText): ${this.mapFromMLToText(thisStateReadyForML)}`);
console.log (`[trainModel] nextStateReadyForML: ${JSON.stringify(nextStateReadyForML)}`);
console.log (`[trainModel] nextStateReadyForML (mapFromMLToText): ${this.mapFromMLToText(nextStateReadyForML)}`);
}
// currentQs is an array of two values (the Q-values for the two actions: 'C' and 'D')
// both predictions are wrapped in tf.tidy so the intermediate tensors get disposed
// (dataSync copies the values out before the tensors are freed)
const currentQs = tf.tidy(() => this.model.predict(tf.tensor2d([thisStateReadyForML])).dataSync()); //3d for LSTM 2d for dense
// we predict the Q-values for the next state and then extract the maximum Q-value,
// which represents the best estimated reward for the optimal action in that future state.
const nextMaxQ = tf.tidy(() => tf.max(this.model.predict(tf.tensor2d([nextStateReadyForML])), 1).dataSync()[0]); //3d for LSTM 2d for dense
if (veryVerbose) {
console.log (`[trainModel] thisState: ${this.mapFromMLToText(thisStateReadyForML)} currentQs: ${currentQs} `);
console.log (`[trainModel] nextState: ${this.mapFromMLToText(nextStateReadyForML)} nextMaxQ: ${nextMaxQ}`);
}
let reward = this.mapScoreToML (myCurrentMove, opponentCurrentMove); // Score remapped for ML
// Initialize targetQ as an array with zeros, the same length as the number of output nodes
//const targetQ = new Array(this.numberOfOutputNodes).fill(0);
// Update the Q-value for the action taken
// const targetQ = currentQs.slice();
let targetQ;
// targetQ is an array of numberOfOutputNodes zeroes; note that standard DQN practice
// is instead to start from currentQs (the commented-out line above) so that training
// only moves the Q-value of the action actually taken
targetQ = new Array(this.numberOfOutputNodes).fill(0);
//if (this.lstmLayers) {targetQ = new Array(this.mlMemoryLength).fill([0, 0]);}
//if (this.lstmLayers) { targetQ = currentQs.slice();}
if (targetQ.length !== this.numberOfOutputNodes) {
throw new Error(`Error: Expected targetQ of length ${this.numberOfOutputNodes}, but got: ${JSON.stringify(targetQ)}`);
}
if (veryVerbose) {
console.log (`[trainModel] targetQ before: ${JSON.stringify(targetQ)}`);
}
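// DQN (Bellman) target for the action actually taken:
// targetQ[a] = reward + discountFactor * max_a' Q(nextState, a')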
targetQ[myCurrentMoveML] = reward + this.discountFactor * nextMaxQ;
if (veryVerbose) {
console.log (`[trainModel] targetQ after: ${JSON.stringify(targetQ)}`);
}
const myHistoryString = myHistory.concat(myCurrentMove).slice(-this.mlMemoryLength).join("");
const opponentHistoryString = opponentHistory.concat(opponentCurrentMove).slice(-this.mlMemoryLength).join("");
if (veryVerbose) {
console.log(`[trainModel] myHistoryString: ${myHistoryString} opponentHistoryString: ${opponentHistoryString}`);
console.log(`[trainModel] action: ${myCurrentMoveML} reward: ${reward} this.discountFactor: ${this.discountFactor} nextMaxQ: ${nextMaxQ}`);
}
//console.log(`[trainModel] thisStateReadyForML: ${JSON.stringify(thisStateReadyForML)}`);
//console.log(`[trainModel] targetQ: ${JSON.stringify(targetQ)}`);
//thisStateReadyForML = [0, 1, 0, 1, 0, 1];
//targetQ = [0.5, 0.8]; // Hypothetical target Q-values
//console.log(`[trainModel] DEBUG targetQ: ${JSON.stringify(targetQ)}`);
// const labelTensor = tf.oneHot(tf.tensor1d(combinedLabels, 'int32'), 2);
const inputTensor = tf.tensor2d([thisStateReadyForML]);
logTensor (inputTensor, 'trainModel', 'inputTensor', false); //false does not log
const labelTensor = tf.tensor2d([targetQ]);
logTensor (labelTensor, 'trainModel', 'labelTensor', false); //false does not log
// NOTE! Cannot be wrapped in tf.tidy due to the async nature of model.fit
try {
await this.model.fit(inputTensor, labelTensor, {
verbose: false,
epochs: 1,
// Training a Deep Q-Network (DQN) with just 1 epoch per update is actually quite common,
// especially in reinforcement learning scenarios where the DQN is updated frequently.
//batchSize: 32, // Adjust batch size based on your preference
callbacks: {
onEpochEnd: (epoch, logs) => {
//console.log(`Epoch ${epoch}: loss = ${logs.loss}`);
const prefix = '[trainModel] [onEpochEnd]';
const heapUsedMB = heapSizeInMB();
if (heapUsedMB > 1000) {
console.log(`${prefix} Memory usage: ${heapUsedMB} MB`);
tfMemLog (tf, prefix, true); //false doesn't execute
}
},
// Remember, lower loss doesn’t always mean a better model.
// It’s also important to validate your model on a separate validation set
// to ensure it’s not overfitting to the training data. Overfitting occurs
// when a model learns the training data too well and performs poorly on unseen data.
onBatchEnd: (batch, logs) => {
const memUsage = process.memoryUsage().heapUsed / 1024 / 1024;
const totalMem = os.totalmem() / 1024 / 1024;
//console.log(`[trainModel] [onBatchEnd] Memory usage ${memUsage / totalMem} ${memUsage} out of ${totalMem}`);
if (memUsage / totalMem > 0.05) {
console.log(`[trainModel] [onBatchEnd] Memory usage is over 5%, stopping training...`);
this.model.stopTraining = true;
}
}
// In TensorFlow.js model.fit() is the method that trains a model for a fixed number
// of epochs (iterations on a dataset). This method returns a History object.
// The training can be stopped at any time by setting the model.stopTraining property to true.
// This is a part of the TensorFlow.js API.
// When you set this.model.stopTraining = true; inside the onBatchEnd callback,
// it signals the training process to stop at the end of that batch.
// This is useful when you have some condition (like memory usage in your case) that,
// when met, should immediately stop the training process.
}
});
} catch (error) {
console.error('Error during training:', error);
} finally {
// console.log(`[trainModel] Training complete. Switching flag back to false.`);
inputTensor.dispose(); // disposed manually because tf.tidy cannot wrap the async fit
labelTensor.dispose();
this.isTraining = false; // Reset the flag once training is complete
}
}
}
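// Minimal usage sketch: a few training rounds against a hypothetical
// always-cooperate opponent, assuming config.js maps COOPERATE/DEFECT to the
// 'C'/'D' characters the histories use. Defined for illustration only; it is
// not exported or invoked here.
async function demoGame(rounds = 10) {
  const ml = await initializeMLStrategy();
  if (!ml) return;
  const myHistory = [];
  const opponentHistory = [];
  for (let i = 0; i < rounds; i++) {
    // histories exclude the current round; trainModel appends it internally
    const myMove = await ml.makeDecision(opponentHistory, myHistory, i + 1, true, false);
    const opponentMove = COOPERATE; // hypothetical opponent always cooperates
    await ml.trainModel(myHistory, opponentHistory, myMove, opponentMove, false);
    myHistory.push(myMove);
    opponentHistory.push(opponentMove);
  }
}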
module.exports = {
MLStrategy, // Export the MLStrategy class
initializeMLStrategy
};