From ca7da80427a892c77953a316ee09a47992a1da35 Mon Sep 17 00:00:00 2001
From: tianzikang
Date: Tue, 30 Aug 2022 17:38:51 +0800
Subject: [PATCH] add optimizer selection

---
 README.md | 44 +++++++++++++++++++++++++++++++++++++++++---
 learn.py  |  7 +++----
 main.py   |  3 ++-
 model.py  | 11 ++++++++---
 4 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 5328f2f..3e344ea 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,46 @@
 # QMIXRNN
-Referring to pymarl, qmix is implemented with RNN to cope with SMAC environment
+Referring to pymarl, QMIX is implemented with an RNN agent network to cope with the SMAC environment.
+This clear implementation can help you figure out how QMIX works.
 ## Run
-`python main.py --map-name=3s5z`
+Note: `--optimizer=0/1` means that both `Adam` (0) and `RMSprop` (1) work well on that scenario, so pick either one when running; where only `--optimizer=0` is listed, use `Adam`. A sketch of the mapping follows the commands.
+`python main.py --map-name=3s5z --optimizer=0/1`
+`python main.py --map-name=1c3s5z --optimizer=0/1`
+`python main.py --map-name=2s3z --optimizer=0/1`
+`python main.py --map-name=8m --optimizer=0/1`
+`python main.py --map-name=2s_vs_1sc --optimizer=0`
+`python main.py --map-name=3m --optimizer=0`
+`python main.py --map-name=10m_vs_11m --optimizer=0`
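+
+The mapping below is a minimal sketch of what each flag value selects; it mirrors the `model.py` change in this patch, and `build_optimizer`, `params` and `lr` are illustrative placeholder names, not functions in this repo:
+```python
+import torch
+
+def build_optimizer(optimizer_id, params, lr=3e-4):
+    if optimizer_id == 0:
+        # Adam with torch defaults: good on 3m and 2s_vs_1sc
+        return torch.optim.Adam(params, lr)
+    elif optimizer_id == 1:
+        # RMSprop with pymarl-style settings: good on the other listed maps
+        return torch.optim.RMSprop(params, lr, alpha=0.99, eps=1e-5)
+    raise ValueError('optimizer_id must be 0 (Adam) or 1 (RMSprop)')
+```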
 ## TODO
-Now this code can deal with some easy scenarios like 2s3z, 3s5z, 3m, 8m, and I'm trying to approach the result of pymarl. At the same time, I'm also trying to achieve some tricks on this code like multi step TD target and so on.
\ No newline at end of file
+Now this code performs very well on some of the easy scenarios (1c3s5z, 2s3z, 3s5z and 8m),
+reasonably well on the easy scenarios 2s_vs_1sc and 3m,
+but not yet well on the easy scenario 10m_vs_11m.
+
+I'm trying to match the results of pymarl. At the same time, I'm also trying to add some tricks to this code, such as multi-step TD targets (a sketch of that target follows the patch).
+
+## Reference
+@inproceedings{rashid2018qmix,
+  title={{QMIX}: Monotonic value function factorisation for deep multi-agent reinforcement learning},
+  author={Rashid, Tabish and Samvelyan, Mikayel and Schroeder, Christian and Farquhar, Gregory and Foerster, Jakob and Whiteson, Shimon},
+  booktitle={International Conference on Machine Learning},
+  pages={4295--4304},
+  year={2018},
+  organization={PMLR}
+}
+
+@article{samvelyan19smac,
+  title = {{The} {StarCraft} {Multi}-{Agent} {Challenge}},
+  author = {Mikayel Samvelyan and Tabish Rashid and Christian Schroeder de Witt and Gregory Farquhar and Nantas Nardelli and Tim G. J. Rudner and Chia-Man Hung and Philip H. S. Torr and Jakob Foerster and Shimon Whiteson},
+  journal = {CoRR},
+  volume = {abs/1902.04043},
+  year = {2019},
+}
\ No newline at end of file
diff --git a/learn.py b/learn.py
index c830bfe..9b1d65d 100644
--- a/learn.py
+++ b/learn.py
@@ -43,7 +43,6 @@ def qmix_learning(
     is_share_para,
     is_evaluate,
     q_func,
-    optimizer,
     learning_rate,
     exploration,
     max_training_steps=1000000,
@@ -113,10 +112,10 @@
         gamma=gamma,
         replay_buffer_size=replay_buffer_size,
         episode_limits=episode_limit,
-        batch_size=batch_size,
-        optimizer=optimizer,
+        batch_size=batch_size, 
         learning_rate=learning_rate,
-        grad_norm_clip=grad_norm_clip
+        grad_norm_clip=grad_norm_clip,
+        args=args
     )
 
 #############
diff --git a/main.py b/main.py
index 9d98cef..fa06d0b 100644
--- a/main.py
+++ b/main.py
@@ -40,6 +40,8 @@ def get_args():
     parser.add_argument('--evaluate-num', type=int, default=32)
     # store hyper parameters
     parser.add_argument('--store-hyper-para', type=int, default=True)
+    # optimizer
+    parser.add_argument('--optimizer', type=int, default=0, help="0: Adam--[3m, 2s_vs_1sc]; 1: RMSprop--[others]")
 
     return parser.parse_args()
 
@@ -66,7 +68,6 @@ def main(args=get_args()):
         is_evaluate=args.is_evaluate,
         evaluate_num=args.evaluate_num,
         q_func=QMIX_agent,
-        optimizer=optim.RMSprop,
         learning_rate=args.learning_rate,
         exploration=exploration_schedule,
         max_training_steps=args.training_steps,
diff --git a/model.py b/model.py
index 6ba4653..2033ae9 100644
--- a/model.py
+++ b/model.py
@@ -148,9 +148,9 @@ def __init__(
         replay_buffer_size=5000,
         episode_limits=60,
         batch_size=32,
-        optimizer=torch.optim.RMSprop,
         learning_rate=3e-4,
         grad_norm_clip=10,
+        args=None
     ) -> None:
         super(QMIX_agent, self).__init__()
         assert multi_steps == 1 and is_per == False and is_share_para == True, \
@@ -178,8 +178,13 @@
         self.params = list(self.Q.parameters())
         self.grad_norm_clip = grad_norm_clip
 
-        # RMSProp alpha:0.99, RMSProp epsilon:0.00001
-        self.optimizer = optimizer(self.params, learning_rate, alpha=0.99, eps=1e-5)
+        if args.optimizer == 0:
+            # Adam: 3m, 2s_vs_1sc
+            self.optimizer = torch.optim.Adam(self.params, learning_rate)
+        elif args.optimizer == 1:
+            # RMSProp alpha:0.99, RMSProp epsilon:0.00001
+            self.optimizer = torch.optim.RMSprop(self.params, learning_rate, alpha=0.99, eps=1e-5)
+
         self.MseLoss = nn.MSELoss(reduction='sum')
 
         # Construct buffer
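
The README TODO above mentions multi-step TD targets. For reference, here is a minimal, self-contained sketch of an n-step TD target; it is not part of this patch, and `q_next` (the target network's Q_tot at the next step) and the other names are placeholders, not code from this repo:

```python
import torch

def multi_step_td_target(rewards, q_next, dones, gamma=0.99, n=3):
    """n-step TD targets for a (batch, time) slice of episodes.

    rewards: (B, T) per-step team rewards
    q_next:  (B, T) target-network Q_tot evaluated at step t+1
    dones:   (B, T) 1.0 where the episode has terminated
    """
    B, T = rewards.shape
    targets = torch.zeros_like(rewards)
    for t in range(T):
        ret = torch.zeros(B)
        done = torch.zeros(B)
        last = t
        for k in range(min(n, T - t)):
            # accumulate discounted rewards, stopping after a terminal step
            ret = ret + (gamma ** k) * rewards[:, t + k] * (1 - done)
            done = torch.maximum(done, dones[:, t + k])
            last = t + k
        # bootstrap from the target network unless the episode ended
        ret = ret + (gamma ** (last - t + 1)) * q_next[:, last] * (1 - done)
        targets[:, t] = ret
    return targets
```

With `n=1` this reduces to the usual one-step target `r_t + gamma * Q_tot'(s_{t+1})` that the current code trains against.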