kwai · JXu97 · Nov 29, 2022 · Nov 29, 2022
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/DouOne.iml b/.idea/DouOne.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
@@ -80,9 +80,9 @@ def forward(self, z, x, return_value=False, flags=None):
 
 # Model dict is only used in evaluation but not training
+# NOTE(review): these assignments execute at import time (around line 83 of
+# the patched file), but this same commit adds `class GeneralModel` at the end
+# of the file (line 119 onward). Unless GeneralModel is also defined or
+# imported earlier -- not visible in this diff -- importing the module raises
+# NameError here. TODO confirm; if so, move these lines below the class.
 model_dict = {}
-model_dict['landlord'] = LandlordLstmModel
-model_dict['landlord_up'] = FarmerLstmModel
-model_dict['landlord_down'] = FarmerLstmModel
+model_dict['landlord'] = GeneralModel
+model_dict['landlord_up'] = GeneralModel
+model_dict['landlord_down'] = GeneralModel
 
 class Model:
     """
@@ -93,9 +93,9 @@ def __init__(self, device=0):
         self.models = {}
         if not device == "cpu":
             device = 'cuda:' + str(device)
-        self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
-        self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
-        self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
+        self.models['landlord'] = GeneralModel().to(torch.device(device))
+        self.models['landlord_up'] = GeneralModel().to(torch.device(device))
+        self.models['landlord_down'] = GeneralModel().to(torch.device(device))
 
     def forward(self, position, z, x, training=False, flags=None):
         model = self.models[position]
@@ -119,3 +119,85 @@ def get_model(self, position):
 
     def get_models(self):
+        """Return ``self.models``, the dict of models keyed by position name."""
         return self.models
+
+# Added from https://github.com/Vincentzyx/Douzero_Resnet/blob/main/douzero/dmc/models.py
+
+# Residual block as used in ResNet-18/34: two convolutions with kernel size 3 (1-D here).
+class BasicBlock(nn.Module):
+    """Residual block made of two kernel-size-3 ``Conv1d`` layers.
+
+    Main path: Conv1d -> BatchNorm1d -> ReLU -> Conv1d -> BatchNorm1d.
+    The block input is added back through ``shortcut`` and a final ReLU is
+    applied to the sum.
+    """
+
+    # Multiplier from ``planes`` to the block's output channel count.
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        """Create the block's layers.
+
+        Args:
+            in_planes: Number of channels of the input.
+            planes: Number of channels produced by both convolutions.
+            stride: Stride of the first convolution, and of the projection
+                shortcut when one is created.
+        """
+        super().__init__()
+        out_planes = self.expansion * planes
+
+        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=3,
+                               stride=stride, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(planes)
+        self.conv2 = nn.Conv1d(planes, planes, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm1d(planes)
+
+        # The residual sum needs both operands to have the same length and
+        # channel count. When the main path changes either one, project the
+        # input with a 1-wide convolution + batch norm; otherwise pass it
+        # through an empty Sequential (identity).
+        if stride != 1 or in_planes != out_planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv1d(in_planes, out_planes, kernel_size=1,
+                          stride=stride, bias=False),
+                nn.BatchNorm1d(out_planes),
+            )
+        else:
+            self.shortcut = nn.Sequential()
+
+    def forward(self, x):
+        """Run the main path, add the shortcut of ``x``, and apply ReLU."""
+        hidden = F.relu(self.bn1(self.conv1(x)))
+        hidden = self.bn2(self.conv2(hidden))
+        return F.relu(hidden + self.shortcut(x))
+
+
+class GeneralModel(nn.Module):
+    """1-D residual convolutional network that outputs one scalar per row.
+
+    A strided ``Conv1d`` stem is followed by three stages of two
+    ``BasicBlock``s each. The flattened result is concatenated with four
+    copies of ``x`` and passed through four linear layers, each followed by
+    an in-place leaky ReLU (including the last one).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Channel count expected by the next residual block; kept up to date
+        # by _make_layer while the stages are being built.
+        self.in_planes = 80
+
+        # Stem: 40 input channels -> 80 channels, stride 2.
+        # Original author's shape notes: input 1*54*41 -> 1*27*80.
+        # NOTE(review): the "41" disagrees with the 40 input channels declared
+        # here -- TODO confirm the real input shape against the caller.
+        self.conv1 = nn.Conv1d(40, 80, kernel_size=3,
+                               stride=2, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(80)
+
+        # Original author's shape notes per stage: 1*14*80, 1*7*160, 1*4*320.
+        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)
+        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)
+        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)
+        # No fourth residual stage is built.
+
+        # 320 channels * length 4 from the conv trunk, plus x (presumably 15
+        # features wide -- TODO confirm) repeated four times in forward().
+        conv_features = 320 * BasicBlock.expansion * 4
+        self.linear1 = nn.Linear(conv_features + 15 * 4, 1024)
+        self.linear2 = nn.Linear(1024, 512)
+        self.linear3 = nn.Linear(512, 256)
+        self.linear4 = nn.Linear(256, 1)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build one stage of residual blocks as an ``nn.Sequential``.
+
+        The first block uses ``stride``; the remaining ``num_blocks - 1``
+        blocks use stride 1. ``self.in_planes`` is updated after every block
+        so the next one receives the right input channel count.
+        """
+        stage = []
+        for block_stride in [stride] + [1] * (num_blocks - 1):
+            stage.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*stage)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Evaluate the network and optionally pick a row index.
+
+        Args:
+            z: Tensor fed to the convolutional stem (40 channels).
+            x: Tensor concatenated four times, along the last dimension,
+                with the flattened convolutional features.
+            return_value: When true, return only the network outputs.
+            flags: Optional object with an ``exp_epsilon`` attribute; when it
+                is positive, a uniformly random row is chosen with that
+                probability instead of the arg-max row.
+            debug: Unused.
+
+        Returns:
+            ``dict(values=out)`` when ``return_value`` is true; otherwise
+            ``dict(action=..., max_value=...)`` where ``action`` indexes
+            dimension 0 of the output and ``max_value`` is the overall
+            maximum of the output (also when the action was random).
+        """
+        features = F.relu(self.bn1(self.conv1(z)))
+        for stage in (self.layer1, self.layer2, self.layer3):
+            features = stage(features)
+        features = features.flatten(1, 2)
+
+        out = torch.cat([x] * 4 + [features], dim=-1)
+        for linear in (self.linear1, self.linear2, self.linear3, self.linear4):
+            out = F.leaky_relu_(linear(out))
+
+        if return_value:
+            return dict(values=out)
+
+        # Short-circuit order matters: the random draw only happens when
+        # flags are given and exp_epsilon is positive.
+        explore = (
+            flags is not None
+            and flags.exp_epsilon > 0
+            and np.random.rand() < flags.exp_epsilon
+        )
+        if explore:
+            action = torch.randint(out.shape[0], (1,))[0]
+        else:
+            action = torch.argmax(out, dim=0)[0]
+        return dict(action=action, max_value=torch.max(out))
+
+
+