CartPole

2025-09-11 12:35:02 +08:00
parent ce22f0568d
commit 5d7bdc0e39
3 changed files with 60 additions and 12 deletions
--- a/Pytorch/Project/CartPole/TestPthModule.py
+++ b/Pytorch/Project/CartPole/TestPthModule.py
@@ -0,0 +1,46 @@
+import torch
+import gymnasium as gym
+import torch.nn as nn
+import torch.nn.functional as F
+
+# 定义和训练时一样的网络结构
+class DQN(nn.Module):
+    def __init__(self, state_size, action_size):
+        super(DQN, self).__init__()
+        self.l1 = nn.Linear(state_size, 128)
+        self.l2 = nn.Linear(128, 32)
+        self.l4 = nn.Linear(32, action_size)
+
+    def forward(self, x):
+        x = F.relu(self.l1(x))
+        x = F.relu(self.l2(x))
+        x = self.l4(x)
+        return x
+
+
+# 创建环境 (带渲染)
+env = gym.make("CartPole-v1", render_mode="human")
+
+# 初始化模型并加载权重
+state_size = env.observation_space.shape[0]
+action_size = env.action_space.n
+model = DQN(state_size, action_size)
+model.load_state_dict(torch.load("cartpole_dqn_success.pth"))
+model.eval()
+print("已加载模型 cartpole_dqn.pth")
+
+# 测试
+for ep in range(5):  # 测试 5 回合
+    state, _ = env.reset()
+    done = False
+    total_reward = 0
+    while not done:
+        with torch.no_grad():
+            action = model(torch.tensor(state, dtype=torch.float32)).argmax().item()
+        state, reward, terminated, truncated, _ = env.step(action)
+        done = terminated or truncated
+        total_reward += reward
+        env.render()
+    print(f"Episode {ep+1} reward = {total_reward}")
+
+env.close()