"""Minimal Gymnasium demo: run CartPole-v1 with random actions and print each step."""

import time

import gymnasium as gym


def main():
    """Run a short CartPole-v1 rollout with randomly sampled actions.

    Creates the environment with on-screen rendering, prints the initial
    observation and info, then takes 100 steps. The environment is reset
    whenever an episode ends, and it is always closed on exit.
    """
    # Create the environment; render_mode="human" displays a window.
    env = gym.make("CartPole-v1", render_mode="human")
    try:
        # Reset the environment to obtain the first observation.
        observation, info = env.reset()
        print("初始观察值:", observation)
        # NOTE(review): the label below is just ":" in the original; it
        # looks like the text before the colon was lost. Left unchanged.
        print(":", info)

        # Take a number of steps using random actions.
        for step in range(100):
            # Sample a random action rather than always sending action 1,
            # so the rollout matches the "random actions" intent.
            action = env.action_space.sample()
            observation, reward, terminated, truncated, info = env.step(action)

            # An episode is over when it terminates OR is truncated; report
            # the same condition that triggers the reset below.
            done = terminated or truncated
            print(f"Step {step+1}: observation={observation}, reward={reward}, done={done}")

            if done:
                observation, info = env.reset()

            # Pause so each step can be followed on screen.
            # NOTE(review): the original layout was lost; this assumes the
            # sleep ran once per step - confirm against the intended script.
            time.sleep(1)
    finally:
        # Close the environment even if a step raised, so the render
        # window and its resources are released.
        env.close()


if __name__ == "__main__":
    main()