QPD/examples/esp32_test/env_cartpole.py

45 lines
967 B
Python

import time
import serial
import numpy as np
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
# One CartPole-v1 instance wrapped in a DummyVecEnv, created with
# render_mode="human".
env = make_vec_env(
    "CartPole-v1",
    n_envs=1,
    vec_env_cls=DummyVecEnv,
    env_kwargs={"render_mode": "human"},
)
rewards_list = []

# Initial observation plus the running counters that the control loop
# updates on every step and clears at the end of each episode.
state = env.reset()
steps = rewards = 0

# Serial link to the external controller: 115200 baud, read timeout of 1
# (seconds, per pySerial), so a silent device cannot block a read forever.
port = serial.Serial(port="/dev/ttyUSB0", baudrate=115200, timeout=1)
def read_to_np():
    """Read one line from the serial port and return it as an action array.

    The line is parsed as a base-10 integer and wrapped in a one-element
    NumPy array, which is the form the control loop passes to ``env.step``.

    Returns:
        A 1-D ``numpy.ndarray`` holding the single integer that was received.

    Raises:
        ValueError: If the read produced no data (the port's read timeout
            expired, or only a blank line arrived) or the payload is not an
            integer.
    """
    raw = port.readline()
    if not raw.strip():
        # readline() hands back whatever arrived before the read timeout,
        # which is b"" when the device stayed silent. Report that explicitly
        # instead of letting int() fail with an opaque "invalid literal".
        raise ValueError(
            "serial port returned no data (read timed out or blank line)"
        )
    try:
        action = int(raw)
    except ValueError as err:
        # Keep ValueError (same type the bare int() call raised) but include
        # the raw bytes so stray device output is easy to spot.
        raise ValueError(
            f"expected an integer action from the serial port, got {raw!r}"
        ) from err
    return np.array([action])
def write_ser(cmd: bytes) -> None:
    """Write the payload *cmd* to the module-level serial port.

    The payload is passed to ``port.write`` unchanged; no terminator or
    framing is added here, and the write's return value is discarded.
    """
    port.write(cmd)
# Closed-loop rollout: send the current observation over the serial link,
# read back the chosen action, step the environment, and print statistics.
# Runs until interrupted or until an error propagates out of the loop.
try:
    while True:
        # Time only the serial round trip (observation out, action in).
        # perf_counter() is monotonic and high resolution, unlike time.time(),
        # which can be adjusted and may yield a zero or negative interval.
        tick = time.perf_counter()
        write_ser(str(state).encode())
        actions = read_to_np()
        tock = time.perf_counter()

        state, reward, dones, info = env.step(actions)
        env.render()
        rewards += reward
        steps += 1

        # Round trips per second; guard the division in case the two
        # timestamps coincide.
        elapsed = tock - tick
        print(1 / elapsed if elapsed > 0 else float("inf"))

        if np.all(dones):
            # Episode finished: report its length, the episode return stored
            # in info (presumably by the Monitor wrapper that make_vec_env
            # applies -- confirm), the locally accumulated reward, and the
            # raw info.
            print(steps)
            print(info[0]["episode"]["r"])
            print(rewards)
            print(info)
            # No explicit env.reset() here: the loop relies on the vectorized
            # env having already returned the next episode's first
            # observation as `state`.
            rewards = 0
            steps = 0
finally:
    # Release the serial device and the environment however the loop ends
    # (Ctrl-C, serial parse error, ...).
    port.close()
    env.close()