# QPD/examples/run_cheetah.py

from qpd.config import Config
from qpd.networks.models.student_six_model import StudentSixModel
from qpd.compressor import Compressor
from huggingface_sb3 import load_from_hub
from stable_baselines3.sac.sac import SAC
from stable_baselines3.a2c.a2c import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.vec_normalize import VecNormalize

from qpd.networks.wrapper.student.fully_connected_student import FCStudentNet

from datetime import datetime

# Raw settings for this example run; handed to ``Config`` in the
# ``__main__`` section at the bottom of this script.
config = {
    "memory": {
        # Size of the memory used for distillation.
        "size": 100_000,
        # How often (in epochs) the memory is updated.
        "update_frequency": 1,
        # Minimum size of one update, in steps.
        "update_size": 10_000,
        "device": "cpu",
        # Frame-stacked environments only: store just the last frame.
        "frame_stack_optimization": False,
        "check_consistency": True,
    },
    "evaluator": {
        # The student decides the transitions in the environment.
        "student_driven": True,
        # How often (in epochs) the student is tested.
        "student_test_frequency": 10,
        # Minimum number of episodes for testing the student.
        "episodes": 20,
        # Number of actions to skip at the beginning of an episode.
        "initialize": 0,
        # Parallel ray workers used for updating and testing.
        "ray_workers": 10,
        "device": "cpu",
        "deterministic": False,
    },
    "compression": {
        # How often (in epochs) students are saved.
        "checkpoint_frequency": 2,
        "epochs": 600,
        "learning_rate": 5e-4,
        "batch_size": 64,
        "device": "cuda",
        # --- Discrete action spaces only ---
        # Softmax hyperparameter.
        "T": 0.01,
        "categorical": False,
        "critic_importance": 0.5,
        # --- Continuous action spaces only ---
        # Options: "Std", "Mean".
        "distribution": "Std",
        # Options: "KL", "Huber", "MSE".
        "loss": "KL",
    },
    "quantization": {
        "enabled": False,
        "bits": 8,
    },
    "data_directory": "./data",
    # Change this for every run.
    "run_name": "cheetah_no_quant",
}
#"/home/user/Workspace/University/PhD/Experiments/QPD",


def get_environment_normalized(config: Config):
    """Build a HalfCheetah-v3 vectorized env wrapped in saved VecNormalize state.

    Creates ``config.evaluator_config.env_workers`` environments backed by
    ``DummyVecEnv``, then fetches ``vec_normalize.pkl`` from the
    ``sb3/a2c-HalfCheetah-v3`` hub repository and passes it, together with
    the environment, to ``VecNormalize.load``.

    Args:
        config: Run configuration; only ``evaluator_config.env_workers`` is
            read here.

    Returns:
        Whatever ``VecNormalize.load`` returns for the fetched file and the
        freshly created environment.
    """
    worker_count = config.evaluator_config.env_workers
    vec_env = make_vec_env(
        "HalfCheetah-v3",
        n_envs=worker_count,
        vec_env_cls=DummyVecEnv,
    )

    # The normalization file comes from the A2C HalfCheetah-v3 hub repo.
    stats_file = load_from_hub(repo_id="sb3/a2c-HalfCheetah-v3", filename="vec_normalize.pkl")
    return VecNormalize.load(stats_file, vec_env)

def get_environment(config: Config):
    """Build a HalfCheetah-v3 vectorized env without any normalization wrapper.

    Args:
        config: Run configuration; only ``evaluator_config.env_workers`` is
            read here, and it sets the number of environments.

    Returns:
        The environment produced by ``make_vec_env`` using ``DummyVecEnv``.
    """
    return make_vec_env(
        "HalfCheetah-v3",
        n_envs=config.evaluator_config.env_workers,
        vec_env_cls=DummyVecEnv,
    )


if __name__ == "__main__":
    # Fetch the SAC HalfCheetah-v3 checkpoint from the hub and load it as
    # the model that will be compressed.
    checkpoint_file = load_from_hub(
        repo_id="sb3/sac-HalfCheetah-v3",
        filename="sac-HalfCheetah-v3.zip",
    )
    print(checkpoint_file)
    teacher = SAC.load(checkpoint_file)

    run_config = Config(get_environment, config)

    # Alternative student setup (uses FCStudentNet instead of StudentSixModel):
    # comp = Compressor(teacher, get_environment, run_config).student_network(FCStudentNet)
    compressor = Compressor(teacher, get_environment, run_config)
    comp = compressor.student_model(StudentSixModel)
    compressed_model = comp.compress()