Source code for amago.envs.builtin.procgen_envs

"""
Multi-task Procgen wrapper: a two-attempt-per-level environment with per-game metric logging.
"""

import random

import procgen
import gym as og_gym
import numpy as np
import gymnasium as gym

from amago.envs import AMAGOEnv


class ProcgenAMAGO(AMAGOEnv):
    """AMAGOEnv for TwoAttemptMTProcgen that logs metrics for each game separately."""

    def __init__(self, env):
        super().__init__(
            env=env,
            env_name="Procgen",
        )

    @property
    def env_name(self):
        return self.env.current_game


ALL_PROCGEN_GAMES = [
    "dodgeball",
    "caveflyer",
    "heist",
    "jumper",
    "maze",
    "miner",
    "fruitbot",
    "plunder",
    "chaser",
    "leaper",
    "bigfish",
    "starpilot",
    "bossfight",
    "ninja",
    "coinrun",
    "climber",
]


class TwoAttemptMTProcgen(gym.Env):
    """A multi-task Procgen environment that gives two attempts at each level.

    Args:
        games: A list of Procgen game names to include (e.g. ["coinrun", "dodgeball"]).
        distribution_mode: The distribution mode to use for the environment. Options are:
            - "easy": Standard procgen easy mode for every game.
            - "hard": Standard procgen hard mode for every game.
            - "memory-hard": Memory mode in games where it is available, hard mode otherwise.
        reward_scales: A dictionary mapping game names to multipliers that scale their
            rewards (e.g., {"coinrun": 10.0, "dodgeball": 0.5}).
        seed_range: A tuple of integers giving the range of level seeds to sample from.
            Useful for train/test splits.
    """

    def __init__(
        self,
        games: list[str],
        distribution_mode: str,
        reward_scales: dict[str, float] = {},
        seed_range: tuple[int, int] = (0, 2000),
    ):
        super().__init__()
        self.observation_space = gym.spaces.Box(
            low=0, high=255, shape=(64, 64, 3), dtype=np.uint8
        )
        self.action_space = gym.spaces.Discrete(15)
        self.seed_range = seed_range
        assert distribution_mode in ["easy", "hard", "memory-hard"]
        self.distribution_mode = distribution_mode
        for game in games:
            assert (
                game in ALL_PROCGEN_GAMES
            ), f"Invalid Procgen game `{game}`. Options are: {ALL_PROCGEN_GAMES}"
        self.games = games
        self.reward_scales = reward_scales
        self.env = None
        self.reset()
    def frame(self, frame):
        if self._current_episode > 0:
            # paint a small box on the screen to indicate the last episode.
            # means that RL^2 reset flag is unnecessary and resolves value
            # ambiguity w/ short context lengths.
            frame[1:5, 1:5, :] = 0
        return frame
    def _reset_current_env(self):
        if self.env is not None:
            self.env.close()
        if self.distribution_mode == "memory-hard":
            if self.current_game in [
                "dodgeball",
                "caveflyer",
                "heist",
                "jumper",
                "maze",
                "miner",
            ]:
                distribution = "memory"
            else:
                distribution = "hard"
        else:
            distribution = self.distribution_mode
        self.env = og_gym.make(
            f"procgen:procgen-{self.current_game}-v0",
            num_levels=1,
            distribution_mode=distribution,
            use_sequential_levels=False,
            start_level=self.current_level,
        )
        self._reset_next_step = False
    def reset(self, *args, **kwargs):
        self._current_episode = 0
        self.current_game = random.choice(self.games)
        self.current_level = random.randint(*self.seed_range)
        self._reset_current_env()
        obs = self.env.reset()
        return self.frame(obs), {
            "game": self.current_game,
            "level": self.current_level,
        }
    def step(self, action):
        if self._reset_next_step:
            # second attempt at the same level: soft reset the inner env
            # without ending the outer two-attempt episode.
            self._reset_current_env()
            next_obs = self.env.reset()
            reward, done, info = 0.0, False, {"soft_reset": True}
        else:
            next_obs, reward, done, info = self.env.step(action)
        actually_done = False
        if done:
            self._current_episode += 1
            actually_done = self._current_episode >= 2
            if not actually_done:
                self._reset_next_step = True
        if self.current_game in self.reward_scales:
            reward = self.reward_scales[self.current_game] * reward
        return self.frame(next_obs), reward, actually_done, actually_done, info
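

# Usage sketch: a minimal example of combining the two classes above. It assumes
# `procgen` is installed so that `gym.make("procgen:procgen-<game>-v0")` is
# registered, and that ProcgenAMAGO can wrap the env directly as its constructor
# suggests. The game list, reward scales, and seed range are arbitrary example
# values chosen for illustration.
if __name__ == "__main__":
    demo_env = TwoAttemptMTProcgen(
        games=["coinrun", "bigfish"],
        distribution_mode="easy",
        reward_scales={"bigfish": 0.5},
        seed_range=(0, 2000),
    )
    wrapped = ProcgenAMAGO(demo_env)
    # env_name resolves to whichever game was sampled on the most recent reset,
    # which is how per-game metrics stay separated in AMAGO's logs.
    print(wrapped.env_name)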