1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
3.13
1 change: 0 additions & 1 deletion ReinforceLab-internal
Submodule ReinforceLab-internal deleted from 863b22
1 change: 0 additions & 1 deletion envs/tornadocliff_env
Submodule tornadocliff_env deleted from b1f6b0
2 changes: 0 additions & 2 deletions leaderboard_bot/last_update.txt

This file was deleted.

21 changes: 0 additions & 21 deletions leaderboard_bot/update_leaderboards.py

This file was deleted.

15 changes: 15 additions & 0 deletions pyproject.toml
@@ -0,0 +1,15 @@
[project]
name = "reinforcelab"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"dill>=0.3.6",
"gymnasium>=0.28",
"matplotlib>=3.0",
"opencv-python>=4.0",
"seaborn>=0.12.2",
"torch>=2.9.1",
"tqdm>=4.64.1",
]
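
With setup.py and requirements.txt removed, packaging now runs through pyproject.toml and the new src/ layout, so the library must be installed rather than imported from the repo root (e.g. pip install -e .; with no [build-system] table, pip falls back to the default setuptools backend). A minimal sketch to confirm an install picked up the new metadata:

# Sanity check after an editable install; "reinforcelab" is the
# [project] name declared above.
from importlib.metadata import version

print(version("reinforcelab"))  # expected "0.1.0" per pyproject.toml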
3 changes: 0 additions & 3 deletions reinforcelab/action_selectors/__init__.py

This file was deleted.

4 changes: 0 additions & 4 deletions reinforcelab/action_selectors/continuous/__init__.py

This file was deleted.

3 changes: 0 additions & 3 deletions reinforcelab/agents/__init__.py

This file was deleted.

1 change: 0 additions & 1 deletion reinforcelab/agents/policy_gradient/__init__.py

This file was deleted.

2 changes: 0 additions & 2 deletions reinforcelab/agents/value_optimization/__init__.py

This file was deleted.

Empty file.
2 changes: 0 additions & 2 deletions reinforcelab/transforms/experience/__init__.py

This file was deleted.

40 changes: 0 additions & 40 deletions reinforcelab/utils/leaderboard_utils.py

This file was deleted.

6 changes: 0 additions & 6 deletions requirements.txt

This file was deleted.

19 changes: 0 additions & 19 deletions setup.py

This file was deleted.

5 changes: 5 additions & 0 deletions src/reinforcelab/modules/action_selectors/__init__.py
@@ -0,0 +1,5 @@
from .action_selector import ActionSelector
from .discrete import DiscreteActionSelector
from .continuous import ContinuousActionSelector, NoisyAction, ContinuousEpsilonGreedy

__all__ = ["ActionSelector", "DiscreteActionSelector", "ContinuousActionSelector", "NoisyAction", "ContinuousEpsilonGreedy"]
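
These new __init__.py files pair their re-exports with __all__, which pins down exactly what a wildcard import exposes. A small sketch of the effect, assuming the package is installed under the new layout:

# `import *` now pulls in exactly the names listed in __all__:
from reinforcelab.modules.action_selectors import *

selector_cls = ContinuousEpsilonGreedy  # re-exported above
# Names not listed (helpers, submodule internals) stay out of the
# wildcard namespace, keeping the public API surface explicit.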
5 changes: 5 additions & 0 deletions src/reinforcelab/modules/action_selectors/continuous/__init__.py
@@ -0,0 +1,5 @@
from .continuous_action_selector import ContinuousActionSelector
from .noisy_action import NoisyAction
from .epsilon_greedy import ContinuousEpsilonGreedy

__all__ = ["ContinuousActionSelector", "NoisyAction", "ContinuousEpsilonGreedy"]
2 changes: 1 addition & 1 deletion src/reinforcelab/modules/action_selectors/continuous/epsilon_greedy.py
@@ -4,7 +4,7 @@

import gymnasium as gym

from reinforcelab.utils import tie_breaker, get_space_size, space_is_type
from reinforcelab.modules.utils import tie_breaker, get_space_size, space_is_type


class ContinuousEpsilonGreedy(ContinuousActionSelector):
2 changes: 1 addition & 1 deletion src/reinforcelab/modules/action_selectors/continuous/noisy_action.py
@@ -4,7 +4,7 @@

import gymnasium as gym

from reinforcelab.utils import tie_breaker, get_space_size, space_is_type
from reinforcelab.modules.utils import tie_breaker, get_space_size, space_is_type


class NoisyAction(ContinuousActionSelector):
2 changes: 2 additions & 0 deletions src/reinforcelab/modules/action_selectors/discrete/__init__.py
@@ -1,2 +1,4 @@
from .discrete_action_selector import DiscreteActionSelector
from .epsilon_greedy import EpsilonGreedy

__all__ = ["DiscreteActionSelector", "EpsilonGreedy"]
2 changes: 1 addition & 1 deletion src/reinforcelab/modules/action_selectors/discrete/epsilon_greedy.py
@@ -3,7 +3,7 @@

import gymnasium as gym

from reinforcelab.utils import tie_breaker, get_space_size, space_is_type
from reinforcelab.modules.utils import tie_breaker, get_space_size, space_is_type


class EpsilonGreedy(DiscreteActionSelector):
5 changes: 5 additions & 0 deletions src/reinforcelab/modules/agents/__init__.py
@@ -0,0 +1,5 @@
from .agent import BaseAgent, Agent
from .value_optimization import QLearning, SARSA, ExpectedSARSA, DQN, DCQN
from .policy_gradient import DDPG

__all__ = ["BaseAgent", "Agent", "QLearning", "SARSA", "ExpectedSARSA", "DQN", "DCQN", "DDPG"]
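
For downstream code, the visible effect of the src/ move is an import-path change; the agent classes keep their names. A before/after sketch, assuming the old flat package re-exported the same names:

# Old flat layout (pre-PR):
#   from reinforcelab.agents import DQN
# New src/ layout with the modules subpackage:
from reinforcelab.modules.agents import DQN, DDPG, QLearning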
src/reinforcelab/modules/agents/agent.py
@@ -1,24 +1,22 @@
import os
import dill
from torch import Tensor
from typing import Union
from abc import ABC, abstractmethod

from reinforcelab.brains import Brain
from reinforcelab.estimators import Estimator
from reinforcelab.memory_buffers import MemoryBuffer
from reinforcelab.action_selectors import ActionSelector
from reinforcelab.experience import Experience
from reinforcelab.modules.brains import Brain
from reinforcelab.modules.memory_buffers import MemoryBuffer
from reinforcelab.modules.action_selectors import ActionSelector
from reinforcelab.modules.experience import Experience


class BaseAgent(ABC):
@abstractmethod
def act(self, state: Tensor, epsilon: float = 0.0):
def act(self, state: Tensor, **kwargs):
"""Choose an action given a state

Args:
state (Any): A representation of the state
epsilon (float, optional): Probability of taking an exploratory action. Defaults to 0.0.
state (Tensor): A representation of the state
**kwargs: Additional arguments to the action selector
"""

@abstractmethod
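
Replacing the hard-coded epsilon parameter with **kwargs lets each action selector define its own exploration knobs. A standalone sketch of the intended flow (illustrative only; the concrete Agent subclass is not shown in this diff):

from torch import Tensor

class DemoAgent:
    """Standalone sketch, not the library's Agent: shows how **kwargs flow."""

    def __init__(self, brain, action_selector):
        self.brain = brain
        self.action_selector = action_selector

    def act(self, state: Tensor, **kwargs):
        action_values = self.brain(state)
        # e.g. demo.act(state, epsilon=0.05): the kwarg passes through untouched,
        # so only the selector needs to know which exploration knobs exist.
        # (Calling the selector directly is an assumption; the real interface
        # may be a method such as select().)
        return self.action_selector(action_values, **kwargs)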
3 changes: 3 additions & 0 deletions src/reinforcelab/modules/agents/policy_gradient/__init__.py
@@ -0,0 +1,3 @@
from .ddpg import DDPG

__all__ = ["DDPG"]
src/reinforcelab/modules/agents/policy_gradient/ddpg.py
@@ -2,11 +2,11 @@
from torch import nn


from reinforcelab.agents import Agent
from reinforcelab.brains import ActorCritic
from reinforcelab.action_selectors.continuous import *
from reinforcelab.estimators import SARSEstimator
from reinforcelab.memory_buffers import ExperienceReplay
from reinforcelab.modules.agents import Agent
from reinforcelab.modules.brains import ActorCritic
from reinforcelab.modules.action_selectors.continuous import ContinuousEpsilonGreedy
from reinforcelab.modules.estimators import SARSEstimator
from reinforcelab.modules.memory_buffers import ExperienceReplay

class DDPG(Agent):
def __init__(self, env: Env, actor_model: nn.Module, critic_model: nn.Module, learning_rate=0.01, discount_factor=0.999, alpha=0.03, batch_size=128, update_every=4, max_buffer_size=2**12):
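
The DDPG constructor signature is visible in this diff, but the model interface is not; the network shapes below are placeholders for a continuous-control sketch, not the library's required format:

import gymnasium as gym
from torch import nn
from reinforcelab.modules.agents import DDPG

env = gym.make("Pendulum-v1")  # 3-dim observation, 1-dim action
# Plain MLPs as stand-ins; the exact actor/critic interface DDPG expects
# is an assumption here.
actor = nn.Sequential(nn.Linear(3, 64), nn.ReLU(), nn.Linear(64, 1), nn.Tanh())
critic = nn.Sequential(nn.Linear(3 + 1, 64), nn.ReLU(), nn.Linear(64, 1))

agent = DDPG(env, actor, critic, learning_rate=1e-3, batch_size=128)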
4 changes: 4 additions & 0 deletions src/reinforcelab/modules/agents/value_optimization/__init__.py
@@ -0,0 +1,4 @@
from .tabular import QLearning, SARSA, ExpectedSARSA
from .function_approximation import DQN, DCQN

__all__ = ["QLearning", "SARSA", "ExpectedSARSA", "DQN", "DCQN"]
2 changes: 2 additions & 0 deletions src/reinforcelab/modules/agents/value_optimization/function_approximation/__init__.py
@@ -1,2 +1,4 @@
from .dqn import DQN
from .dcqn import DCQN

__all__ = ["DQN", "DCQN"]
src/reinforcelab/modules/agents/value_optimization/function_approximation/dcqn.py
@@ -1,13 +1,12 @@
from gymnasium import Env
from torch import nn

from reinforcelab.agents.agent import Agent
from reinforcelab.brains import QNetwork
from reinforcelab.estimators import MaxQEstimator
from reinforcelab.transforms.experience import IntrinsicCuriosityModule
from reinforcelab.action_selectors import EpsilonGreedy
from reinforcelab.memory_buffers import ExperienceReplay
from reinforcelab.utils import get_state_action_sizes
from reinforcelab.modules.agents.agent import Agent
from reinforcelab.modules.brains import QNetwork
from reinforcelab.modules.estimators import MaxQEstimator
from reinforcelab.modules.transforms.experience import IntrinsicCuriosityModule
from reinforcelab.modules.action_selectors import EpsilonGreedy
from reinforcelab.modules.memory_buffers import ExperienceReplay


class DCQN(Agent):
@@ -17,7 +16,17 @@ class DCQN(Agent):
procedure.
"""

def __init__(self, env: Env, model: nn.Module, learning_rate=0.01, discount_factor: float = 0.999, alpha=0.03, batch_size=128, update_every=4, max_buffer_size=2**12):
def __init__(
self,
env: Env,
model: nn.Module,
learning_rate=0.01,
discount_factor: float = 0.999,
alpha=0.03,
batch_size=128,
update_every=4,
max_buffer_size=2**12,
):
action_selector = EpsilonGreedy(env)
icm = IntrinsicCuriosityModule(
env, 4, learning_rate=0.0001, state_transform_hidden_layers=[4, 4])
@@ -26,5 +35,9 @@ def __init__(self, env: Env, model: nn.Module, learning_rate=0.01, discount_fact
estimator = MaxQEstimator(env, discount_factor)
brain = QNetwork(model, estimator, learning_rate, alpha)

super().__init__(brain,
action_selector, buffer, update_every=update_every)
super().__init__(
brain,
action_selector,
buffer,
update_every=update_every
)
src/reinforcelab/modules/agents/value_optimization/function_approximation/dqn.py
@@ -1,12 +1,11 @@
from gymnasium import Env
from copy import deepcopy
from torch import nn

from reinforcelab.agents.agent import Agent
from reinforcelab.brains import QNetwork
from reinforcelab.estimators import MaxQEstimator
from reinforcelab.action_selectors import EpsilonGreedy
from reinforcelab.memory_buffers import ExperienceReplay
from reinforcelab.modules.agents.agent import Agent
from reinforcelab.modules.brains import QNetwork
from reinforcelab.modules.estimators import MaxQEstimator
from reinforcelab.modules.action_selectors import EpsilonGreedy
from reinforcelab.modules.memory_buffers import ExperienceReplay


class DQN(Agent):
@@ -16,7 +15,17 @@ class DQN(Agent):
procedure.
"""

def __init__(self, env: Env, model: nn.Module, learning_rate=0.01, discount_factor: float = 0.999, alpha=0.03, batch_size=128, update_every=4, max_buffer_size=2**12):
def __init__(
self,
env: Env,
model: nn.Module,
learning_rate=0.01,
discount_factor: float = 0.999,
alpha=0.03,
batch_size=128,
update_every=4,
max_buffer_size=2**12,
):
action_selector = EpsilonGreedy(env)
estimator = MaxQEstimator(env, discount_factor)
brain = QNetwork(model, estimator, learning_rate=learning_rate, alpha=alpha)
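
The reflowed signature is unchanged in substance, so constructing a DQN stays a one-liner. A hedged usage sketch (the model shape and the epsilon kwarg, carried over from the old act() signature, are assumptions):

import gymnasium as gym
import torch
from torch import nn
from reinforcelab.modules.agents import DQN

env = gym.make("CartPole-v1")  # 4 observations, 2 discrete actions
model = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 2))
agent = DQN(env, model, learning_rate=1e-3, discount_factor=0.99)

state, _ = env.reset()
# Exploration settings now travel through **kwargs to EpsilonGreedy:
action = agent.act(torch.as_tensor(state, dtype=torch.float32), epsilon=0.1)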
2 changes: 2 additions & 0 deletions src/reinforcelab/modules/agents/value_optimization/tabular/__init__.py
@@ -1,3 +1,5 @@
from .q_learning import QLearning
from .sarsa import SARSA
from .expected_sarsa import ExpectedSARSA

__all__ = ["QLearning", "SARSA", "ExpectedSARSA"]
src/reinforcelab/modules/agents/value_optimization/tabular/expected_sarsa.py
@@ -1,10 +1,10 @@
from gymnasium import Env

from reinforcelab.agents.agent import Agent
from reinforcelab.brains import QTable
from reinforcelab.estimators import ExpectedSARSAEstimator
from reinforcelab.action_selectors import EpsilonGreedy
from reinforcelab.memory_buffers import OrderedBuffer
from reinforcelab.modules.agents.agent import Agent
from reinforcelab.modules.brains import QTable
from reinforcelab.modules.estimators import ExpectedSARSAEstimator
from reinforcelab.modules.action_selectors import EpsilonGreedy
from reinforcelab.modules.memory_buffers import OrderedBuffer


class ExpectedSARSA(Agent):
src/reinforcelab/modules/agents/value_optimization/tabular/q_learning.py
@@ -1,10 +1,10 @@
from gymnasium import Env

from reinforcelab.agents.agent import Agent
from reinforcelab.brains import QTable
from reinforcelab.estimators import MaxQEstimator
from reinforcelab.action_selectors import EpsilonGreedy
from reinforcelab.memory_buffers import OrderedBuffer
from reinforcelab.modules.agents.agent import Agent
from reinforcelab.modules.brains import QTable
from reinforcelab.modules.estimators import MaxQEstimator
from reinforcelab.modules.action_selectors import EpsilonGreedy
from reinforcelab.modules.memory_buffers import OrderedBuffer


class QLearning(Agent):