Stablebaseline Writer compatibility (#470)
* Integrates the writer directly into the base `Agent` via an option. This should solve (at least for the writer) Stable-Baselines agent compatibility with rlberry (#457).
* Update changelog and user documentation.
* Remove `WriterWrapper` from the API.
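In practice, the change replaces the old environment-wrapping pattern with a keyword argument handled by the base Agent. Below is a minimal before/after sketch based on the diffs in this commit; `MyAgent` and the `AgentWithSimplePolicy` base are illustrative stand-ins for any rlberry agent:

```python
from rlberry.agents import AgentWithSimplePolicy


class MyAgent(AgentWithSimplePolicy):
    name = "MyAgent"

    def __init__(self, env, **kwargs):
        # Before this commit: wrap the environment so rewards reach the writer.
        #   from rlberry.wrappers import WriterWrapper
        #   AgentWithSimplePolicy.__init__(self, env, **kwargs)
        #   self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
        # After this commit: the base Agent records the reward itself.
        AgentWithSimplePolicy.__init__(self, env, writer_extra="reward", **kwargs)

    def fit(self, budget=100, **kwargs):
        # Dummy training loop, only here to generate rewards for the writer.
        observation, info = self.env.reset()
        for _ in range(int(budget)):
            action = self.policy(observation)
            observation, reward, terminated, truncated, info = self.env.step(action)
            if terminated or truncated:
                observation, info = self.env.reset()

    def policy(self, observation):
        return self.env.action_space.sample()
```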
JulienT01 committed Jul 19, 2024
1 parent 324f237 commit 8710009
Showing 19 changed files with 151 additions and 113 deletions.
1 change: 0 additions & 1 deletion docs/api.rst
@@ -185,4 +185,3 @@ Environment Wrappers
wrappers.discretize_state.DiscretizeStateWrapper
wrappers.gym_utils.OldGymCompatibilityWrapper
wrappers.RescaleRewardWrapper
wrappers.WriterWrapper
33 changes: 9 additions & 24 deletions docs/basics/quick_start_rl/quickstart.md
@@ -25,7 +25,6 @@ from rlberry.manager import (
plot_writer_data,
read_writer_data,
)
from rlberry.wrappers import WriterWrapper
```

Choosing an RL environment
@@ -221,47 +220,33 @@ cannot compute the optimal policy, we could simply compare the rewards
gathered during learning, instead of the regret.

First, we have to record the reward during the fit as this is not done
automatically. To do this, we can use the
[WriterWrapper](rlberry.wrappers.writer_utils.WriterWrapper)
module, or simply the [writer](rlberry.agents.Agent.writer) attribute.

```python
class RandomAgent2(RandomAgent):
name = "RandomAgent2"

def __init__(self, env, **kwargs):
RandomAgent.__init__(self, env, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")


class UCBVIAgent2(UCBVIAgent):
name = "UCBVIAgent2"

def __init__(self, env, **kwargs):
UCBVIAgent.__init__(self, env, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
```

automatically. To do this, we can use the `writer_extra` optional parameter.

Then, we fit the two agents.

```python
ucbvi_params["writer_extra"] = "reward"
random_params = {"writer_extra": "reward"}

# Create ExperimentManager for UCBI to fit 10 agents
ucbvi_stats = ExperimentManager(
UCBVIAgent2,
UCBVIAgent,
(env_ctor, env_kwargs),
fit_budget=50,
init_kwargs=ucbvi_params,
n_fit=10,
agent_name="UCBVIAgent2",
)
ucbvi_stats.fit()

# Create ExperimentManager for baseline to fit 10 agents
baseline_stats = ExperimentManager(
RandomAgent2,
RandomAgent,
(env_ctor, env_kwargs),
fit_budget=5000,
init_kwargs=random_params,
n_fit=10,
agent_name="RandomAgent2",
)
baseline_stats.fit()
```
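After fitting, the recorded rewards can be turned into a comparison plot in place of the regret curve. A minimal sketch, assuming the `ucbvi_stats` and `baseline_stats` managers defined above and that `plot_writer_data` accepts the `preprocess_func` and `title` keyword arguments:

```python
import numpy as np
from rlberry.manager import plot_writer_data

# Plot the cumulative reward logged by the writer, for both sets of fitted agents.
output = plot_writer_data(
    [ucbvi_stats, baseline_stats],
    tag="reward",
    preprocess_func=np.cumsum,
    title="Cumulative Reward",
)
```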
8 changes: 6 additions & 2 deletions docs/basics/userguide/logging.md
@@ -100,11 +100,15 @@ As you can see, on the previous output, you don't have the "INFO" output anymore


## Writer
To keep informations during and after the experiment, rlberry use a 'writer'. The writer is stored inside the [Agent](agent_page), and is updated in its fit() function.
To keep information during and after the experiment, rlberry uses a 'writer'. The writer is stored inside the [Agent](agent_page), and is updated in its fit() function.

By default (with the [Agent interface](rlberry.agents.Agent)), the writer is [DefaultWriter](rlberry.utils.writers.DefaultWriter).
You can add information about the environment's `reward` and `action` with the optional parameter `writer_extra` during the agent initialization.

To keep informations about the environment inside the writer, you can wrap the environment inside [WriterWrapper](rlberry.wrappers.WriterWrapper).
In the previous example, to get the reward, this would give (parameter for the ExperimentManager):
```python
init_kwargs = dict(algo_cls=PPO, verbose=1, writer_extra="reward")
```
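For context, here is a sketch of how this `init_kwargs` could plug into the ExperimentManager of the user guide's earlier example; the import paths, the CartPole environment, and the budget below are assumptions, not part of this diff:

```python
from stable_baselines3 import PPO
from rlberry.agents.stable_baselines import StableBaselinesAgent  # assumed import path
from rlberry.envs import gym_make
from rlberry.manager import ExperimentManager

# writer_extra="reward" asks the base Agent to log the environment reward.
xp_manager = ExperimentManager(
    StableBaselinesAgent,
    (gym_make, dict(id="CartPole-v1")),
    fit_budget=10_000,
    init_kwargs=dict(algo_cls=PPO, verbose=1, writer_extra="reward"),
    n_fit=1,
)
xp_manager.fit()
```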


To get the data, saved during an experiment, in a Pandas DataFrame, you can use [plot_writer_data](rlberry.manager.plot_writer_data) on the [ExperimentManager](rlberry.manager.ExperimentManager) (or a list of them).
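Continuing the sketch above, reading the logged rewards back might look like this (assuming `xp_manager` has been fitted and that `plot_writer_data` returns the data it plots as a DataFrame):

```python
from rlberry.manager import plot_writer_data

reward_df = plot_writer_data(xp_manager, tag="reward")
print(reward_df.head())  # inspect the recorded scalars
```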
5 changes: 5 additions & 0 deletions docs/changelog.rst
@@ -7,6 +7,11 @@ Changelog
Dev version
-----------


*PR #470*

* Integrates the writer directly into the base "Agent" via an option: https://github.com/rlberry-py/rlberry/issues/457

*PR #468*

* New tool to find the path of the 'manager_obj.pickle' more easily: https://github.com/rlberry-py/rlberry/issues/407
24 changes: 13 additions & 11 deletions examples/comparison_agents.py
@@ -16,7 +16,6 @@
from rlberry.manager.comparison import compare_agents
from rlberry.manager import AgentManager
from rlberry_research.envs.bandits import BernoulliBandit
from rlberry.wrappers import WriterWrapper
from rlberry_research.agents.bandits import (
IndexAgent,
makeBoundedMOSSIndex,
@@ -42,18 +41,16 @@ class UCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex()
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
IndexAgent.__init__(self, env, index, writer_extra="reward", **kwargs)


class ETCAgent(IndexAgent):
name = "ETC"

def __init__(self, env, m=20, **kwargs):
index, _ = makeETCIndex(A, m)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -62,9 +59,8 @@ class MOSSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedMOSSIndex(T, A)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -73,8 +69,14 @@ class NPTSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedNPTSIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
IndexAgent.__init__(
self,
env,
index,
writer_extra="reward",
tracker_params=tracker_params,
**kwargs,
)


Agents_class = [MOSSAgent, NPTSAgent, UCBAgent, ETCAgent]
13 changes: 4 additions & 9 deletions examples/demo_bandits/plot_TS_bandit.py
@@ -21,7 +21,6 @@
makeGaussianPrior,
)
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper


# Bernoulli
@@ -36,8 +35,7 @@ class BernoulliTSAgent(TSAgent):

def __init__(self, env, **kwargs):
prior, _ = makeBetaPrior()
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


class BoundedUCBAgent(IndexAgent):
@@ -47,8 +45,7 @@ class BoundedUCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex(0, 1)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
@@ -101,8 +98,7 @@ class GaussianTSAgent(TSAgent):

def __init__(self, env, sigma=1.0, **kwargs):
prior, _ = makeGaussianPrior(sigma)
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


class GaussianUCBAgent(IndexAgent):
@@ -112,8 +108,7 @@ class GaussianUCBAgent(IndexAgent):

def __init__(self, env, sigma=1.0, **kwargs):
index, _ = makeSubgaussianUCBIndex(sigma)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
56 changes: 33 additions & 23 deletions examples/demo_bandits/plot_compare_index_bandits.py
@@ -10,7 +10,6 @@
import matplotlib.pyplot as plt
from rlberry_research.envs.bandits import BernoulliBandit
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper
from rlberry_research.agents.bandits import (
IndexAgent,
RandomizedAgent,
@@ -44,9 +43,8 @@ class UCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex()
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -55,9 +53,13 @@ class UCBVAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, params = makeBoundedUCBVIndex()
IndexAgent.__init__(self, env, index, tracker_params=params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=params,
**kwargs
)


@@ -66,9 +68,8 @@ class ETCAgent(IndexAgent):

def __init__(self, env, m=20, **kwargs):
index, _ = makeETCIndex(A, m)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -77,9 +78,8 @@ class MOSSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedMOSSIndex(T, A)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -88,9 +88,13 @@ class IMEDAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedIMEDIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


@@ -99,9 +103,13 @@ class NPTSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedNPTSIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


@@ -111,10 +119,12 @@ class EXP3Agent(RandomizedAgent):
def __init__(self, env, **kwargs):
prob, tracker_params = makeEXP3Index()
RandomizedAgent.__init__(
self, env, prob, tracker_params=tracker_params, **kwargs
)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
self,
env,
prob,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


12 changes: 7 additions & 5 deletions examples/demo_bandits/plot_exp3_bandit.py
@@ -16,7 +16,6 @@
makeBetaPrior,
)
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper


# Agents definition
@@ -28,9 +27,13 @@ class EXP3Agent(RandomizedAgent):
def __init__(self, env, **kwargs):
prob, tracker_params = makeEXP3Index()
RandomizedAgent.__init__(
self, env, prob, tracker_params=tracker_params, **kwargs
self,
env,
prob,
writer_extra="action",
tracker_params=tracker_params,
**kwargs
)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")


class BernoulliTSAgent(TSAgent):
@@ -40,8 +43,7 @@ class BernoulliTSAgent(TSAgent):

def __init__(self, env, **kwargs):
prior, _ = makeBetaPrior()
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


# Parameters of the problem
6 changes: 2 additions & 4 deletions examples/demo_bandits/plot_mirror_bandit.py
@@ -17,7 +17,6 @@
from rlberry.manager import ExperimentManager, read_writer_data
from rlberry.envs.interface import Model
from rlberry_research.agents.bandits import BanditWithSimplePolicy
from rlberry.wrappers import WriterWrapper
import rlberry.spaces as spaces

import requests
@@ -117,9 +116,8 @@ class SeqHalvAgent(BanditWithSimplePolicy):
name = "SeqHalvAgent"

def __init__(self, env, **kwargs):
BanditWithSimplePolicy.__init__(self, env, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
BanditWithSimplePolicy.__init__(
self, env, writer_extra="action_and_reward", **kwargs
)

def fit(self, budget=None, **kwargs):
4 changes: 1 addition & 3 deletions examples/demo_bandits/plot_ucb_bandit.py
@@ -11,7 +11,6 @@
from rlberry_research.agents.bandits import IndexAgent, makeSubgaussianUCBIndex
from rlberry.manager import ExperimentManager, plot_writer_data
import matplotlib.pyplot as plt
from rlberry.wrappers import WriterWrapper


# Agents definition
@@ -24,8 +23,7 @@ class UCBAgent(IndexAgent):

def __init__(self, env, sigma=1, **kwargs):
index, _ = makeSubgaussianUCBIndex(sigma)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
Expand Down