Stablebaseline Writer compatibility (#470)
* Integrates the writer directly into the base `Agent` via an option. This should solve (at least for the writer) Stable-Baselines agent compatibility with rlberry (#457).
* Update changelog and user documentation.
* Remove `WriterWrapper` from the API.
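In practice, the change replaces the old environment-wrapping pattern with a keyword argument handled by the base Agent. Below is a minimal before/after sketch based on the diffs in this commit; `MyAgent` and the `AgentWithSimplePolicy` base are illustrative stand-ins for any rlberry agent:

```python
from rlberry.agents import AgentWithSimplePolicy


class MyAgent(AgentWithSimplePolicy):
    name = "MyAgent"

    def __init__(self, env, **kwargs):
        # Before this commit: wrap the environment so rewards reach the writer.
        #   from rlberry.wrappers import WriterWrapper
        #   AgentWithSimplePolicy.__init__(self, env, **kwargs)
        #   self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
        # After this commit: the base Agent records the reward itself.
        AgentWithSimplePolicy.__init__(self, env, writer_extra="reward", **kwargs)

    def fit(self, budget=100, **kwargs):
        # Dummy training loop, only here to generate rewards for the writer.
        observation, info = self.env.reset()
        for _ in range(int(budget)):
            action = self.policy(observation)
            observation, reward, terminated, truncated, info = self.env.step(action)
            if terminated or truncated:
                observation, info = self.env.reset()

    def policy(self, observation):
        return self.env.action_space.sample()
```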
JulienT01 committed Jul 19, 2024
1 parent 324f237 commit 8710009
Showing 19 changed files with 151 additions and 113 deletions.
1 change: 0 additions & 1 deletion docs/api.rst
@@ -185,4 +185,3 @@ Environment Wrappers
wrappers.discretize_state.DiscretizeStateWrapper
wrappers.gym_utils.OldGymCompatibilityWrapper
wrappers.RescaleRewardWrapper
wrappers.WriterWrapper
33 changes: 9 additions & 24 deletions docs/basics/quick_start_rl/quickstart.md
@@ -25,7 +25,6 @@ from rlberry.manager import (
plot_writer_data,
read_writer_data,
)
from rlberry.wrappers import WriterWrapper
```

Choosing an RL environment
@@ -221,47 +220,33 @@ cannot compute the optimal policy, we could simply compare the rewards
gathered during learning, instead of the regret.

First, we have to record the reward during the fit as this is not done
automatically. To do this, we can use the
[WriterWrapper](rlberry.wrappers.writer_utils.WriterWrapper)
module, or simply the [writer](rlberry.agents.Agent.writer) attribute.

```python
class RandomAgent2(RandomAgent):
name = "RandomAgent2"

def __init__(self, env, **kwargs):
RandomAgent.__init__(self, env, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")


class UCBVIAgent2(UCBVIAgent):
name = "UCBVIAgent2"

def __init__(self, env, **kwargs):
UCBVIAgent.__init__(self, env, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
```

automatically. To do this, we can use the `writer_extra` optional parameter.

Then, we fit the two agents.

```python
ucbvi_params["writer_extra"] = "reward"
random_params = {"writer_extra": "reward"}

# Create ExperimentManager for UCBI to fit 10 agents
ucbvi_stats = ExperimentManager(
UCBVIAgent2,
UCBVIAgent,
(env_ctor, env_kwargs),
fit_budget=50,
init_kwargs=ucbvi_params,
n_fit=10,
agent_name="UCBVIAgent2",
)
ucbvi_stats.fit()

# Create ExperimentManager for baseline to fit 10 agents
baseline_stats = ExperimentManager(
RandomAgent2,
RandomAgent,
(env_ctor, env_kwargs),
fit_budget=5000,
init_kwargs=random_params,
n_fit=10,
agent_name="RandomAgent2",
)
baseline_stats.fit()
```
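After fitting, the recorded rewards can be turned into a comparison plot in place of the regret curve. A minimal sketch, assuming the `ucbvi_stats` and `baseline_stats` managers defined above and that `plot_writer_data` accepts the `preprocess_func` and `title` keyword arguments:

```python
import numpy as np
from rlberry.manager import plot_writer_data

# Plot the cumulative reward logged by the writer, for both sets of fitted agents.
output = plot_writer_data(
    [ucbvi_stats, baseline_stats],
    tag="reward",
    preprocess_func=np.cumsum,
    title="Cumulative Reward",
)
```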
8 changes: 6 additions & 2 deletions docs/basics/userguide/logging.md
@@ -100,11 +100,15 @@ As you can see, on the previous output, you don't have the "INFO" output anymore


## Writer
To keep informations during and after the experiment, rlberry use a 'writer'. The writer is stored inside the [Agent](agent_page), and is updated in its fit() function.
To keep information during and after the experiment, rlberry uses a 'writer'. The writer is stored inside the [Agent](agent_page), and is updated in its fit() function.

By default (with the [Agent interface](rlberry.agents.Agent)), the writer is [DefaultWriter](rlberry.utils.writers.DefaultWriter).
You can add information about the environment's `reward` and `action` with the optional parameter `writer_extra` during the agent initialization.

To keep informations about the environment inside the writer, you can wrap the environment inside [WriterWrapper](rlberry.wrappers.WriterWrapper).
In the previous example, to get the reward, this would give (parameter for the ExperimentManager):
```python
init_kwargs = dict(algo_cls=PPO, verbose=1, writer_extra="reward")
```
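For context, here is a sketch of how this `init_kwargs` could plug into the ExperimentManager of the user guide's earlier example; the import paths, the CartPole environment, and the budget below are assumptions, not part of this diff:

```python
from stable_baselines3 import PPO
from rlberry.agents.stable_baselines import StableBaselinesAgent  # assumed import path
from rlberry.envs import gym_make
from rlberry.manager import ExperimentManager

# writer_extra="reward" asks the base Agent to log the environment reward.
xp_manager = ExperimentManager(
    StableBaselinesAgent,
    (gym_make, dict(id="CartPole-v1")),
    fit_budget=10_000,
    init_kwargs=dict(algo_cls=PPO, verbose=1, writer_extra="reward"),
    n_fit=1,
)
xp_manager.fit()
```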


To get the data, saved during an experiment, in a Pandas DataFrame, you can use [plot_writer_data](rlberry.manager.plot_writer_data) on the [ExperimentManager](rlberry.manager.ExperimentManager) (or a list of them).
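Continuing the sketch above, reading the logged rewards back might look like this (assuming `xp_manager` has been fitted and that `plot_writer_data` returns the data it plots as a DataFrame):

```python
from rlberry.manager import plot_writer_data

reward_df = plot_writer_data(xp_manager, tag="reward")
print(reward_df.head())  # inspect the recorded scalars
```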
5 changes: 5 additions & 0 deletions docs/changelog.rst
@@ -7,6 +7,11 @@ Changelog
Dev version
-----------


*PR #470*

* Integrates the writer directly into the base "Agent" via an option: https://github.com/rlberry-py/rlberry/issues/457

*PR #468*

* New tool to find the path of the 'manager_obj.pickle' more easily: https://github.com/rlberry-py/rlberry/issues/407
24 changes: 13 additions & 11 deletions examples/comparison_agents.py
@@ -16,7 +16,6 @@
from rlberry.manager.comparison import compare_agents
from rlberry.manager import AgentManager
from rlberry_research.envs.bandits import BernoulliBandit
from rlberry.wrappers import WriterWrapper
from rlberry_research.agents.bandits import (
IndexAgent,
makeBoundedMOSSIndex,
@@ -42,18 +41,16 @@ class UCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex()
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
IndexAgent.__init__(self, env, index, writer_extra="reward", **kwargs)


class ETCAgent(IndexAgent):
name = "ETC"

def __init__(self, env, m=20, **kwargs):
index, _ = makeETCIndex(A, m)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -62,9 +59,8 @@ class MOSSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedMOSSIndex(T, A)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -73,8 +69,14 @@ class NPTSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedNPTSIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="reward")
IndexAgent.__init__(
self,
env,
index,
writer_extra="reward",
tracker_params=tracker_params,
**kwargs,
)


Agents_class = [MOSSAgent, NPTSAgent, UCBAgent, ETCAgent]
13 changes: 4 additions & 9 deletions examples/demo_bandits/plot_TS_bandit.py
@@ -21,7 +21,6 @@
makeGaussianPrior,
)
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper


# Bernoulli
@@ -36,8 +35,7 @@ class BernoulliTSAgent(TSAgent):

def __init__(self, env, **kwargs):
prior, _ = makeBetaPrior()
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


class BoundedUCBAgent(IndexAgent):
@@ -47,8 +45,7 @@ class BoundedUCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex(0, 1)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
@@ -101,8 +98,7 @@ class GaussianTSAgent(TSAgent):

def __init__(self, env, sigma=1.0, **kwargs):
prior, _ = makeGaussianPrior(sigma)
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


class GaussianUCBAgent(IndexAgent):
@@ -112,8 +108,7 @@ class GaussianUCBAgent(IndexAgent):

def __init__(self, env, sigma=1.0, **kwargs):
index, _ = makeSubgaussianUCBIndex(sigma)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
56 changes: 33 additions & 23 deletions examples/demo_bandits/plot_compare_index_bandits.py
@@ -10,7 +10,6 @@
import matplotlib.pyplot as plt
from rlberry_research.envs.bandits import BernoulliBandit
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper
from rlberry_research.agents.bandits import (
IndexAgent,
RandomizedAgent,
@@ -44,9 +43,8 @@ class UCBAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedUCBIndex()
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -55,9 +53,13 @@ class UCBVAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, params = makeBoundedUCBVIndex()
IndexAgent.__init__(self, env, index, tracker_params=params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=params,
**kwargs
)


@@ -66,9 +68,8 @@ class ETCAgent(IndexAgent):

def __init__(self, env, m=20, **kwargs):
index, _ = makeETCIndex(A, m)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -77,9 +78,8 @@ class MOSSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, _ = makeBoundedMOSSIndex(T, A)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self, env, index, writer_extra="action_and_reward", **kwargs
)


@@ -88,9 +88,13 @@ class IMEDAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedIMEDIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


@@ -99,9 +103,13 @@ class NPTSAgent(IndexAgent):

def __init__(self, env, **kwargs):
index, tracker_params = makeBoundedNPTSIndex()
IndexAgent.__init__(self, env, index, tracker_params=tracker_params, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
IndexAgent.__init__(
self,
env,
index,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


@@ -111,10 +119,12 @@ class EXP3Agent(RandomizedAgent):
def __init__(self, env, **kwargs):
prob, tracker_params = makeEXP3Index()
RandomizedAgent.__init__(
self, env, prob, tracker_params=tracker_params, **kwargs
)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
self,
env,
prob,
writer_extra="action_and_reward",
tracker_params=tracker_params,
**kwargs
)


12 changes: 7 additions & 5 deletions examples/demo_bandits/plot_exp3_bandit.py
@@ -16,7 +16,6 @@
makeBetaPrior,
)
from rlberry.manager import ExperimentManager, plot_writer_data
from rlberry.wrappers import WriterWrapper


# Agents definition
@@ -28,9 +27,13 @@ class EXP3Agent(RandomizedAgent):
def __init__(self, env, **kwargs):
prob, tracker_params = makeEXP3Index()
RandomizedAgent.__init__(
self, env, prob, tracker_params=tracker_params, **kwargs
self,
env,
prob,
writer_extra="action",
tracker_params=tracker_params,
**kwargs
)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")


class BernoulliTSAgent(TSAgent):
@@ -40,8 +43,7 @@ class BernoulliTSAgent(TSAgent):

def __init__(self, env, **kwargs):
prior, _ = makeBetaPrior()
TSAgent.__init__(self, env, prior, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
TSAgent.__init__(self, env, prior, writer_extra="action", **kwargs)


# Parameters of the problem
6 changes: 2 additions & 4 deletions examples/demo_bandits/plot_mirror_bandit.py
@@ -17,7 +17,6 @@
from rlberry.manager import ExperimentManager, read_writer_data
from rlberry.envs.interface import Model
from rlberry_research.agents.bandits import BanditWithSimplePolicy
from rlberry.wrappers import WriterWrapper
import rlberry.spaces as spaces

import requests
@@ -117,9 +116,8 @@ class SeqHalvAgent(BanditWithSimplePolicy):
name = "SeqHalvAgent"

def __init__(self, env, **kwargs):
BanditWithSimplePolicy.__init__(self, env, **kwargs)
self.env = WriterWrapper(
self.env, self.writer, write_scalar="action_and_reward"
BanditWithSimplePolicy.__init__(
self, env, writer_extra="action_and_reward", **kwargs
)

def fit(self, budget=None, **kwargs):
4 changes: 1 addition & 3 deletions examples/demo_bandits/plot_ucb_bandit.py
@@ -11,7 +11,6 @@
from rlberry_research.agents.bandits import IndexAgent, makeSubgaussianUCBIndex
from rlberry.manager import ExperimentManager, plot_writer_data
import matplotlib.pyplot as plt
from rlberry.wrappers import WriterWrapper


# Agents definition
@@ -24,8 +23,7 @@ class UCBAgent(IndexAgent):

def __init__(self, env, sigma=1, **kwargs):
index, _ = makeSubgaussianUCBIndex(sigma)
IndexAgent.__init__(self, env, index, **kwargs)
self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
IndexAgent.__init__(self, env, index, writer_extra="action", **kwargs)


# Parameters of the problem
Expand Down