AI4Finance-Foundation · luckylwk · Apr 29, 2021 · Apr 29, 2021
diff --git a/finrl/env/env_portfolio.py b/finrl/env/env_portfolio.py
@@ -9,6 +9,7 @@
 import matplotlib.pyplot as plt
 from stable_baselines3.common.vec_env import DummyVecEnv
 
+from finrl.utils.maths import softmax
 
 class StockPortfolioEnv(gym.Env):
     """A single stock trading environment for OpenAI gym
@@ -161,7 +162,7 @@ def step(self, actions):
             #  norm_actions = (np.array(actions) - np.array(actions).min()) / (np.array(actions) - np.array(actions).min()).sum()
             # else:
             #  norm_actions = actions
-            weights = self.softmax_normalization(actions)
+            weights = softmax(actions)
             # print("Normalized actions: ", weights)
             self.actions_memory.append(weights)
             last_day_memory = self.data
@@ -220,12 +221,6 @@ def reset(self):
     def render(self, mode="human"):
         return self.state
 
-    def softmax_normalization(self, actions):
-        numerator = np.exp(actions)
-        denominator = np.sum(np.exp(actions))
-        softmax_output = numerator / denominator
-        return softmax_output
-
     def save_asset_memory(self):
         date_list = self.date_memory
         portfolio_return = self.portfolio_return_memory

diff --git a/finrl/utils/__init__.py b/finrl/utils/__init__.py
diff --git a/finrl/utils/maths.py b/finrl/utils/maths.py
@@ -0,0 +1,25 @@
+import numpy as np
+
+
+def softmax(x: np.ndarray) -> np.ndarray:
+    """
+    Return the softmax of `x`, computed along axis 0.
+
+    For numerical stability the max along axis 0 is subtracted before
+    exponentiating; the shift cancels out in the ratio.
+
+    See:
+        * https://en.wikipedia.org/wiki/Softmax_function
+        * https://github.com/tensorflow/tensorflow/blob/85c8b2a817f95a3e979ecd1ed95bff1dc1335cff/tensorflow/python/keras/activations.py#L78
+        * https://stackoverflow.com/a/38250088
+
+    Parameters
+    ----------
+    x : pd.Series or np.ndarray
+
+    Returns
+    -------
+    softmax : array-like, same shape as `x`; sums to 1 along axis 0
+    """
+    e_x = np.exp(x - np.max(x, axis=0))
+    return e_x / e_x.sum(axis=0)
diff --git a/requirements.txt b/requirements.txt
@@ -15,6 +15,7 @@ stable-baselines3[extra]
 
 # testing requirements
 pytest
+parameterized==0.8.1
 
 # packaging
 setuptools>=41.4.0

diff --git a/tests/test_utils_maths.py b/tests/test_utils_maths.py
@@ -0,0 +1,40 @@
+from unittest import TestCase
+
+from parameterized import parameterized
+
+from numpy import asarray, float64
+from numpy.testing import assert_almost_equal
+from pandas import Series
+
+from finrl.utils import maths
+
+
+class TestMaths(TestCase):
+    """Checks `maths.softmax` against precomputed reference values."""
+
+    @parameterized.expand([
+        (
+            "one",
+            asarray([1, 2, 3]),
+            asarray([0.090030573, 0.244728471, 0.665240956]),
+        ),
+        (
+            "two",
+            asarray([0.1, 0.1, 0.8]),
+            asarray([0.249143401, 0.249143401, 0.501713198]),
+        ),
+        (
+            "three",
+            asarray([0.0, 0.0, 1.0]),
+            asarray([0.211941558, 0.211941558, 0.576116885]),
+        ),
+        (
+            "Should allow for using a Pandas Series",
+            Series([0.0, 0.0, 1.0]),
+            asarray([0.211941558, 0.211941558, 0.576116885]),
+        ),
+    ])
+    def test_softmax_normalization(self, name, values, expected):
+        # `values` (not `input`) so the builtin is not shadowed.
+        actual = maths.softmax(values)
+        assert_almost_equal(actual, expected, decimal=9)