From de06f31d2e6cb9fab5efaa351213e1301c1baac7 Mon Sep 17 00:00:00 2001
From: luckylwk
Date: Thu, 29 Apr 2021 08:54:14 +0200
Subject: [PATCH 1/2] [GH#220] Created softmax utility function with test

---
 finrl/utils/__init__.py   |  0
 finrl/utils/maths.py      | 25 ++++++++++++++++++++++++
 requirements.txt          |  1 +
 tests/test_utils_maths.py | 40 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+)
 create mode 100644 finrl/utils/__init__.py
 create mode 100644 finrl/utils/maths.py
 create mode 100644 tests/test_utils_maths.py

diff --git a/finrl/utils/__init__.py b/finrl/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/finrl/utils/maths.py b/finrl/utils/maths.py
new file mode 100644
index 000000000..921b01e83
--- /dev/null
+++ b/finrl/utils/maths.py
@@ -0,0 +1,25 @@
+import numpy as np
+
+
+def softmax(x: np.ndarray) -> np.ndarray:
+    """
+    Returns the softmax of the input. For numerical stability we subtract the
+    max, which cancels out in the equation.
+
+    See:
+    * https://en.wikipedia.org/wiki/Softmax_function
+    * https://github.com/tensorflow/tensorflow/blob/85c8b2a817f95a3e979ecd1ed95bff1dc1335cff/tensorflow/python/keras/activations.py#L78
+    * https://stackoverflow.com/a/38250088
+
+    Parameters
+    ----------
+    x : pd.Series or np.ndarray
+
+    Returns
+    -------
+    softmax : array-like
+    """
+
+    e_x = np.exp(x - np.max(x))
+
+    return e_x / e_x.sum(axis=0)
diff --git a/requirements.txt b/requirements.txt
index 2e14aac12..6edb2f946 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,6 +15,7 @@ stable-baselines3[extra]
 
 # testing requirements
 pytest
+parameterized==0.8.1
 
 # packaging
 setuptools>=41.4.0
diff --git a/tests/test_utils_maths.py b/tests/test_utils_maths.py
new file mode 100644
index 000000000..be0fe143b
--- /dev/null
+++ b/tests/test_utils_maths.py
@@ -0,0 +1,40 @@
+from unittest import TestCase
+
+from parameterized import parameterized
+
+from numpy import asarray, float64
+from numpy.testing import assert_almost_equal
+from pandas import Series
+
+from finrl.utils import maths
+
+
+class TestMaths(TestCase):
+    @parameterized.expand([
+        (
+            "one",
+            asarray([1, 2, 3]),
+            asarray([0.090030573, 0.244728471, 0.665240956])
+        ),
+        (
+            "two",
+            asarray([0.1, 0.1, 0.8]),
+            asarray([0.249143401, 0.249143401, 0.501713198])
+        ),
+        (
+            "three",
+            asarray([0.0, 0.0, 1.0]),
+            asarray([0.211941558, 0.211941558, 0.576116885])
+        ),
+        (
+            "Should allow for using a Pandas Series",
+            Series([0.0, 0.0, 1.0]),
+            asarray([0.211941558, 0.211941558, 0.576116885])
+        ),
+    ])
+    def test_softmax_normalization(self, name, input, expected):
+        assert_almost_equal(
+            maths.softmax(input),
+            expected,
+            decimal=9
+        )

From 076415917c3c2fbff4b30c0eab227392bf5a6043 Mon Sep 17 00:00:00 2001
From: luckylwk
Date: Thu, 29 Apr 2021 08:54:57 +0200
Subject: [PATCH 2/2] [GH#220] Implemented softmax utility function in Portfolio Env

---
 finrl/env/env_portfolio.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/finrl/env/env_portfolio.py b/finrl/env/env_portfolio.py
index 0fadd910e..d51c5883f 100644
--- a/finrl/env/env_portfolio.py
+++ b/finrl/env/env_portfolio.py
@@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
 from stable_baselines3.common.vec_env import DummyVecEnv
 
+from finrl.utils.maths import softmax
 
 
 class StockPortfolioEnv(gym.Env):
     """A single stock trading environment for OpenAI gym
@@ -161,7 +162,7 @@ def step(self, actions):
             #     norm_actions = (np.array(actions) - np.array(actions).min()) / (np.array(actions) - np.array(actions).min()).sum()
             # else:
             #     norm_actions = actions
-            weights = self.softmax_normalization(actions)
+            weights = softmax(actions)
             # print("Normalized actions: ", weights)
             self.actions_memory.append(weights)
             last_day_memory = self.data
@@ -220,12 +221,6 @@ def reset(self):
     def render(self, mode="human"):
         return self.state
 
-    def softmax_normalization(self, actions):
-        numerator = np.exp(actions)
-        denominator = np.sum(np.exp(actions))
-        softmax_output = numerator / denominator
-        return softmax_output
-
     def save_asset_memory(self):
         date_list = self.date_memory
         portfolio_return = self.portfolio_return_memory
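
A minimal usage sketch of the new helper, assuming the two patches above are applied; the inputs and expected values simply mirror the parameterized test cases:

    import numpy as np
    from pandas import Series

    from finrl.utils.maths import softmax

    # NumPy input: output is a valid weight vector (non-negative, sums to 1).
    weights = softmax(np.asarray([1, 2, 3]))
    print(weights)        # approx. [0.09003057, 0.24472847, 0.66524096]
    print(weights.sum())  # ~1.0

    # A pandas Series works as well (the env passes the agent's raw actions here).
    print(softmax(Series([0.0, 0.0, 1.0])))  # approx. 0.2119, 0.2119, 0.5761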