master

Master Thesis code
git clone git://popovic.xyz/master.git

commit 520eb0476a70dfd35f419437671e854419abb8b6
parent db85f91cb1bc12ec0e3633b3c6343b466a0ce1ed
Author: miksa234 <milutin@popovic.xyz>
Date:   Sun, 27 Apr 2025 20:28:29 +0100

revert last changes

Diffstat:
M rl_arb/rl_arb/reinforce.py | 5 +----
M rl_arb/rl_arb/rlearn.py | 20 +++++++++-----------
2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/rl_arb/rl_arb/reinforce.py b/rl_arb/rl_arb/reinforce.py
@@ -86,10 +86,7 @@ class Reinforce:
         test_rewards_hist = []
 
         for itr in range(self.args['num_reinforce']):
-            if itr % 2 == 0:
-                at_block = 10
-            else:
-                at_block = 11
+            at_block = 10
             self.mdp.current_block = at_block
             self.mcts.mdp.current_block = at_block
 
diff --git a/rl_arb/rl_arb/rlearn.py b/rl_arb/rl_arb/rlearn.py
@@ -151,12 +151,11 @@ class AgentRLearn():
             mean_val = np.mean(vs[idxs])
 
             if block not in self.baseline_tracker:
-                self.baseline_tracker[block] = mean_val
+                self.baseline_tracker[block] = list(vs[idxs])
             else:
-                self.baseline_tracker[block] = 1/2 * (mean_val + self.baseline_tracker[block])
-
-            baseline[idxs] = self.baseline_tracker[block]
+                self.baseline_tracker[block].append(vs[idxs])
 
+            baseline[idxs] = np.mean(self.baseline_tracker[block])
 
 
         return baseline*np.array(gamma_factors)
 
@@ -248,13 +247,12 @@ class AgentRLearn():
             with open("values.pickle", "wb") as f:
                 pickle.dump(self.values, f)
 
-            test_values = self.test_model(iteration)
-            self.test_values.append(test_values)
-            average_value = float(np.mean(test_values))
-            with open("test_values.pickle", "wb") as f:
-                pickle.dump(self.test_values, f)
-
-            send_telegram_message(f"Average profit {average_value}")
+            #test_values = self.test_model(iteration)
+            #self.test_values.append(test_values)
+            #average_value = float(np.mean(test_values))
+            #with open("test_values.pickle", "wb") as f:
+            #    pickle.dump(self.test_values, f)
+            #send_telegram_message(f"Average profit {average_value}")
 
         if self.args['telegram']:
             send_telegram_message("DONE!")
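
Note on the rlearn.py change: the new hunk keeps a per-block list of observed values and uses their mean as the REINFORCE baseline, instead of the previous running average of batch means. Below is a minimal standalone sketch of that idea; the names BaselineTracker and update are illustrative only and do not appear in the repository, and the sketch flattens incoming values with extend, whereas the hunk appends each array as a single element.

import numpy as np

class BaselineTracker:
    """Per-block baseline: keep all observed values and use their mean."""

    def __init__(self):
        self.per_block = {}  # block id -> list of observed values

    def update(self, block, values):
        # Record new values for this block and return the running-mean baseline.
        values = np.asarray(values, dtype=float).ravel()
        if block not in self.per_block:
            self.per_block[block] = list(values)
        else:
            self.per_block[block].extend(values)  # keep the list flat
        return float(np.mean(self.per_block[block]))

# Example: two batches of values for block 10.
tracker = BaselineTracker()
print(tracker.update(10, [1.0, 2.0, 3.0]))  # 2.0
print(tracker.update(10, [5.0]))            # 2.75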