commit 520eb0476a70dfd35f419437671e854419abb8b6
parent db85f91cb1bc12ec0e3633b3c6343b466a0ce1ed
Author: miksa234 <milutin@popovic.xyz>
Date: Sun, 27 Apr 2025 20:28:29 +0100
revert last changes
Diffstat:
2 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/rl_arb/rl_arb/reinforce.py b/rl_arb/rl_arb/reinforce.py
@@ -86,10 +86,7 @@ class Reinforce:
test_rewards_hist = []
for itr in range(self.args['num_reinforce']):
- if itr % 2 == 0:
- at_block = 10
- else:
- at_block = 11
+ at_block = 10
self.mdp.current_block = at_block
self.mcts.mdp.current_block = at_block
diff --git a/rl_arb/rl_arb/rlearn.py b/rl_arb/rl_arb/rlearn.py
@@ -151,12 +151,11 @@ class AgentRLearn():
mean_val = np.mean(vs[idxs])
if block not in self.baseline_tracker:
- self.baseline_tracker[block] = mean_val
+ self.baseline_tracker[block] = list(vs[idxs])
else:
- self.baseline_tracker[block] = 1/2 * (mean_val + self.baseline_tracker[block])
-
- baseline[idxs] = self.baseline_tracker[block]
+ self.baseline_tracker[block].append(vs[idxs])
+ baseline[idxs] = np.mean(self.baseline_tracker[block])
return baseline*np.array(gamma_factors)
@@ -248,13 +247,12 @@ class AgentRLearn():
with open("values.pickle", "wb") as f:
pickle.dump(self.values, f)
- test_values = self.test_model(iteration)
- self.test_values.append(test_values)
- average_value = float(np.mean(test_values))
- with open("test_values.pickle", "wb") as f:
- pickle.dump(self.test_values, f)
-
- send_telegram_message(f"Average profit {average_value}")
+ #test_values = self.test_model(iteration)
+ #self.test_values.append(test_values)
+ #average_value = float(np.mean(test_values))
+ #with open("test_values.pickle", "wb") as f:
+ # pickle.dump(self.test_values, f)
+ #send_telegram_message(f"Average profit {average_value}")
if self.args['telegram']:
send_telegram_message("DONE!")