From 3c0fe2013223e88da3a6df7c5308db4ca8ff5305 Mon Sep 17 00:00:00 2001
From: Kanewersa <30356293+Kanewersa@users.noreply.github.com>
Date: Mon, 7 Jun 2021 13:39:32 +0200
Subject: [PATCH] Adjust the rewards

---
 survival/generators/resource_generator.py |  2 +-
 survival/systems/consumption_system.py    |  2 +-
 survival/systems/neural_system.py         | 12 +++++++++++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/survival/generators/resource_generator.py b/survival/generators/resource_generator.py
index bab5e4e..043c0e9 100644
--- a/survival/generators/resource_generator.py
+++ b/survival/generators/resource_generator.py
@@ -59,7 +59,7 @@ class ResourceGenerator:
             world.delete_entity(resource_ent, immediate=True)
             if world.has_component(player, LearningComponent):
                 learning = world.component_for_entity(player, LearningComponent)
-                learning.reward = 10
+                learning.reward += 10
                 learning.score += 1
             ResourceGenerator.resources_amount -= 1
             if ResourceGenerator.resources_amount == 0:
diff --git a/survival/systems/consumption_system.py b/survival/systems/consumption_system.py
index c47b521..1bad298 100644
--- a/survival/systems/consumption_system.py
+++ b/survival/systems/consumption_system.py
@@ -20,7 +20,7 @@ class ConsumptionSystem(esper.Processor):
                 # If no item was picked up
                 if cons.last_inventory_state == inventory.total_items_count():
                     learning: LearningComponent = self.world.component_for_entity(ent, LearningComponent)
-                    learning.reward = -10
+                    learning.reward += -10
                     learning.done = True
                 cons.last_inventory_state = inventory.total_items_count()
             else:
diff --git a/survival/systems/neural_system.py b/survival/systems/neural_system.py
index c2551db..a4a3aeb 100644
--- a/survival/systems/neural_system.py
+++ b/survival/systems/neural_system.py
@@ -17,7 +17,7 @@ from survival.model import LinearQNetwork, QTrainer
 MAX_MEMORY = 100_000
 BATCH_SIZE = 1000
 LR = 0.001
-LEARN = True
+LEARN = False
 
 
 class NeuralSystem(esper.Processor):
@@ -34,6 +34,7 @@ class NeuralSystem(esper.Processor):
         self.starting_epsilon = -1
         self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
         self.utils = LearningUtils()
+        self.best_action = None
 
     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
@@ -68,10 +69,15 @@
                                                TimeComponent, LearningComponent):
             if not learning.made_step:
                 learning.reset()
+                self.best_action = None
 
                 # Get the closest resource | [entity, path, cost]
                 resource: [int, list, int] = self.game_map.find_nearest_resource(self.world, ent, pos)
+                if resource is not None:
+                    # If resource was found get the best move chosen by A*
+                    self.best_action = resource[1][0]
+
                 # Get current entity state
                 old_state = get_state(self, ent, resource)
 
                 # Predict the action
@@ -81,6 +87,10 @@
                 # Perform the action
                 act = Action.perform(self.world, ent, Action.from_array(action))
                 self.utils.append_action(act, pos)
+
+                # Add reward if chosen action was the best action
+                if act == self.best_action:
+                    learning.reward += 1
                 continue
 
             # Wait for the action to complete
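
The core of this patch is the switch from overwriting the per-step reward
(learning.reward = 10) to accumulating it (learning.reward += 10), so that the
new A*-agreement bonus and the resource-pickup reward can land in the same step
without clobbering each other. Below is a minimal, self-contained sketch of
that behaviour; the dataclass is a stand-in assumed from how the patch uses the
component (reward, score, done, reset()), not the repository's actual
LearningComponent.

    from dataclasses import dataclass


    @dataclass
    class LearningComponent:
        # Stand-in for the repository's LearningComponent; fields assumed
        # from the patch's usage, not copied from the source tree.
        reward: int = 0
        score: int = 0
        done: bool = False

        def reset(self):
            # Clear the per-step state before the next action is predicted.
            self.reward = 0
            self.done = False


    learning = LearningComponent()
    learning.reset()

    # Event 1: the agent's move matches the best A* move (+1 bonus,
    # as added in neural_system.py).
    learning.reward += 1

    # Event 2: the same step also picks up a resource (+10,
    # as in resource_generator.py).
    learning.reward += 10
    learning.score += 1

    # With the old "learning.reward = 10" the +1 bonus would be overwritten;
    # with "+=" the step's total reward reflects both events.
    assert learning.reward == 11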