From 3c0fe2013223e88da3a6df7c5308db4ca8ff5305 Mon Sep 17 00:00:00 2001
From: Kanewersa <30356293+Kanewersa@users.noreply.github.com>
Date: Mon, 7 Jun 2021 13:39:32 +0200
Subject: [PATCH] Adjust the rewards

---
 survival/generators/resource_generator.py |  2 +-
 survival/systems/consumption_system.py    |  2 +-
 survival/systems/neural_system.py         | 12 +++++++++++-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/survival/generators/resource_generator.py b/survival/generators/resource_generator.py
index bab5e4e..043c0e9 100644
--- a/survival/generators/resource_generator.py
+++ b/survival/generators/resource_generator.py
@@ -59,7 +59,7 @@ class ResourceGenerator:
             world.delete_entity(resource_ent, immediate=True)
             if world.has_component(player, LearningComponent):
                 learning = world.component_for_entity(player, LearningComponent)
-                learning.reward = 10
+                learning.reward += 10
                 learning.score += 1
             ResourceGenerator.resources_amount -= 1
             if ResourceGenerator.resources_amount == 0:
diff --git a/survival/systems/consumption_system.py b/survival/systems/consumption_system.py
index c47b521..1bad298 100644
--- a/survival/systems/consumption_system.py
+++ b/survival/systems/consumption_system.py
@@ -20,7 +20,7 @@ class ConsumptionSystem(esper.Processor):
                 # If no item was picked up
                 if cons.last_inventory_state == inventory.total_items_count():
                     learning: LearningComponent = self.world.component_for_entity(ent, LearningComponent)
-                    learning.reward = -10
+                    learning.reward += -10
                     learning.done = True
                 cons.last_inventory_state = inventory.total_items_count()
             else:
diff --git a/survival/systems/neural_system.py b/survival/systems/neural_system.py
index c2551db..a4a3aeb 100644
--- a/survival/systems/neural_system.py
+++ b/survival/systems/neural_system.py
@@ -17,7 +17,7 @@ from survival.model import LinearQNetwork, QTrainer
 MAX_MEMORY = 100_000
 BATCH_SIZE = 1000
 LR = 0.001
-LEARN = True
+LEARN = False
 
 
 class NeuralSystem(esper.Processor):
@@ -34,6 +34,7 @@ class NeuralSystem(esper.Processor):
         self.starting_epsilon = -1
         self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
         self.utils = LearningUtils()
+        self.best_action = None
 
     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
@@ -68,10 +69,15 @@
                                                TimeComponent, LearningComponent):
             if not learning.made_step:
                 learning.reset()
+                self.best_action = None
 
                 # Get the closest resource | [entity, path, cost]
                 resource: [int, list, int] = self.game_map.find_nearest_resource(self.world, ent, pos)
+                if resource is not None:
+                    # If resource was found get the best move chosen by A*
+                    self.best_action = resource[1][0]
+
                 # Get current entity state
                 old_state = get_state(self, ent, resource)
 
                 # Predict the action
@@ -81,6 +87,10 @@
                 # Perform the action
                 act = Action.perform(self.world, ent, Action.from_array(action))
                 self.utils.append_action(act, pos)
+
+                # Add reward if chosen action was the best action
+                if act == self.best_action:
+                    learning.reward += 1
                 continue
 
             # Wait for the action to complete
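
The core of this patch is the switch from overwriting the per-step reward
(learning.reward = 10) to accumulating it (learning.reward += 10), so that the
new A*-agreement bonus and the resource-pickup reward can land in the same step
without clobbering each other. Below is a minimal, self-contained sketch of
that behaviour; the dataclass is a stand-in assumed from how the patch uses the
component (reward, score, done, reset()), not the repository's actual
LearningComponent.

    from dataclasses import dataclass


    @dataclass
    class LearningComponent:
        # Stand-in for the repository's LearningComponent; fields assumed
        # from the patch's usage, not copied from the source tree.
        reward: int = 0
        score: int = 0
        done: bool = False

        def reset(self):
            # Clear the per-step state before the next action is predicted.
            self.reward = 0
            self.done = False


    learning = LearningComponent()
    learning.reset()

    # Event 1: the agent's move matches the best A* move (+1 bonus,
    # as added in neural_system.py).
    learning.reward += 1

    # Event 2: the same step also picks up a resource (+10,
    # as in resource_generator.py).
    learning.reward += 10
    learning.score += 1

    # With the old "learning.reward = 10" the +1 bonus would be overwritten;
    # with "+=" the step's total reward reflects both events.
    assert learning.reward == 11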