Adjust the rewards

This commit is contained in:
Kanewersa 2021-06-07 13:39:32 +02:00
parent 88f13d7d0d
commit 3c0fe20132
3 changed files with 13 additions and 3 deletions

View File

@@ -59,7 +59,7 @@ class ResourceGenerator:
world.delete_entity(resource_ent, immediate=True)
if world.has_component(player, LearningComponent):
learning = world.component_for_entity(player, LearningComponent)
learning.reward = 10
learning.reward += 10
learning.score += 1
ResourceGenerator.resources_amount -= 1
if ResourceGenerator.resources_amount == 0:

View File

@@ -20,7 +20,7 @@ class ConsumptionSystem(esper.Processor):
# If no item was picked up
if cons.last_inventory_state == inventory.total_items_count():
learning: LearningComponent = self.world.component_for_entity(ent, LearningComponent)
learning.reward = -10
learning.reward += -10
learning.done = True
cons.last_inventory_state = inventory.total_items_count()
else:

View File

@ -17,7 +17,7 @@ from survival.model import LinearQNetwork, QTrainer
MAX_MEMORY = 100_000
BATCH_SIZE = 1000
LR = 0.001
LEARN = True
LEARN = False
class NeuralSystem(esper.Processor):
@@ -34,6 +34,7 @@ class NeuralSystem(esper.Processor):
self.starting_epsilon = -1
self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
self.utils = LearningUtils()
self.best_action = None
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
@@ -68,10 +69,15 @@
TimeComponent, LearningComponent):
if not learning.made_step:
learning.reset()
self.best_action = None
# Get the closest resource | [entity, path, cost]
resource: [int, list, int] = self.game_map.find_nearest_resource(self.world, ent, pos)
if resource is not None:
# If resource was found get the best move chosen by A*
self.best_action = resource[1][0]
# Get current entity state
old_state = get_state(self, ent, resource)
# Predict the action
@@ -81,6 +87,10 @@
# Perform the action
act = Action.perform(self.world, ent, Action.from_array(action))
self.utils.append_action(act, pos)
# Add reward if chosen action was the best action
if act == self.best_action:
learning.reward += 1
continue
# Wait for the action to complete