Adjust the rewards
commit 3c0fe20132
parent 88f13d7d0d
@@ -59,7 +59,7 @@ class ResourceGenerator:
         world.delete_entity(resource_ent, immediate=True)
         if world.has_component(player, LearningComponent):
             learning = world.component_for_entity(player, LearningComponent)
-            learning.reward = 10
+            learning.reward += 10
             learning.score += 1
         ResourceGenerator.resources_amount -= 1
         if ResourceGenerator.resources_amount == 0:
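Switching from "=" to "+=" lets several systems contribute to the same step's reward: with plain assignment, whichever system wrote last would silently overwrite the others. A minimal sketch of the accumulate-then-consume pattern, assuming a reset() that zeroes per-step state (the real LearningComponent lives in this repo; only attributes visible in these hunks are used here):

class LearningComponent:
    def __init__(self):
        self.reward = 0
        self.score = 0
        self.done = False
        self.made_step = False

    def reset(self):
        # Assumed behavior: clear per-step state so each frame starts at zero reward.
        self.reward = 0
        self.done = False


learning = LearningComponent()
learning.reward += 10  # resource picked up (ResourceGenerator)
learning.reward += 1   # action matched the A* move (NeuralSystem, below)
assert learning.reward == 11  # with "=", the second write would have discarded the first
learning.reset()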
@@ -20,7 +20,7 @@ class ConsumptionSystem(esper.Processor):
         # If no item was picked up
         if cons.last_inventory_state == inventory.total_items_count():
             learning: LearningComponent = self.world.component_for_entity(ent, LearningComponent)
-            learning.reward = -10
+            learning.reward += -10
             learning.done = True
             cons.last_inventory_state = inventory.total_items_count()
         else:
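The -10 penalty for failing to pick anything up now accumulates as well, and it arrives together with done = True, which ends the episode. A sketch of why that pairing matters for the Q-update, assuming QTrainer (in survival.model) uses the standard Bellman target; the trainer itself is not part of this diff:

def q_target(reward, next_q_max, gamma, done):
    # Terminal step: the penalty is the entire signal, nothing to bootstrap from.
    if done:
        return reward
    # Non-terminal step: reward plus the discounted value of the next state.
    return reward + gamma * next_q_max

print(q_target(-10, 5.0, 0.9, done=True))   # prints -10
print(q_target(-10, 5.0, 0.9, done=False))  # -10 + 0.9 * 5.0 = -5.5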
@@ -17,7 +17,7 @@ from survival.model import LinearQNetwork, QTrainer
 MAX_MEMORY = 100_000
 BATCH_SIZE = 1000
 LR = 0.001
-LEARN = True
+LEARN = False


 class NeuralSystem(esper.Processor):
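Flipping LEARN from True to False switches the agent to pure inference. The action-selection code is not shown in this diff, so the following gating and the torch usage are assumptions about how such a flag is typically applied:

import random

import torch

LEARN = False  # as set by this commit

def get_action(model, state, epsilon):
    # Assumed gating: only explore while training.
    if LEARN and random.random() < epsilon:
        return random.randrange(4)  # hypothetical 4-way move space
    # With LEARN = False the trained LinearQNetwork is always exploited.
    with torch.no_grad():
        q_values = model(torch.tensor(state, dtype=torch.float))
    return int(torch.argmax(q_values).item())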
@@ -34,6 +34,7 @@ class NeuralSystem(esper.Processor):
         self.starting_epsilon = -1
         self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
         self.utils = LearningUtils()
+        self.best_action = None

     def remember(self, state, action, reward, next_state, done):
         self.memory.append((state, action, reward, next_state, done))
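remember() together with the MAX_MEMORY and BATCH_SIZE constants implies a bounded experience-replay buffer. A sketch of that structure, where the deque bound and the sampling step are assumptions (only the append is visible in the diff):

import random
from collections import deque

MAX_MEMORY = 100_000
BATCH_SIZE = 1000

memory = deque(maxlen=MAX_MEMORY)  # oldest transitions drop off once full

def remember(state, action, reward, next_state, done):
    # Mirrors NeuralSystem.remember(): one transition per step.
    memory.append((state, action, reward, next_state, done))

def sample_batch():
    # Assumed training step: learn from a random mini-batch to break the
    # correlation between consecutive transitions.
    if len(memory) < BATCH_SIZE:
        return list(memory)
    return random.sample(memory, BATCH_SIZE)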
@@ -68,10 +69,15 @@ class NeuralSystem(esper.Processor):
                 TimeComponent, LearningComponent):
             if not learning.made_step:
                 learning.reset()
+                self.best_action = None

                 # Get the closest resource | [entity, path, cost]
                 resource: [int, list, int] = self.game_map.find_nearest_resource(self.world, ent, pos)

+                if resource is not None:
+                    # If resource was found get the best move chosen by A*
+                    self.best_action = resource[1][0]
+
                 # Get current entity state
                 old_state = get_state(self, ent, resource)
                 # Predict the action
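The comment in the hunk gives find_nearest_resource's return shape as [entity, path, cost], so resource[1][0] is the first step of the A* path toward the nearest resource; resetting best_action to None each step keeps a stale move from the previous frame from earning the bonus. A hypothetical example of what gets cached, assuming the path is a list of successive moves with the immediate one first:

resource = [
    42,                    # entity id of the nearest resource (made up)
    ["UP", "UP", "LEFT"],  # A* path as successive moves (shape assumed)
    3,                     # total path cost
]
best_action = resource[1][0]  # "UP": the pathfinder's best next move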
@@ -81,6 +87,10 @@ class NeuralSystem(esper.Processor):
                 # Perform the action
                 act = Action.perform(self.world, ent, Action.from_array(action))
                 self.utils.append_action(act, pos)
+
+                # Add reward if chosen action was the best action
+                if act == self.best_action:
+                    learning.reward += 1
                 continue

             # Wait for the action to complete
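This last block is the payoff of the commit: a +1 shaping bonus whenever the network's chosen action agrees with the A* move, giving dense guidance toward the sparse +10 pickup reward. A condensed standalone sketch of the rule (Action.perform is the repo's API; this free function is purely illustrative):

def shaped_reward(chosen_action, astar_action, base_reward=0):
    # +1 when the agent agrees with the pathfinder; no penalty for disagreeing.
    bonus = 1 if chosen_action == astar_action else 0
    return base_reward + bonus

assert shaped_reward("UP", "UP") == 1
assert shaped_reward("LEFT", "UP") == 0

Because the bonus is applied with "+=", it stacks with the +10 pickup reward from ResourceGenerator in the same step instead of replacing it.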