working one house

Lech Wołowski 2020-05-05 00:04:58 +02:00
parent 3620637962
commit 5a43772aa4
81 changed files with 75 additions and 96 deletions

View File

@@ -12,11 +12,12 @@ from collections import deque
import random
from Deep_Q_Learning.GC_Env import GC_Env
DISCOUNT = 0.99
REPLAY_MEMORY_SIZE = 5_000 # How many last steps to keep for model training
DISCOUNT = 0.9
REPLAY_MEMORY_SIZE = 500_000 # How many last steps to keep for model training
# Minimum number of steps in a memory to start training
MIN_REPLAY_MEMORY_SIZE = 100
MIN_REPLAY_MEMORY_SIZE = 200
MINIBATCH_SIZE = 64 # How many steps (samples) to use for training
HALF_MINIBATCH = int(MINIBATCH_SIZE / 2)
UPDATE_TARGET_EVERY = 5 # Terminal states (end of episodes)
LEARNING_RATE = 0.01
MODEL_NAME = f'lr={LEARNING_RATE}_gamma={DISCOUNT}'
@@ -76,6 +77,8 @@ class DQNAgent:
# An array with last n steps for training
self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
# self.negative_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
# self.positive_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
# Custom tensorboard object
self.tensorboard = ModifiedTensorBoard(
@@ -87,16 +90,16 @@ class DQNAgent:
def create_model(self):
model = Sequential()
model.add(Dense(40, input_shape=self.env.OBSERVATION_SPACE_VALUES))
model.add(Activation('relu'))
model.add(Dense(40))
model.add(Activation('relu'))
model.add(Dense(40))
model.add(Activation('relu'))
model.add(
Dense(40, input_shape=self.env.OBSERVATION_SPACE_VALUES, activation='tanh'))
model.add(Dense(40, activation='tanh'))
model.add(Dense(40, activation='tanh'))
model.add(Dense(40, activation='tanh'))
model.add(Dense(40, activation='tanh'))
model.add(Dense(self.env.ACTION_SPACE_SIZE, activation='linear'))
model.compile(loss="mse", optimizer=Adam(
lr=0.001), metrics=['accuracy'])
model.add(Dense(self.env.ACTION_SPACE_SIZE, activation='softmax'))
model.compile(loss='huber_loss',
optimizer='SGD', metrics=['accuracy'])
print(model.summary())
return model
@@ -104,6 +107,10 @@ class DQNAgent:
# (observation space, action, reward, new observation space, done)
def update_replay_memory(self, transition):
self.replay_memory.append(transition)
# if transition[2] > 0:
# self.positive_memory.append(transition)
# else:
# self.negative_memory.append(transition)
# Trains main network every step during episode
def train(self, terminal_state, step):
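
For reference, the added lines assemble into roughly this network: five tanh-activated Dense(40) layers, a softmax output over the six actions, and Huber loss with plain SGD. A minimal sketch; build_dqn and its default arguments are illustrative stand-ins for DQNAgent.create_model (observation size 36 + 6 houses and the 6 actions come from GC_Env in this same commit):

from keras.models import Sequential
from keras.layers import Dense

def build_dqn(observation_shape=(42,), n_actions=6):
    # Hidden stack switched from relu to tanh in this commit.
    model = Sequential()
    model.add(Dense(40, input_shape=observation_shape, activation='tanh'))
    for _ in range(4):
        model.add(Dense(40, activation='tanh'))
    # Output switched from linear to softmax, loss from mse/Adam to Huber/SGD.
    model.add(Dense(n_actions, activation='softmax'))
    model.compile(loss='huber_loss', optimizer='SGD', metrics=['accuracy'])
    return model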

View File

@@ -2,13 +2,13 @@ from Deep_Q_Learning.q_gc import Garbage_Collector
from helpler import Render_Element
from models.House import House
from models.Road import Road
from config import MAP_WIDTH, MAP_HEIGHT
from config import MAP_WIDTH, MAP_HEIGHT, NUMBER_OF_HOUSES
import numpy as np
from timeit import default_timer as timer
class GC_Env:
OBSERVATION_SPACE_VALUES = (36 + 6,)
OBSERVATION_SPACE_VALUES = (36 + NUMBER_OF_HOUSES,)
ACTION_SPACE_SIZE = 6
def reset(self):
@@ -38,10 +38,10 @@ class GC_Env:
houses = list(map(lambda item: draw_items[item], list(filter(lambda item: isinstance(
draw_items[item], House), draw_items))))
houses_trash = max([[(int(getattr(house, item) != 0) - 0.5) * 2 for item in
["mixed", "paper", "glass", "plastic"]] for house in houses])
houses_trash = [max([(int(getattr(house, item) != 0) - 0.5) * 2 for item in
["mixed", "paper", "glass", "plastic"]]) for house in houses]
observation[-4:] = houses_trash
observation[-NUMBER_OF_HOUSES:] = houses_trash
return observation
# gc_trash = [int(getattr(self.gc, item) == self.gc.limit)
@@ -55,9 +55,9 @@ class GC_Env:
new_observation = self.observe(self.gc, self.draw_items)
if action_result == False:
reward = -10
elif action_result == True:
if action_result is False:
reward = -1
elif action_result is True:
reward = -0.1
else:
reward = action_result
@@ -71,4 +71,7 @@ class GC_Env:
done = False
break
# if sum(new_observation[-NUMBER_OF_HOUSES:]) < NUMBER_OF_HOUSES:
# done = True
return new_observation, reward, done
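
The observation fix above is easiest to see on a toy example: the old code took a single max across all houses (one 4-element list), while the new comprehension yields one +/-1 flag per house. A runnable sketch with a stub House class standing in for models.House:

import numpy as np

NUMBER_OF_HOUSES = 6

class House:  # stub with the four trash counters referenced in the diff
    def __init__(self, mixed=0, paper=0, glass=0, plastic=0):
        self.mixed, self.paper, self.glass, self.plastic = mixed, paper, glass, plastic

houses = [House(paper=2), House(), House(glass=1), House(), House(), House(mixed=3)]
observation = np.zeros(36 + NUMBER_OF_HOUSES)

# +1 if the house still holds any trash type, -1 if it is empty.
houses_trash = [max((int(getattr(house, item) != 0) - 0.5) * 2
                    for item in ["mixed", "paper", "glass", "plastic"])
                for house in houses]
observation[-NUMBER_OF_HOUSES:] = houses_trash
print(houses_trash)  # [1.0, -1.0, 1.0, -1.0, -1.0, 1.0]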

View File

@@ -65,7 +65,7 @@ class Garbage_Collector(Numbers):
def pick_trash(self):
if self.mixed == self.limit and self.glass == self.limit and self.paper == self.limit and self.plastic == self.limit:
return - 10
return - 1
to_check = [
{"col": self.col - 1, "row": self.row},
@@ -93,9 +93,9 @@ class Garbage_Collector(Numbers):
transfered += house_trash
if houses_around and transfered:
return transfered * 10
return 1
else:
return -10
return -1
def leave_trash(self):
to_check = [
@@ -120,6 +120,6 @@ class Garbage_Collector(Numbers):
break
if trashes_around and transfered:
return transfered * 100
return 1
else:
return -10
return -1

View File

@@ -2,6 +2,7 @@ from platform import system
CELL_SIZE = 64
MAP_HEIGHT = 7
MAP_WIDTH = 9
NUMBER_OF_HOUSES = 6
WINDOW_HEIGHT = MAP_HEIGHT * CELL_SIZE
WINDOW_WIDTH = MAP_WIDTH * CELL_SIZE
FONT = "./Resources/JetBrainsMono-Regular.ttf"

View File

@@ -8,26 +8,27 @@ from keras.models import load_model
from datetime import datetime
MIN_REWARD = 0 # For model save
STEP_LIMIT = 5_000
STEP_LIMIT = 500
# Environment settings
EPISODES = 2_000
EPISODES = 20_000
# Exploration settings
epsilon = 1 # not a constant, going to be decayed
EPSILON_DECAY = 0.999
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.01
# Stats settings
AGGREGATE_STATS_EVERY = 50 # episodes
AGGREGATE_STATS_EVERY = 20 # episodes
env = GC_Env()
# For stats
ep_rewards = [-200]
ep_rewards = []
steps = []
model = load_model(
'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
# model = load_model(
# 'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
model = None
@@ -72,17 +73,20 @@ for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
current_state = new_state
step += 1
agent.tensorboard.update_stats(steps=step, reward=episode_reward)
agent.tensorboard.update_stats(reward=episode_reward)
# Append episode reward to a list and log stats (every given number of episodes)
ep_rewards.append(episode_reward)
steps.append(step)
if not episode % AGGREGATE_STATS_EVERY or episode == 1:
average_reward = sum(
ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
average_steps = sum(steps[-AGGREGATE_STATS_EVERY:]) / \
len(steps[-AGGREGATE_STATS_EVERY:])
agent.tensorboard.update_stats(
reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)
reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon, average_steps=average_steps)
# Save model, but only when min reward is greater or equal a set value
if min_reward >= MIN_REWARD:
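
A quick check on the retuned exploration schedule (EPSILON_DECAY = 0.99975, MIN_EPSILON = 0.01, EPISODES = 20_000); this sketch assumes epsilon is multiplied by the decay factor once per episode, which is not shown in this hunk:

import math

EPSILON_DECAY, MIN_EPSILON, EPISODES = 0.99975, 0.01, 20_000

# Episode at which epsilon = EPSILON_DECAY**n first drops below the floor.
print(math.ceil(math.log(MIN_EPSILON) / math.log(EPSILON_DECAY)))  # 18419
# Undecayed value at the end of training, i.e. without the MIN_EPSILON clamp.
print(EPSILON_DECAY ** EPISODES)  # ~0.0067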

View File

@@ -71,8 +71,8 @@ clock = pygame.time.Clock()
# know = Knowledge(draw_items, gc)
model = load_model(
'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
model = load_model(os.path.join('trained_models', 'working_one_trash.model'))
# Game Loop
run_a = False
running = True

View File

@@ -1,5 +1,5 @@
import pygame
from config import CELL_SIZE, MAP_HEIGHT, MAP_WIDTH, MAP, FONT, BLACK, BLUE, GREEN, YELLOW, GARBAGE_COLLECTOR_IMAGE
from config import CELL_SIZE, MAP_HEIGHT, MAP_WIDTH, MAP, FONT, BLACK, BLUE, GREEN, YELLOW, GARBAGE_COLLECTOR_IMAGE, TRASH_TYPES
from random import randint
from models.House import House
from PIL import Image
@@ -116,6 +116,9 @@ class Garbage_Collector(Numbers):
return result
def pick_trash(self):
if self.mixed == self.limit and self.glass == self.limit and self.paper == self.limit and self.plastic == self.limit:
return - 10
to_check = [
{"col": self.col - 1, "row": self.row},
{"col": self.col + 1, "row": self.row},
@@ -125,42 +128,21 @@ class Garbage_Collector(Numbers):
houses_around = False
transfered = 0
for field in to_check:
if 0 <= field["row"] < MAP_HEIGHT and 0 <= field["col"] < MAP_WIDTH:
if field["row"] >= 0 and field["row"] < MAP_HEIGHT and field["col"] >= 0 and field["col"] < MAP_WIDTH:
item = self.draw_items[(field["col"], field["row"])]
if isinstance(item, House):
houses_around = True
mixed = True
while mixed and self.mixed < self.limit:
mixed = item.get_mixed()
if mixed:
self.mixed += 1
transfered += 1
paper = True
while paper and self.paper < self.limit:
paper = item.get_paper()
if paper:
self.paper += 1
transfered += 1
glass = True
while glass and self.glass < self.limit:
glass = item.get_glass()
if glass:
self.glass += 1
transfered += 1
plastic = True
while plastic and self.plastic < self.limit:
plastic = item.get_plastic()
if plastic:
self.plastic += 1
transfered += 1
if houses_around:
return transfered * 10
else:
return -10
# debug - unit test
for trash_type in TRASH_TYPES:
gc_trash, house_trash = getattr(
self, trash_type), getattr(item, trash_type)
if house_trash and gc_trash < self.limit:
if gc_trash + house_trash > self.limit:
house_trash = self.limit - gc_trash
item.get_trash(trash_type=trash_type,
queried_ammount=house_trash)
transfered += house_trash
def leave_trash(self):
to_check = [
@@ -172,32 +154,14 @@ class Garbage_Collector(Numbers):
transfered = 0
trashes_around = False
for field in to_check:
if 0 <= field["row"] < MAP_HEIGHT and 0 <= field["col"] < MAP_WIDTH:
if field["row"] >= 0 and field["row"] < MAP_HEIGHT and field["col"] >= 0 and field["col"] < MAP_WIDTH:
item = self.draw_items[(field["col"], field["row"])]
if isinstance(item, Trash):
trashes_around = True
if item.trash_type == "mixed":
while self.mixed > 0:
item.put_trash()
self.mixed -= 1
transfered += 1
elif item.trash_type == "paper":
while self.paper > 0:
item.put_trash()
self.paper -= 1
transfered += 1
elif item.trash_type == "glass":
while self.glass > 0:
item.put_trash()
self.glass -= 1
transfered += 1
elif item.trash_type == "plastic":
while self.plastic > 0:
item.put_trash()
self.plastic -= 1
transfered += 1
if trashes_around:
return transfered * 100
else:
return -10
if item.trash_type in TRASH_TYPES:
trash_ammount = getattr(self, item.trash_type)
if trash_ammount:
item.put_trash(trash_ammount)
setattr(self, item.trash_type, 0)
transfered += trash_ammount
break
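
The two rewritten methods replace four near-identical per-type while loops with one getattr/setattr pass driven by TRASH_TYPES. A minimal, self-contained sketch of the leave_trash pattern from the added lines; Collector and TrashBin are stand-in classes, and put_trash taking a bulk amount mirrors the call introduced in this diff:

TRASH_TYPES = ["mixed", "paper", "glass", "plastic"]

class Collector:  # stand-in for Garbage_Collector's per-type counters
    def __init__(self):
        self.mixed, self.paper, self.glass, self.plastic = 3, 0, 2, 1

class TrashBin:   # stand-in for models.Trash
    def __init__(self, trash_type):
        self.trash_type, self.stored = trash_type, 0
    def put_trash(self, amount):
        self.stored += amount

gc, bin_ = Collector(), TrashBin("glass")
transfered = 0
if bin_.trash_type in TRASH_TYPES:
    amount = getattr(gc, bin_.trash_type)   # how much of that type the collector holds
    if amount:
        bin_.put_trash(amount)              # dump it all in one call
        setattr(gc, bin_.trash_type, 0)     # empty the collector's slot
        transfered += amount
print(transfered, bin_.stored)  # 2 2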

Binary file not shown.