working one house
Parent: 3620637962
Commit: 5a43772aa4
@@ -12,11 +12,12 @@ from collections import deque
 import random
 from Deep_Q_Learning.GC_Env import GC_Env
 
-DISCOUNT = 0.99
-REPLAY_MEMORY_SIZE = 5_000 # How many last steps to keep for model training
+DISCOUNT = 0.9
+REPLAY_MEMORY_SIZE = 500_000 # How many last steps to keep for model training
 # Minimum number of steps in a memory to start training
-MIN_REPLAY_MEMORY_SIZE = 100
+MIN_REPLAY_MEMORY_SIZE = 200
 MINIBATCH_SIZE = 64 # How many steps (samples) to use for training
+HALF_MINIBATCH = int(MINIBATCH_SIZE / 2)
 UPDATE_TARGET_EVERY = 5 # Terminal states (end of episodes)
 LEARNING_RATE = 0.01
 MODEL_NAME = f'lr={LEARNING_RATE}_gamma={DISCOUNT}'
@@ -76,6 +77,8 @@ class DQNAgent:
 
         # An array with last n steps for training
         self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
+        # self.negative_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
+        # self.positive_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
 
         # Custom tensorboard object
         self.tensorboard = ModifiedTensorBoard(
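The commented-out negative_memory/positive_memory deques, together with the new HALF_MINIBATCH constant above, point at an experiment with reward-balanced minibatches. A minimal sketch of that idea, assuming both deques are filled in update_replay_memory (sample_balanced is a hypothetical helper, not part of this commit):

    import random

    def sample_balanced(positive_memory, negative_memory, half=32):  # half = HALF_MINIBATCH
        # Draw up to half the minibatch from positive-reward transitions and
        # the rest from negative ones, falling back to whatever is available.
        pos = random.sample(positive_memory, min(half, len(positive_memory)))
        neg = random.sample(negative_memory, min(half, len(negative_memory)))
        return pos + neg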
@@ -87,16 +90,16 @@ class DQNAgent:
     def create_model(self):
         model = Sequential()
 
-        model.add(Dense(40, input_shape=self.env.OBSERVATION_SPACE_VALUES))
-        model.add(Activation('relu'))
-        model.add(Dense(40))
-        model.add(Activation('relu'))
-        model.add(Dense(40))
-        model.add(Activation('relu'))
+        model.add(
+            Dense(40, input_shape=self.env.OBSERVATION_SPACE_VALUES, activation='tanh'))
+        model.add(Dense(40, activation='tanh'))
+        model.add(Dense(40, activation='tanh'))
+        model.add(Dense(40, activation='tanh'))
+        model.add(Dense(40, activation='tanh'))
 
-        model.add(Dense(self.env.ACTION_SPACE_SIZE, activation='linear'))
-        model.compile(loss="mse", optimizer=Adam(
-            lr=0.001), metrics=['accuracy'])
+        model.add(Dense(self.env.ACTION_SPACE_SIZE, activation='softmax'))
+        model.compile(loss='huber_loss',
+                      optimizer='SGD', metrics=['accuracy'])
         print(model.summary())
         return model
 
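A note on the new output layer: switching the final Dense activation from 'linear' to 'softmax' does not change which action a greedy argmax picks, since softmax is monotonic, but it does squash the predicted values into [0, 1]. A quick check of the argmax invariance (the values below are made up):

    import numpy as np

    q_values = np.array([0.2, 1.5, -0.3, 0.9, 0.1, 1.4])  # one value per action
    soft = np.exp(q_values) / np.exp(q_values).sum()       # softmax over the 6 actions
    assert np.argmax(soft) == np.argmax(q_values)          # greedy action is unchanged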
@@ -104,6 +107,10 @@ class DQNAgent:
     # (observation space, action, reward, new observation space, done)
     def update_replay_memory(self, transition):
         self.replay_memory.append(transition)
+        # if transition[2] > 0:
+        #     self.positive_memory.append(transition)
+        # else:
+        #     self.negative_memory.append(transition)
 
     # Trains main network every step during episode
     def train(self, terminal_state, step):
@@ -2,13 +2,13 @@ from Deep_Q_Learning.q_gc import Garbage_Collector
 from helpler import Render_Element
 from models.House import House
 from models.Road import Road
-from config import MAP_WIDTH, MAP_HEIGHT
+from config import MAP_WIDTH, MAP_HEIGHT, NUMBER_OF_HOUSES
 import numpy as np
 from timeit import default_timer as timer
 
 
 class GC_Env:
-    OBSERVATION_SPACE_VALUES = (36 + 6,)
+    OBSERVATION_SPACE_VALUES = (36 + NUMBER_OF_HOUSES,)
     ACTION_SPACE_SIZE = 6
 
     def reset(self):
@@ -38,10 +38,10 @@ class GC_Env:
         houses = list(map(lambda item: draw_items[item], list(filter(lambda item: isinstance(
            draw_items[item], House), draw_items))))
 
-        houses_trash = max([[(int(getattr(house, item) != 0) - 0.5) * 2 for item in
-                             ["mixed", "paper", "glass", "plastic"]] for house in houses])
+        houses_trash = [max([(int(getattr(house, item) != 0) - 0.5) * 2 for item in
+                             ["mixed", "paper", "glass", "plastic"]]) for house in houses]
 
-        observation[-4:] = houses_trash
+        observation[-NUMBER_OF_HOUSES:] = houses_trash
 
         return observation
         # gc_trash = [int(getattr(self.gc, item) == self.gc.limit)
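The houses_trash change is a real fix: the old expression took max() over whole per-house lists, so it returned the four flags of a single lexicographically largest house, while the new one takes max() per house and returns one flag for each house, which is the shape observation[-NUMBER_OF_HOUSES:] expects. A small worked example with two hypothetical houses (flag order mixed, paper, glass, plastic):

    per_house = [[-1, -1, 1, -1],   # house 0: only glass waiting
                 [1, -1, -1, -1]]   # house 1: only mixed waiting

    old_style = max(per_house)                        # [1, -1, -1, -1] -> flags of one house only
    new_style = [max(flags) for flags in per_house]   # [1, 1] -> one entry per house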
@@ -55,9 +55,9 @@ class GC_Env:
 
         new_observation = self.observe(self.gc, self.draw_items)
 
-        if action_result == False:
-            reward = -10
-        elif action_result == True:
+        if action_result is False:
+            reward = -1
+        elif action_result is True:
             reward = -0.1
         else:
             reward = action_result
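Besides softening the penalty from -10 to -1, the switch from == to is matters here because action_result can also carry a numeric reward: in Python, 0 == False evaluates to True, so a legitimate reward of zero would have fallen into the failure branch. An illustration (the value is hypothetical):

    action_result = 0               # a valid numeric reward of zero
    print(action_result == False)   # True  -> old check treats it as a failed action
    print(action_result is False)   # False -> new check lets it reach the final else branch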
@@ -71,4 +71,7 @@ class GC_Env:
                 done = False
                 break
 
+        # if sum(new_observation[-NUMBER_OF_HOUSES:]) < NUMBER_OF_HOUSES:
+        #     done = True
+
         return new_observation, reward, done
@@ -65,7 +65,7 @@ class Garbage_Collector(Numbers):
 
     def pick_trash(self):
         if self.mixed == self.limit and self.glass == self.limit and self.paper == self.limit and self.plastic == self.limit:
-            return - 10
+            return - 1
 
         to_check = [
             {"col": self.col - 1, "row": self.row},
@@ -93,9 +93,9 @@ class Garbage_Collector(Numbers):
                             transfered += house_trash
 
         if houses_around and transfered:
-            return transfered * 10
+            return 1
         else:
-            return -10
+            return -1
 
     def leave_trash(self):
         to_check = [
@@ -120,6 +120,6 @@ class Garbage_Collector(Numbers):
                             break
 
         if trashes_around and transfered:
-            return transfered * 100
+            return 1
         else:
-            return -10
+            return -1
@@ -2,6 +2,7 @@ from platform import system
 CELL_SIZE = 64
 MAP_HEIGHT = 7
 MAP_WIDTH = 9
+NUMBER_OF_HOUSES = 6
 WINDOW_HEIGHT = MAP_HEIGHT * CELL_SIZE
 WINDOW_WIDTH = MAP_WIDTH * CELL_SIZE
 FONT = "./Resources/JetBrainsMono-Regular.ttf"
@@ -8,26 +8,27 @@ from keras.models import load_model
 from datetime import datetime
 
 MIN_REWARD = 0 # For model save
-STEP_LIMIT = 5_000
+STEP_LIMIT = 500
 
 # Environment settings
-EPISODES = 2_000
+EPISODES = 20_000
 
 # Exploration settings
 epsilon = 1 # not a constant, going to be decayed
-EPSILON_DECAY = 0.999
+EPSILON_DECAY = 0.99975
 MIN_EPSILON = 0.01
 
 # Stats settings
-AGGREGATE_STATS_EVERY = 50 # episodes
+AGGREGATE_STATS_EVERY = 20 # episodes
 
 env = GC_Env()
 
 # For stats
-ep_rewards = [-200]
+ep_rewards = []
+steps = []
 
-model = load_model(
-    'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
+# model = load_model(
+#     'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
 
 model = None
 
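The slower EPSILON_DECAY goes hand in hand with the jump from 2,000 to 20,000 episodes. Assuming epsilon is multiplied by EPSILON_DECAY once per episode, as is usual in this kind of DQN script, the number of episodes until it reaches MIN_EPSILON = 0.01 grows roughly fourfold:

    import math

    def episodes_until_floor(decay, floor=0.01, start=1.0):
        # Smallest n with start * decay**n <= floor
        return math.ceil(math.log(floor / start) / math.log(decay))

    print(episodes_until_floor(0.999))     # ~4603 episodes with the old decay
    print(episodes_until_floor(0.99975))   # ~18419 episodes with the new decay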
@@ -72,17 +73,20 @@ for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):
         current_state = new_state
         step += 1
 
-    agent.tensorboard.update_stats(steps=step, reward=episode_reward)
+    agent.tensorboard.update_stats(reward=episode_reward)
 
     # Append episode reward to a list and log stats (every given number of episodes)
    ep_rewards.append(episode_reward)
+    steps.append(step)
     if not episode % AGGREGATE_STATS_EVERY or episode == 1:
         average_reward = sum(
-            ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
+            ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
         min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
         max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
+        average_steps = sum(steps[-AGGREGATE_STATS_EVERY:]) / \
+            len(steps[-AGGREGATE_STATS_EVERY:])
         agent.tensorboard.update_stats(
-            reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)
+            reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon, average_steps=average_steps)
 
     # Save model, but only when min reward is greater or equal a set value
     if min_reward >= MIN_REWARD:
[Binary files changed in this commit; contents not shown]
main.py
@@ -71,8 +71,8 @@ clock = pygame.time.Clock()
 
 
 # know = Knowledge(draw_items, gc)
-model = load_model(
-    'trained_models\\lr=0.001_gamma=0.5___-35.90max_-1172.30avg_-4394.80min__2020-05-01_23-03.model')
+model = load_model(os.path.join('trained_models', 'working_one_trash.model'))
 
 # Game Loop
 run_a = False
 running = True
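Loading the model via os.path.join instead of a hard-coded backslash path keeps main.py portable; the Windows-style 'trained_models\\...' literal only resolves correctly on Windows. For illustration:

    import os

    path = os.path.join('trained_models', 'working_one_trash.model')
    # -> 'trained_models/working_one_trash.model' on Linux/macOS
    # -> 'trained_models\\working_one_trash.model' on Windows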
@@ -1,5 +1,5 @@
 import pygame
-from config import CELL_SIZE, MAP_HEIGHT, MAP_WIDTH, MAP, FONT, BLACK, BLUE, GREEN, YELLOW, GARBAGE_COLLECTOR_IMAGE
+from config import CELL_SIZE, MAP_HEIGHT, MAP_WIDTH, MAP, FONT, BLACK, BLUE, GREEN, YELLOW, GARBAGE_COLLECTOR_IMAGE, TRASH_TYPES
 from random import randint
 from models.House import House
 from PIL import Image
@@ -116,6 +116,9 @@ class Garbage_Collector(Numbers):
         return result
 
     def pick_trash(self):
+        if self.mixed == self.limit and self.glass == self.limit and self.paper == self.limit and self.plastic == self.limit:
+            return - 10
+
         to_check = [
             {"col": self.col - 1, "row": self.row},
             {"col": self.col + 1, "row": self.row},
@@ -125,42 +128,21 @@ class Garbage_Collector(Numbers):
         houses_around = False
         transfered = 0
         for field in to_check:
-            if 0 <= field["row"] < MAP_HEIGHT and 0 <= field["col"] < MAP_WIDTH:
+            if field["row"] >= 0 and field["row"] < MAP_HEIGHT and field["col"] >= 0 and field["col"] < MAP_WIDTH:
                 item = self.draw_items[(field["col"], field["row"])]
                 if isinstance(item, House):
                     houses_around = True
 
-                    mixed = True
-                    while mixed and self.mixed < self.limit:
-                        mixed = item.get_mixed()
-                        if mixed:
-                            self.mixed += 1
-                            transfered += 1
-
-                    paper = True
-                    while paper and self.paper < self.limit:
-                        paper = item.get_paper()
-                        if paper:
-                            self.paper += 1
-                            transfered += 1
-
-                    glass = True
-                    while glass and self.glass < self.limit:
-                        glass = item.get_glass()
-                        if glass:
-                            self.glass += 1
-                            transfered += 1
-
-                    plastic = True
-                    while plastic and self.plastic < self.limit:
-                        plastic = item.get_plastic()
-                        if plastic:
-                            self.plastic += 1
-                            transfered += 1
-        if houses_around:
-            return transfered * 10
-        else:
-            return -10
+                    # debug - unit test
+                    for trash_type in TRASH_TYPES:
+                        gc_trash, house_trash = getattr(
+                            self, trash_type), getattr(item, trash_type)
+                        if house_trash and gc_trash < self.limit:
+                            if gc_trash + house_trash > self.limit:
+                                house_trash = self.limit - gc_trash
+                            item.get_trash(trash_type=trash_type,
+                                           queried_ammount=house_trash)
+                            transfered += house_trash
 
     def leave_trash(self):
         to_check = [
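The four copy-pasted while loops are collapsed into one loop over TRASH_TYPES that transfers a whole batch per house, capped at the collector's limit. A toy illustration of the capping arithmetic (the limit and load values are made up):

    limit = 10
    gc_trash = 7       # already on the collector for this trash type
    house_trash = 5    # waiting at the house

    if house_trash and gc_trash < limit:
        if gc_trash + house_trash > limit:
            house_trash = limit - gc_trash   # only request what still fits: 3
        # item.get_trash(trash_type=..., queried_ammount=house_trash) would run here
        gc_trash += house_trash

    print(gc_trash)  # 10 -> filled exactly to the limit, nothing is overdrawn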
@@ -172,32 +154,14 @@ class Garbage_Collector(Numbers):
         transfered = 0
         trashes_around = False
         for field in to_check:
-            if 0 <= field["row"] < MAP_HEIGHT and 0 <= field["col"] < MAP_WIDTH:
+            if field["row"] >= 0 and field["row"] < MAP_HEIGHT and field["col"] >= 0 and field["col"] < MAP_WIDTH:
                 item = self.draw_items[(field["col"], field["row"])]
                 if isinstance(item, Trash):
                     trashes_around = True
-                    if item.trash_type == "mixed":
-                        while self.mixed > 0:
-                            item.put_trash()
-                            self.mixed -= 1
-                            transfered += 1
-                    elif item.trash_type == "paper":
-                        while self.paper > 0:
-                            item.put_trash()
-                            self.paper -= 1
-                            transfered += 1
-                    elif item.trash_type == "glass":
-                        while self.glass > 0:
-                            item.put_trash()
-                            self.glass -= 1
-                            transfered += 1
-                    elif item.trash_type == "plastic":
-                        while self.plastic > 0:
-                            item.put_trash()
-                            self.plastic -= 1
-                            transfered += 1
-
-        if trashes_around:
-            return transfered * 100
-        else:
-            return -10
+                    if item.trash_type in TRASH_TYPES:
+                        trash_ammount = getattr(self, item.trash_type)
+                        if trash_ammount:
+                            item.put_trash(trash_ammount)
+                            setattr(self, item.trash_type, 0)
+                            transfered += trash_ammount
+                            break
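leave_trash gets the same treatment: the whole load of the matching type is handed over with a single put_trash call and the collector's counter is zeroed via setattr. A toy stand-in (the Bin class and the amounts are hypothetical, not part of this commit):

    class Bin:
        def __init__(self, trash_type):
            self.trash_type = trash_type
            self.stored = 0

        def put_trash(self, ammount):
            self.stored += ammount

    collector_load = {"glass": 4}
    item = Bin("glass")

    ammount = collector_load[item.trash_type]
    if ammount:
        item.put_trash(ammount)
        collector_load[item.trash_type] = 0   # mirrors setattr(self, item.trash_type, 0)

    print(item.stored, collector_load["glass"])  # 4 0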
[Binary files changed in this commit; contents not shown]
trained_models/working_one_house.model (new binary file; contents not shown)