diff --git a/README.md b/README.md
index 6c1bcdc..2888320 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ You can log the training using a tensorboard object that logs the training resul
 
 To start tensorboard:
 
-``` tensorboard --logdir src/agents/agent_storage ```
+``` tensorboard --logdir /RoboSchool/src/agents/agent_storage ```
 
 This will start tensorboard on localhost:6006 from the runs folder
 
@@ -53,47 +53,6 @@ Setting up a docker volume, i.e. a dynamic connection between a folder on the lo
 
 https://github.com/Cyb3rWard0g/HELK/issues/79
 
-### Omniboard and mongoDB
-
-Omniboard and mongo DB must be run and also be able to talk with each other. This can be obtained by starting them on the same docker network. First, create a new docker network or use an existing network
-
-``` docker network create omniboard-network ```
-
-We now have a network on which to run the docker containers. The mongodb and omniboard container should use the same docker network
-
-``` docker run --rm --name mongo-container --net omniboard-network -d mongo ```
-
-Then run the omniboard network
-
-``` docker run --rm -d -p 9000:9000 --name omniboard --net=omniboard-network vivekratnavel/omniboard -m MONGODB_CONTAINER:27017:sacred ```
-
-
-### RL development environment
-
-To start up the RL environment with a jupyter notebook running, write:
-
-``` docker run --rm -it -v pwd:/notebooks -p 8888:8888 justheuristic/practical_rl ```
-
-Go to localhost:8888 and insert the token from the console to log in. A RL environment image has been made for this projects and can be run by
-
-``` docker run --rm -it -p 8888:8888 fabiansd/rl-env bash ```
-
-You will then start up a linux container with all the necessary libraries installed. Here you can run python scripts and linux commands, and also start jupyter by typing
-
-``` sh /RoboSchool/src/run_jyputer.sh ```
-
-
-### Flask application
-
-The frontend application is implemented with a Flask app and docker image built from /Roboschool with
-
-``` docker build -f app/Dockerfile -t fabiansd/roboschool-app . ```
-
-The application can be started up by running:
-
-``` docker run --rm -d -p 9999:9999 fabiansd/roboschool-app ```
-
-Then go to localhost:9090 to see the frontend application
 
 ### Docker-compose
 
diff --git a/src/entities/__init__.py b/src/entities/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/features/bomberman_state_encoding.py b/src/features/bomberman_state_encoding.py
new file mode 100644
index 0000000..0b25e2b
--- /dev/null
+++ b/src/features/bomberman_state_encoding.py
@@ -0,0 +1,141 @@
+"""Encode a Bomberman board as stacked 2-D feature images for an RL agent."""
+import numpy as np
+
+BOMB_MAX_TIME = 5
+BOARD_HEIGHT = 8
+BOARD_WIDTH = 7
+NUM_IMAGES = 4
+
+# TODO: Load in a config file or something instead of having hard coded constants for board height/width and max bomb time
+
+# Nice to have todos:
+# TODO: Figure out a way to encode movement (aka player 2 is moving west even though he is standing in position x right now)
+# TODO: Add representation for how long the fire will remain on the board
+# TODO: Add powerup information (number of bombs the agent has left, the agents speed, and other players bombs available)
+# TODO: Implement a relative state vector that encodes information locally and relative to the agents position (reduced state vector)
+
+# LAYERS (in the order construct_full_state stacks them):
+# - Fire and danger (e.g 0 when bomb is placed -> progress to -1 as it approaches explosion, 1 for safe)
+# - Crates, bonus, empty tile (-1 for bonus, 0 for empty, 1 for crate)
+# - Player position (1 for current position, 0 for not)
+# - Adversaries (1 for enemy, 0 for not)
+
+def construct_full_state(bombs, fires, walls, agent, enemies, crates, bonuses, flatten = True):
+    """Encode the board as danger, environment, agent and adversary images.
+
+    With flatten=True every image is flattened, its wall cells are removed and
+    the four results are concatenated into one 1-D vector; otherwise the images
+    are stacked depth-wise with np.dstack.
+    """
+    solids_image = construct_solids_image(walls, crates) # only needed to build the danger image
+    images = [
+        construct_danger_image(bombs, fires, solids_image),
+        construct_env_image(bonuses, crates),
+        construct_agent_image(agent),
+        construct_adversary_image(enemies),
+    ]
+    if flatten:
+        return np.concatenate([flatten_image(image, walls) for image in images])
+    # np.dstack needs a real sequence; NumPy rejects a generator argument
+    return np.dstack(images)
+
+def construct_relative_state():
+    """Placeholder for the reduced, agent-relative state vector (see the TODO list above)."""
+    # NotImplemented is a non-callable constant; NotImplementedError is the exception to raise
+    raise NotImplementedError("construct_relative_state is not implemented yet")
+
+def _mark_blast_ray(danger_image, wall_image, cells, danger):
+    """Write `danger` along one blast direction until something solid stops it.
+
+    `cells` lists (y, x) positions starting at the bomb and moving outwards.
+    A wall (1 in wall_image) stops the blast before its cell is marked; any
+    other non-empty cell (e.g. a crate, -1) is marked and then stops the blast.
+    """
+    for y, x in cells:
+        if wall_image[y, x] == 1:
+            break
+        danger_image[y, x] = danger
+        if wall_image[y, x] != 0:
+            break
+
+def construct_danger_image(bombs, fires, wall_image):
+    """Return the danger layer: 1 where safe, timer/BOMB_MAX_TIME - 1 inside a blast, -1 on fire."""
+    danger_image = np.ones((BOARD_HEIGHT, BOARD_WIDTH))
+    # Descending timers: bombs closest to exploding are written last and win overlapping cells
+    bombs_timer_sorted = sorted(bombs, key=lambda bomb: bomb.timer, reverse=True)
+    for bomb in bombs_timer_sorted:
+        bomb_x, bomb_y = bomb.position.x, bomb.position.y
+        danger = bomb.timer / BOMB_MAX_TIME - 1
+        # Blast reach in each direction, clipped to the board
+        danger_most_left = max(bomb_x - bomb.strength, 0)
+        danger_most_right = min(bomb_x + bomb.strength, BOARD_WIDTH - 1)
+        danger_most_up = max(bomb_y - bomb.strength, 0)
+        danger_most_down = min(bomb_y + bomb.strength, BOARD_HEIGHT - 1)
+        rays = (
+            [(bomb_y, x) for x in range(bomb_x, danger_most_left - 1, -1)],
+            [(bomb_y, x) for x in range(bomb_x, danger_most_right + 1)],
+            [(y, bomb_x) for y in range(bomb_y, danger_most_up - 1, -1)],
+            [(y, bomb_x) for y in range(bomb_y, danger_most_down + 1)],
+        )
+        for cells in rays:
+            _mark_blast_ray(danger_image, wall_image, cells, danger)
+
+    for fire in fires:
+        danger_image[fire.position.y, fire.position.x] = -1
+
+    return danger_image
+
+def construct_env_image(bonuses, crates):
+    """Return the environment layer: 1 for crates, -1 for bonuses (written last), 0 elsewhere."""
+    env_image = np.zeros((BOARD_HEIGHT, BOARD_WIDTH))
+    for crate in crates:
+        env_image[crate.position.y, crate.position.x] = 1
+    for bonus in bonuses:
+        env_image[bonus.position.y, bonus.position.x] = -1
+    return env_image
+
+def construct_adversary_image(enemies):
+    """Return the adversary layer: 1 on every enemy position, 0 elsewhere."""
+    enemy_image = np.zeros((BOARD_HEIGHT, BOARD_WIDTH))
+    for enemy in enemies:
+        enemy_image[enemy.position.y, enemy.position.x] = 1
+    return enemy_image
+
+def construct_agent_image(agent):
+    """Return the agent layer: 1 on the agent's position, 0 elsewhere."""
+    agent_image = np.zeros((BOARD_HEIGHT, BOARD_WIDTH))
+    agent_image[agent.position.y, agent.position.x] = 1
+    return agent_image
+
+def construct_solids_image(walls, crates):
+    """Return the blast-blocking layer: 1 for walls, -1 for crates (written last), 0 elsewhere."""
+    solids_image = np.zeros((BOARD_HEIGHT, BOARD_WIDTH))
+    for wall in walls:
+        solids_image[wall.position.y, wall.position.x] = 1
+    for crate in crates:
+        solids_image[crate.position.y, crate.position.x] = -1
+    return solids_image
+
+def construct_powerup_vector(agent, enemies):
+    """Placeholder for the powerup feature vector (see the TODO list above)."""
+    raise NotImplementedError("construct_powerup_vector is not implemented yet")
+
+def remove_walls_from_image(walls, image_flatten):
+    """Return a copy of the flattened image without the cells occupied by walls."""
+    # Delete every wall index in one call: deleting one at a time shifts the
+    # remaining indices, so later walls would remove the wrong cells
+    wall_indices = np.array([BOARD_WIDTH * wall.position.y + wall.position.x for wall in walls], dtype=int)
+    return np.delete(image_flatten, wall_indices)
+
+def flatten_image(image, walls):
+    """Flatten a board image row by row and drop its wall cells."""
+    image_flatten = image.flatten()
+    return remove_walls_from_image(walls=walls, image_flatten=image_flatten)
+
+def visualize_image(image):
+    """Print the image to stdout, one board row per line."""
+    ydim, xdim = image.shape
+    for y in range(0, ydim):
+        for x in range(0, xdim):
+            print(image[y, x], end = " ")
+        print()
diff --git a/tests/features/test_bomberman_state_encoding.py b/tests/features/test_bomberman_state_encoding.py
new file mode 100644
index 0000000..5c159f8
--- /dev/null
+++ b/tests/features/test_bomberman_state_encoding.py
@@ -0,0 +1,150 @@
+from src.features import bomberman_state_encoding
+from src.entities.bomb import Bomb
+from src.entities.crate import Crate
+from src.entities.position import Position
+from src.entities.fire import Fire
+from src.entities.player import Player
+from src.entities.wall import Wall
+from src.entities.bonus import Bonus
+import numpy as np
+
+board_height = bomberman_state_encoding.BOARD_HEIGHT
+board_width = bomberman_state_encoding.BOARD_WIDTH
+num_images = bomberman_state_encoding.NUM_IMAGES
+
+danger_image_solution = np.array([
+    [0, 0, 0, 0, 1, 1, 1],
+    [1, 0, 1, 1, 1, 1, 1],
+    [1, 0, 1, 1, 1, 1, 1],
+    [1, 1, 2/5-1, 1, 1, 1, 0.1/5-1],
+    [2/5-1, 2/5-1, 2/5-1, 2/5-1, 1, 0.1/5-1, 0.1/5-1],
+    [1, -1, 1, 1, 1, 0.2/5-1, 0.1/5-1],
+    [1, -1, -1, 1, 1, 1, 0.2/5-1],
+    [1, -1, 1, 1, 1, 1, 1]
+])
+
+env_image_solution = np.array([
+    [1, 0, 0, 0, 0, 0, -1],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 1, 0, 0, 0],
+    [0, 0, 1, 1, 1, 1, 1],
+    [0, 0, 0, 1, -1, 1, 0],
+    [0, 0, 0, 1, 0, 0, 0],
+    [0, 0, 0, 1, 0, 0, 0],
+    [0, 0, 0, 1, 0, 0, 0]
+])
+
+adversary_image_solution = np.array([
+    [0, 0, 0, 0, 1, 0, 0],
+    [0, 1, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [1, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 1]
+])
+
+agent_image_solution = np.array([
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 1, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0]
+])
+
+solids_image_solution = np.array([
+    [-1, 0, 0, 0, 0, 0, 0],
+    [0, 0, 0, 0, 0, 0, 0],
+    [0, 0, 1, -1, 1, 0, 0],
+    [0, 0, -1, -1, -1, -1, -1],
+    [0, 0, 0, -1, 0, -1, 0],
+    [0, 0, 1, -1, 1, 0, 0],
+    [0, 0, 0, -1, 0, 0, 0],
+    [0, 0, 0, -1, 0, 0, 0]
+])
+
+# Objects
+bombs = []
+bombs.append(Bomb(position = Position(x=2, y=4), strength = 3, timer = 2))
+bombs.append(Bomb(position = Position(x=1,y=0), strength = 2, timer = 5))
+bombs.append(Bomb(position = Position(x=6,y=4), strength = 1, timer = 0.1))
+bombs.append(Bomb(position = Position(x=6,y=5), strength = 1, timer = 0.2))
+fires = []
+fires.append(Fire(position = Position(x=1,y=5)))
+fires.append(Fire(position = Position(x=1,y=6)))
+fires.append(Fire(position = Position(x=1,y=7)))
+fires.append(Fire(position = Position(x=2,y=6)))
+crates = []
+crates.append(Crate(position = Position(x=0,y=0)))
+crates.append(Crate(position = Position(x=3,y=2)))
+crates.append(Crate(position = Position(x=3,y=3)))
+crates.append(Crate(position = Position(x=3,y=4)))
+crates.append(Crate(position = Position(x=3,y=5)))
+crates.append(Crate(position = Position(x=3,y=6)))
+crates.append(Crate(position = Position(x=3,y=7)))
+crates.append(Crate(position = Position(x=2,y=3)))
+crates.append(Crate(position = Position(x=3,y=3)))
+crates.append(Crate(position = Position(x=4,y=3)))
+crates.append(Crate(position = Position(x=5,y=3)))
+crates.append(Crate(position = Position(x=6,y=3)))
+crates.append(Crate(position = Position(x=5,y=4)))
+walls = []
+walls.append(Wall(position = Position(x=2, y=2)))
+walls.append(Wall(position = Position(x=2, y=5)))
+walls.append(Wall(position = Position(x=4, y=2)))
+walls.append(Wall(position = Position(x=4, y=5)))
+agent = Player(position = Position(1, 1), username = 'agent')
+enemies = []
+enemies.append(Player(position = Position(1,1), username = 'enemy_1'))
+enemies.append(Player(position = Position(4,0), username = 'enemy_2'))
+enemies.append(Player(position = Position(0,6), username = 'enemy_3'))
+enemies.append(Player(position = Position(6,7), username = 'enemy_4'))
+bonuses = []
+bonuses.append(Bonus(position = Position(6, 0), type = 'BLAST_RADIUS'))
+bonuses.append(Bonus(position = Position(4, 4), type = 'BLAST_RADIUS'))
+
+def test_construct_full_state():
+    state_vector = bomberman_state_encoding.construct_full_state(
+        bombs = bombs,
+        fires = fires,
+        walls = walls,
+        agent = agent,
+        enemies = enemies,
+        crates = crates,
+        bonuses = bonuses,
+        flatten = True
+    )
+    assert len(state_vector) == (num_images*board_width*board_height - num_images*len(walls)),\
+        "Length of the flattened-full state vector is incorrect!"
+
+def test_construct_danger_image():
+    solids_image = bomberman_state_encoding.construct_solids_image(walls=walls, crates=crates)
+    danger_image = bomberman_state_encoding.construct_danger_image(bombs=bombs, fires=fires, wall_image=solids_image)
+    assert np.array_equal(danger_image, danger_image_solution), "Danger image is incorrect!"
+
+def test_construct_env_image():
+    env_image = bomberman_state_encoding.construct_env_image(crates=crates, bonuses=bonuses)
+    assert np.array_equal(env_image, env_image_solution), "Environment image is incorrect!"
+
+def test_construct_adversary_image():
+    adversary_image = bomberman_state_encoding.construct_adversary_image(enemies=enemies)
+    assert np.array_equal(adversary_image, adversary_image_solution), "Adversary image is incorrect!"
+
+def test_construct_agent_image():
+    agent_image = bomberman_state_encoding.construct_agent_image(agent=agent)
+    assert np.array_equal(agent_image, agent_image_solution), "Agent image is incorrect!"
+
+def test_construct_solids_image():
+    solids_image = bomberman_state_encoding.construct_solids_image(walls=walls, crates=crates)
+    assert np.array_equal(solids_image, solids_image_solution), "Solids image is incorrect!"
+
+def test_flatten_image_removes_wall_cells():
+    solids_image = bomberman_state_encoding.construct_solids_image(walls=walls, crates=crates)
+    flat_image = bomberman_state_encoding.flatten_image(image=solids_image, walls=walls)
+    # Walls are the only cells equal to 1 in the solids image, so none may survive flattening
+    assert len(flat_image) == board_width*board_height - len(walls), "Flattened image has the wrong length!"
+    assert not np.any(flat_image == 1), "Wall cells were not removed from the flattened image!"