CollectMineralShards

from pysc2.env import sc2_env
from pysc2.lib import features
from pysc2.agents import base_agent
from pysc2.lib import actions

from absl import app

MAPNAME = 'CollectMineralShards'
APM = 300
STEP_MUL = int(APM / 18.75)  # convert desired APM into a step multiplier (game steps per agent step)
UNLIMIT = 0                  # game_steps_per_episode = 0 means no episode step limit
VISUALIZE = True
REALTIME = True

SCREEN_SIZE = 84
MINIMAP_SIZE = 64

players = [sc2_env.Agent(sc2_env.Race.terran)]

interface = features.AgentInterfaceFormat(
    feature_dimensions=features.Dimensions(
        screen=SCREEN_SIZE, minimap=MINIMAP_SIZE),
    use_feature_units=True)

class Agent(base_agent.BaseAgent):
    """Placeholder agent that does nothing every step."""
    def step(self, obs):
        super(Agent, self).step(obs)
        return actions.FUNCTIONS.no_op()


def main(args):
    agent = Agent()
    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players,
                            agent_interface_format=interface,
                            step_mul=STEP_MUL, game_steps_per_episode=UNLIMIT,
                            visualize=VISUALIZE, realtime=REALTIME) as env:
            agent.setup(env.observation_spec(), env.action_spec())

            timestep = env.reset()
            agent.reset()

            while True:
                step_actions = [agent.step(timestep[0])]
                if timestep[0].last():
                    break
                timestep = env.step(step_actions)
    except KeyboardInterrupt:
        pass

if __name__ == "__main__":
    app.run(main)

๋‹ค์Œ์˜ ์Šคํฌ๋ฆฝํŠธ๋ฅผ ํ†ตํ•ด CollectMineralShards ๋งต์„ ์‹คํ–‰์‹œ์ผœ๋ณผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ด์ œ RL์„ ํ•  ์ˆ˜ ์žˆ๋„๋ก ์ด ํ™˜๊ฒฝ์—์„œ ํ•„์š”ํ•œ ์ •๋ณด๋ฅผ ๋นผ์˜ค๋„๋ก ํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค.

MDP ์ •์˜, ํ™˜๊ฒฝ ์ •์˜๋Š” ๊ฐ™์€ ๋ฌธ์ œ์˜ ๊ฐ™์€ ๋ชฉ์ ์ด๋ผ๋„ ์ถฉ๋ถ„ํžˆ ๋‹ค๋ฅด๊ฒŒ ์ •์˜ํ•  ์ˆ˜ ์žˆ๊ณ , ๊ทธ ์ •์˜์— ๋”ฐ๋ผ ๋ฌธ์ œ๊ฐ€ ์–ผ๋งˆ๋‚˜ ๋ณต์žกํ•ด์ง€๋Š๋ƒ๋Š” ๋ฌผ๋ก ์ด๊ณ , time series์˜ ๊ธธ์ด์™€ action dimension์‚ฌ์ด์˜ trade off์˜ ๊ท ํ˜•, ๊ฐ€์žฅ ์ค‘์š”ํ•œ ์„ฑ๋Šฅ๊นŒ์ง€ ์ขŒ์ง€์šฐ์ง€๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.

ํ•˜์ง€๋งŒ ๊ฐ€์žฅ์ค‘์š”ํ•œ ๊ฒƒ์€ ํ™•์‹คํ•œ ๊ฒƒ์„ ์กฐ๊ธˆ์”ฉ ์Œ“์•„๊ฐ€๋Š” ๊ฒƒ์ž…๋‹ˆ๋‹ค. minimalํ•œ ํ™˜๊ฒฝ์—์„œ minimalํ•œ agent network๋ฅผ ํ†ตํ•ด ์ ์  ๋ฐœ์ „์‹œ์ผœ๋‚˜๊ฐ€๋Š” ๊ฒƒ์ด ๋””๋ฒ„๊น…๊ณผ ๊ฐœ์„ ์ ์—๋Œ€ํ•œ ์‹คํ—˜์„ ์‰ฝ๊ณ  ๋น ๋ฅด๊ฒŒ ์ง„ํ–‰ํ•  ์ˆ˜ ์žˆ๋„๋ก ํ•ฉ๋‹ˆ๋‹ค. ๊ทธ๋ ‡๊ธฐ์—, ๋จผ์ € ๋‹ค์Œ๊ณผ ๊ฐ™์ด environment๋ฅผ ์ •์˜ํ•ด๋ณด๊ฒ ์Šต๋‹ˆ๋‹ค.

  • Environment Definition :

    • State : 2 * 32(screen_size) * 32(screen_size)

      • feature_screen : 2 * 32 * 32

        • is_selected : 1 * 32 * 32

        • player_relative : 1 * 32 * 32

    • Action :

      • screen : 1 * 32 * 32

    • Reward :

      • mineral : 0 ~ n (1 per mineral shard collected)

  • Agent Definition :

    • Algorithm :

      • PPO + GAE

    • Network :

      • CNN + MLP (a minimal sketch follows this list)
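
Below is a minimal sketch of the CNN + MLP network from this definition, written with PyTorch as an assumed framework (the text does not fix one). The layer widths and kernel sizes are illustrative guesses; the only parts taken from the definition are the 2 * 32 * 32 input, the 32 * 32 spatial policy over screen coordinates (flattened to 1024 logits), and a scalar value head for PPO.

import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(nn.Module):
    """Minimal CNN + MLP actor-critic for a 2 x 32 x 32 state.

    Hypothetical sketch: layer sizes are illustrative, not the tuned network.
    """
    def __init__(self, in_channels=2, screen_size=32):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        flat = 32 * screen_size * screen_size
        # Policy head: one logit per screen pixel (32 * 32 = 1024 actions).
        self.policy = nn.Linear(flat, screen_size * screen_size)
        # Value head: scalar state value for PPO's critic.
        self.value = nn.Sequential(nn.Linear(flat, 128), nn.ReLU(), nn.Linear(128, 1))

    def forward(self, state):
        x = F.relu(self.conv1(state))
        x = F.relu(self.conv2(x))
        x = x.flatten(start_dim=1)
        return self.policy(x), self.value(x)

Sampling a screen coordinate then reduces to a torch.distributions.Categorical draw over the 1024 policy logits.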

Making the agent network CNN-based has a known drawback. According to previous StarCraft II studies, such networks lack consistency in their actions: for example, a worker unit on its way to construct a building may turn around and go back to mining minerals, because the network is poor at carrying information from earlier decisions forward. For that reason, we will give the marine no new action at all while it is moving. This in turn creates another problem: a long move consumes a lot of time while a short move consumes little, but there is no penalty reflecting this. Still, since the agent has to collect as much reward as possible within the two-minute time limit, we expect it to learn to some degree anyway, so let's go ahead and work with the environment. (A sketch of this no-action-while-moving scheme follows.)
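
One simple way to implement "no new action while moving" is sketched below, under the assumption that we track the last ordered target ourselves: keep issuing no_op until the selected marine is close enough to its target coordinate, and only then ask the policy for the next order. TARGET_EPS, select_action, and the position bookkeeping are illustrative choices, not pysc2 API or the section's final implementation.

from pysc2.lib import actions, features

TARGET_EPS = 2  # hypothetical arrival threshold, in screen pixels

def select_action(obs, policy_target, last_target):
    """Issue a new Move_screen only when the marine has reached its last target."""
    # Our own units on the feature screen (requires use_feature_units=True).
    marines = [u for u in obs.observation.feature_units
               if u.alliance == features.PlayerRelative.SELF]
    if not marines:
        return actions.FUNCTIONS.no_op(), last_target
    m = marines[0]
    if last_target is not None and \
            abs(m.x - last_target[0]) + abs(m.y - last_target[1]) > TARGET_EPS:
        # Still on the way: give no new order.
        return actions.FUNCTIONS.no_op(), last_target
    if actions.FUNCTIONS.Move_screen.id in obs.observation.available_actions:
        return actions.FUNCTIONS.Move_screen("now", policy_target), policy_target
    # Nothing selected yet: grab the marines first.
    return actions.FUNCTIONS.select_army("select"), last_target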

state๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์€ ํ•จ์ˆ˜๋ฅผ ํ†ตํ•ด state๋ฅผ ๋ฐ›์•„์˜ฌ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.

import numpy as np

def get_state(obs):
    # 1 x 32 x 32 map of friendly / neutral / enemy units.
    player_relative = np.expand_dims(np.array(obs.observation.feature_screen.player_relative), 0)
    # 1 x 32 x 32 mask of the currently selected units.
    selected = np.expand_dims(np.array(obs.observation.feature_screen.selected), 0)
    # Stack into the 2 x 32 x 32 state defined above.
    state = np.concatenate([player_relative, selected], 0)
    return state
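
Going the other way, the flat index sampled from the network's 32 * 32 policy has to be turned back into a screen coordinate for Move_screen. A small example of that conversion, assuming row-major logits (the function name is illustrative):

from pysc2.lib import actions

SCREEN = 32  # action map is 32 x 32, matching the definition above

def index_to_action(flat_index):
    """Map a flat policy index in [0, 32*32) back to an (x, y) Move_screen action."""
    y, x = divmod(flat_index, SCREEN)  # row-major: index = y * SCREEN + x
    return actions.FUNCTIONS.Move_screen("now", (x, y))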

๊ธฐ๋ณธ์ ์ธ ์œ ๋‹› ์›€์ง์ž„๊ณผ reward๋Š” Basic about pysc2์˜ How to select your SCV!? ์„น์…˜์—์„œ ๋ณด๊ณ  ์˜ค์‹œ๋ฉด random agent์ •๋„๋Š” ์‰ฝ๊ฒŒ ๋งŒ๋“ค ์ˆ˜ ์žˆ์„ ๊ฒƒ์ž…๋‹ˆ๋‹ค. ์ด์ƒ์ž…๋‹ˆ๋‹ค.
