CollectMineralShards backup

저는 이번 튜토리얼을 통해, AlphaStar논문에서의 Agent Network에서 적용된 AutoRegressive model을 모방해, 화면 RGB와 action은 두 개의 마린과 mineralshards들을 entity로 모두 넣을 생각입니다.(화면 RGB만으로 충분할것으로 보이지만)

기본적으로 제가 생각한 필요 정보들은 다음과 같습니다.

화면의 정보
- observation.screen_features.player_relative 를 사용할 예정입니다.(마린은 1, Mineralshards 은 3)
화면의 마린과 Mineralshards(dynamic length)
- x좌표
  - observation.feature_units.x
- y좌표
  - observation.feature_units.y
- alliance
  - observation.feature_units.alliance
    (Mineralshards는 3, marine은 1)

그렇다면, state를 구하는 function을 만들어보겠습니다.

def get_state(obs):
    screen = obs.observation.feature_screen.player_relative
    units_info = obs.observation.feature_units
    units_info = [[unit.x,unit.y,unit.alliance] for unit in units_info]
    return np.expand_dims(np.array(screen),0), np.expand_dims(np.array(units_info),0)

그렇다면 action은 어떻게 정의할 수 있을까요? 저는 위에서 말했듯이 AutoRegressive 모델을 만들기 위해 다음과 같이 정의하여보았습니다.

Action
- unit selection
  - mineralshards
  - marine
- target unit selection
  - marine
  - mineralshards
  - none
- target point
  - x,y

이들을 통합해 만든 스크립트입니다. Brain은 현재 마구잡이 random으로 action하도록 만들어진 상태입니다.

MOVE_SCREEN = 331
NOT_QUEUED = [0]

SELF = features.PlayerRelative.SELF
NEUTRAL = features.PlayerRelative.NEUTRAL

MINERALSHARDS = 1680
MARINES = 48


class Brain:
    def __init__(self):
        self.action_lst = []
    def network(self, obs,screen_info,units_info): 
        ## network를 잠시 function형태로 쓰겠습니다.
        units_info = obs.observation.feature_units
        units_info = [[unit.alliance,unit.x,unit.y] for unit in units_info]
        #모든 selection풀기 지금 deselect 기능 못쓰므로 잘못눌렀다면 no_op()해주어야함
        unit = random.choice(units_info)
        if unit[0] == NEUTRAL:
            return actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op()
        else:
            action_1 = actions.FUNCTIONS.select_point("select",(unit[1],unit[2]))
            if random.randint(0,1) == 0: # target unit이 잡혔을 시
                target_unit = random.choice(units_info)
                action_2 = actions.FunctionCall(MOVE_SCREEN,[NOT_QUEUED,[target_unit[1],target_unit[2]]])
                action_3 = actions.FUNCTIONS.no_op()
            else: # target unit없을시
                action_2 = actions.FUNCTIONS.no_op
                x = random.randint(0,SCREEN_SIZE - 1)
                y = random.randint(0,SCREEN_SIZE - 1)
                action_3 = actions.FunctionCall(MOVE_SCREEN,[NOT_QUEUED,[x,y]])
            return action_1,action_2,action_3
    def action(self,obs,screen_info,units_info):
        if len(self.action_lst) == 0:
            [self.action_lst.append(x) for x in self.network(obs,screen_info,units_info)]
        return self.action_lst.pop(0) 
            
class Agent(base_agent.BaseAgent):
    def __init__(self):
        super(Agent, self).__init__()
        self.brain = Brain()
    def get_state(self,obs):
        screen = obs.observation.feature_screen.player_relative
        units_info = obs.observation.feature_units
        units_info = [[unit.x,unit.y,unit.alliance] for unit in units_info]
        return np.expand_dims(np.array(screen),0), np.expand_dims(np.array(units_info),0) # .transpose(1, 0)
    def step(self, obs):
        super(Agent, self).step(obs)
        screen_info, units_info = self.get_state(obs)
        action = self.brain.action(obs,screen_info,units_info)
        #self.brain.train()
        return action

다음은 reward를 받아와보겠습니다. 위의 스크립트로 본다면 98번째 줄에서 reward를 확인해야합니다.

    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players, \
                            agent_interface_format=interface, \
                            step_mul=APM, game_steps_per_episode=UNLIMIT, \
                            visualize=VISUALIZE, realtime=REALTIME) as env:
            agent.setup(env.observation_spec(), env.action_spec())

            timestep = env.reset()
            agent.reset()
            reward = 0
            while True:
                step_actions = [agent.step(timestep[0])]
                if timestep[0].last():
                    break
                timestep = env.step(step_actions)
                reward -= timestep[0].observation.player.minerals 
    except KeyboardInterrupt:
        pass

그렇다면 전체 코드는 다음과 같습니다.

from pysc2.env import sc2_env
from pysc2.lib import features
from pysc2.agents import base_agent
import numpy as np
from pysc2.lib import actions

from absl import app

import random

MAPNAME = 'CollectMineralShards'
APM = 300
APM = int(APM / 18.75)
UNLIMIT = 0
VISUALIZE = True
REALTIME = True

SCREEN_SIZE = 84
MINIMAP_SIZE = 64


MOVE_SCREEN = 331
NOT_QUEUED = [0]

SELF = features.PlayerRelative.SELF
NEUTRAL = features.PlayerRelative.NEUTRAL

MINERALSHARDS = 1680
MARINES = 48

players = [sc2_env.Agent(sc2_env.Race.terran)]

interface = features.AgentInterfaceFormat( \
    feature_dimensions=features.Dimensions( \
        screen=SCREEN_SIZE, minimap=MINIMAP_SIZE), use_feature_units=True)

class Brain:
    def __init__(self):
        self.action_lst = []
    def network(self, obs,screen_info,units_info): 
        ## network를 잠시 function형태로 쓰겠습니다.
        units_info = obs.observation.feature_units
        units_info = [[unit.alliance,unit.x,unit.y] for unit in units_info]
        #모든 selection풀기 지금 deselect 기능 못쓰므로 잘못눌렀다면 no_op()해주어야함
        unit = random.choice(units_info)
        if random.randint(0,1) == 0:
            #아무것도 안하는 행위(이미 마린들이 목표를향해 움직인다면 아무것도안하고 있어도 괜찮음)
            return actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op()
        elif unit[0] == NEUTRAL:
            return actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op(), actions.FUNCTIONS.no_op()
        else:
            action_1 = actions.FUNCTIONS.select_point("select",(unit[1],unit[2]))
            if random.randint(0,1) == 0: # target unit이 잡혔을 시
                target_unit = random.choice(units_info)
                action_2 = actions.FunctionCall(MOVE_SCREEN,[NOT_QUEUED,[target_unit[1],target_unit[2]]])
                action_3 = actions.FUNCTIONS.no_op()
            else: # target unit없을시
                action_2 = actions.FUNCTIONS.no_op()
                x = random.randint(0,SCREEN_SIZE - 1)
                y = random.randint(0,SCREEN_SIZE - 1)
                action_3 = actions.FunctionCall(MOVE_SCREEN,[NOT_QUEUED,[x,y]])
            return action_1,action_2,action_3
    def action(self,obs,screen_info,units_info):
        if len(self.action_lst) == 0:
            [self.action_lst.append(x) for x in self.network(obs,screen_info,units_info)]
        return self.action_lst.pop(0) 
            
class Agent(base_agent.BaseAgent):
    def __init__(self):
        super(Agent, self).__init__()
        self.brain = Brain()
    def get_state(self,obs):
        screen = obs.observation.feature_screen.player_relative
        units_info = obs.observation.feature_units
        units_info = [[unit.x,unit.y,unit.alliance] for unit in units_info]
        return np.expand_dims(np.array(screen),0), np.expand_dims(np.array(units_info),0) # .transpose(1, 0)
    def step(self, obs):
        super(Agent, self).step(obs)
        screen_info, units_info = self.get_state(obs)
        action = self.brain.action(obs,screen_info,units_info)
        #self.brain.train()
        return action


def main(args):
    agent = Agent()
    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players, \
                            agent_interface_format=interface, \
                            step_mul=APM, game_steps_per_episode=UNLIMIT, \
                            visualize=VISUALIZE, realtime=REALTIME) as env:
            agent.setup(env.observation_spec(), env.action_spec())

            timestep = env.reset()
            agent.reset()
            while True:
                reward = - timestep[0].observation.player.minerals 
                step_actions = [agent.step(timestep[0])]
                if timestep[0].last():
                    break
                timestep = env.step(step_actions)
                reward += timestep[0].observation.player.minerals 
    except KeyboardInterrupt:
        pass


app.run(main)

완료입니다!

이후 테스트하고싶은 것은 CollectMineralsAndGas 맵에서 MultiObjective로 미네랄과 가스의 weights 를 주어 weights에 따라 미네랄과 가스의 채취량을 조절할 수 있는가에 대한 테스트를 개인적으로 진행할 예정입니다.

번외로, units로 screen에 안보이더라도 확인가능한지

PreviousCollectMineralShards NextUntitled

Last updated 4 years ago

Was this helpful?