# CollectMineralShards backup

저는 이번 튜토리얼에서 AlphaStar 논문의 Agent Network에 적용된 AutoRegressive model을 모방해, 화면 RGB와 함께 action의 대상이 되는 두 개의 마린과 mineralshards들을 entity로 모두 넣을 생각입니다. (화면 RGB만으로도 충분할 것으로 보이지만요.)

기본적으로 제가 생각한  필요 정보들은 다음과 같습니다.

* 화면의 정보
  * observation.feature\_screen.player\_relative를 사용할 예정입니다. (마린은 1, Mineralshards는 3)
* 화면의 마린과 Mineralshards(dynamic length)
  * x좌표
    * observation.feature\_units.x
  * y좌표
    * observation.feature\_units.y
  * alliance
    * observation.feature\_units.alliance
      * (Mineralshards는 3, marine은 1)

그렇다면, state를 구하는 function을 만들어 보겠습니다.

```python
def get_state(obs):
    """Build the network inputs from a single pysc2 observation.

    Args:
        obs: A timestep whose ``observation`` exposes
            ``feature_screen.player_relative`` and ``feature_units``.

    Returns:
        A ``(screen, units)`` tuple of NumPy arrays, each with a leading
        batch axis of size 1. ``units`` holds one ``[x, y, alliance]`` row per
        entry of ``feature_units`` and is always shaped ``(1, N, 3)``, also
        when no units are present (``N == 0``).
    """
    screen = np.array(obs.observation.feature_screen.player_relative)
    rows = [[unit.x, unit.y, unit.alliance]
            for unit in obs.observation.feature_units]
    # Without the reshape an empty unit list collapses to shape (0,), which
    # would make the batched result (1, 0) instead of (1, 0, 3).
    units = np.array(rows).reshape(-1, 3)
    return np.expand_dims(screen, 0), np.expand_dims(units, 0)
```

그렇다면 action은 어떻게 정의할 수 있을까요? 저는 위에서 말했듯이 AutoRegressive 모델을 만들기 위해 다음과 같이 정의하여보았습니다.

* Action
  * unit selection
    * mineralshards
    * marine
  * target unit selection
    * marine
    * mineralshards
    * none
  * target point
    * x,y

이들을 통합해 만든 스크립트입니다. Brain은 현재 마구잡이 random으로 action하도록 만들어진 상태입니다.

```python
# Function id handed to actions.FunctionCall to order a move to a screen
# coordinate (presumably pysc2's Move_screen -- TODO confirm).
MOVE_SCREEN = 331
# First argument of that FunctionCall; presumably the "not queued" flag.
NOT_QUEUED = [0]

# PlayerRelative enum members; NEUTRAL is compared against a unit's alliance
# in Brain.network.
SELF = features.PlayerRelative.SELF
NEUTRAL = features.PlayerRelative.NEUTRAL

# Presumably unit-type ids for mineral shards and marines; not referenced by
# the code on this page -- TODO confirm against pysc2.lib.units.
MINERALSHARDS = 1680
MARINES = 48


class Brain:
    """Placeholder policy that emits random three-step action sequences.

    ``network`` stands in for a future neural network: it samples a unit to
    select, then either a target unit or a target screen point. ``action``
    hands the three resulting actions out one per call.
    """

    def __init__(self):
        # Actions from the latest ``network`` call that ``action`` has not
        # returned yet.
        self.action_lst = []

    @staticmethod
    def _idle():
        """Return a triple of no-op actions."""
        return (actions.FUNCTIONS.no_op(),
                actions.FUNCTIONS.no_op(),
                actions.FUNCTIONS.no_op())

    def network(self, obs, screen_info, units_info):
        """Sample one (select, target-unit move, target-point move) triple.

        Args:
            obs: Timestep whose ``observation.feature_units`` lists the units
                to choose from.
            screen_info: Currently unused; kept for the future network.
            units_info: Currently unused; the unit list is re-read from
                ``obs``.

        Returns:
            A tuple of three pysc2 function calls. All three are ``no_op``
            when there are no units or the sampled unit is neutral.
        """
        # For now the "network" is written as a plain function.
        units = [[unit.alliance, unit.x, unit.y]
                 for unit in obs.observation.feature_units]
        if not units:
            # random.choice raises IndexError on an empty sequence.
            return self._idle()
        # Deselect is not usable yet, so a wrong pick has to be answered with
        # no_op() rather than by clearing the selection.
        unit = random.choice(units)
        if unit[0] == NEUTRAL:
            return self._idle()

        select = actions.FUNCTIONS.select_point("select", (unit[1], unit[2]))
        if random.randint(0, 1) == 0:
            # A target unit was picked: move to its coordinates.
            target = random.choice(units)
            move = actions.FunctionCall(
                MOVE_SCREEN, [NOT_QUEUED, [target[1], target[2]]])
            return select, move, actions.FUNCTIONS.no_op()

        # No target unit: move to a random screen point instead.
        x = random.randint(0, SCREEN_SIZE - 1)
        y = random.randint(0, SCREEN_SIZE - 1)
        move = actions.FunctionCall(MOVE_SCREEN, [NOT_QUEUED, [x, y]])
        # no_op must be *called*; the bare attribute is not a FunctionCall.
        return select, actions.FUNCTIONS.no_op(), move

    def action(self, obs, screen_info, units_info):
        """Return the next queued action, refilling the queue when empty."""
        if not self.action_lst:
            self.action_lst.extend(self.network(obs, screen_info, units_info))
        return self.action_lst.pop(0)
            
class Agent(base_agent.BaseAgent):
    """pysc2 agent that turns observations into arrays and defers to ``Brain``."""

    def __init__(self):
        super(Agent, self).__init__()
        self.brain = Brain()

    def get_state(self, obs):
        """Build the network inputs from a single observation.

        Args:
            obs: Timestep whose ``observation`` exposes
                ``feature_screen.player_relative`` and ``feature_units``.

        Returns:
            A ``(screen, units)`` tuple of NumPy arrays, each with a leading
            batch axis of size 1. ``units`` holds one ``[x, y, alliance]`` row
            per entry of ``feature_units`` and is always shaped ``(1, N, 3)``,
            also when no units are present.
        """
        screen = np.array(obs.observation.feature_screen.player_relative)
        rows = [[unit.x, unit.y, unit.alliance]
                for unit in obs.observation.feature_units]
        # Without the reshape an empty unit list collapses to shape (0,),
        # which would make the batched result (1, 0) instead of (1, 0, 3).
        units = np.array(rows).reshape(-1, 3)
        return np.expand_dims(screen, 0), np.expand_dims(units, 0)

    def step(self, obs):
        """Run the base-class bookkeeping and return the brain's next action."""
        super(Agent, self).step(obs)
        screen_info, units_info = self.get_state(obs)
        # TODO: call self.brain.train() here once training is implemented.
        return self.brain.action(obs, screen_info, units_info)

```

다음은 reward를 받아와 보겠습니다. 위의 스크립트로 본다면 98번째 줄에서 reward를 확인해야 합니다.

```python
    try:
        with sc2_env.SC2Env(map_name=MAPNAME, players=players,
                            agent_interface_format=interface,
                            step_mul=APM, game_steps_per_episode=UNLIMIT,
                            visualize=VISUALIZE, realtime=REALTIME) as env:
            agent.setup(env.observation_spec(), env.action_spec())

            timestep = env.reset()
            agent.reset()
            reward = 0
            while True:
                # Minerals held before acting, so that the reward below is
                # the gain of this step only rather than a running total.
                minerals_before = timestep[0].observation.player.minerals
                step_actions = [agent.step(timestep[0])]
                if timestep[0].last():
                    break
                timestep = env.step(step_actions)
                # Reward = change in the mineral count caused by this step.
                reward = timestep[0].observation.player.minerals - minerals_before
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly.
        pass
```

그렇다면 전체 코드는 다음과 같습니다.

```python
from pysc2.env import sc2_env
from pysc2.lib import features
from pysc2.agents import base_agent
import numpy as np
from pysc2.lib import actions

from absl import app

import random

# Mini-game map handed to SC2Env.
MAPNAME = 'CollectMineralShards'
# Passed to SC2Env as step_mul (300 / 18.75 evaluates to 16).
# NOTE(review): pysc2 describes step_mul as game steps per agent step, so a
# larger value means fewer actions per minute -- confirm this conversion.
APM = int(300 / 18.75)
# Passed to SC2Env as game_steps_per_episode.
UNLIMIT = 0
VISUALIZE = True
REALTIME = True

# Feature-layer resolutions; SCREEN_SIZE also bounds the random move targets.
SCREEN_SIZE = 84
MINIMAP_SIZE = 64

# Function id and first argument used when building the move FunctionCall.
MOVE_SCREEN = 331
NOT_QUEUED = [0]

# PlayerRelative enum members; NEUTRAL is compared against a unit's alliance.
SELF = features.PlayerRelative.SELF
NEUTRAL = features.PlayerRelative.NEUTRAL

# Presumably unit-type ids; not referenced elsewhere in this listing.
MINERALSHARDS = 1680
MARINES = 48

# A single Terran agent plays the map.
players = [sc2_env.Agent(sc2_env.Race.terran)]

# Request feature layers plus the feature_units list in each observation.
interface = features.AgentInterfaceFormat(
    feature_dimensions=features.Dimensions(
        screen=SCREEN_SIZE,
        minimap=MINIMAP_SIZE,
    ),
    use_feature_units=True,
)

class Brain:
    """Placeholder policy that emits random three-step action sequences.

    ``network`` stands in for a future neural network: half of the time it
    idles, otherwise it samples a unit to select and then either a target
    unit or a target screen point. ``action`` hands the three resulting
    actions out one per call.
    """

    def __init__(self):
        # Actions from the latest ``network`` call that ``action`` has not
        # returned yet.
        self.action_lst = []

    @staticmethod
    def _idle():
        """Return a triple of no-op actions."""
        return (actions.FUNCTIONS.no_op(),
                actions.FUNCTIONS.no_op(),
                actions.FUNCTIONS.no_op())

    def network(self, obs, screen_info, units_info):
        """Sample one (select, target-unit move, target-point move) triple.

        Args:
            obs: Timestep whose ``observation.feature_units`` lists the units
                to choose from.
            screen_info: Currently unused; kept for the future network.
            units_info: Currently unused; the unit list is re-read from
                ``obs``.

        Returns:
            A tuple of three pysc2 function calls. All three are ``no_op``
            when there are no units, when the coin flip chooses to idle, or
            when the sampled unit is neutral.
        """
        # For now the "network" is written as a plain function.
        units = [[unit.alliance, unit.x, unit.y]
                 for unit in obs.observation.feature_units]
        if not units:
            # random.choice raises IndexError on an empty sequence.
            return self._idle()
        # Deselect is not usable yet, so a wrong pick has to be answered with
        # no_op() rather than by clearing the selection.
        unit = random.choice(units)
        # Idle half of the time (fine while the marines are already heading
        # for a target), and always when a neutral unit was sampled.
        if random.randint(0, 1) == 0 or unit[0] == NEUTRAL:
            return self._idle()

        select = actions.FUNCTIONS.select_point("select", (unit[1], unit[2]))
        if random.randint(0, 1) == 0:
            # A target unit was picked: move to its coordinates.
            target = random.choice(units)
            move = actions.FunctionCall(
                MOVE_SCREEN, [NOT_QUEUED, [target[1], target[2]]])
            return select, move, actions.FUNCTIONS.no_op()

        # No target unit: move to a random screen point instead.
        x = random.randint(0, SCREEN_SIZE - 1)
        y = random.randint(0, SCREEN_SIZE - 1)
        move = actions.FunctionCall(MOVE_SCREEN, [NOT_QUEUED, [x, y]])
        return select, actions.FUNCTIONS.no_op(), move

    def action(self, obs, screen_info, units_info):
        """Return the next queued action, refilling the queue when empty."""
        if not self.action_lst:
            self.action_lst.extend(self.network(obs, screen_info, units_info))
        return self.action_lst.pop(0)
            
class Agent(base_agent.BaseAgent):
    """pysc2 agent that turns observations into arrays and defers to ``Brain``."""

    def __init__(self):
        super(Agent, self).__init__()
        self.brain = Brain()

    def get_state(self, obs):
        """Build the network inputs from a single observation.

        Args:
            obs: Timestep whose ``observation`` exposes
                ``feature_screen.player_relative`` and ``feature_units``.

        Returns:
            A ``(screen, units)`` tuple of NumPy arrays, each with a leading
            batch axis of size 1. ``units`` holds one ``[x, y, alliance]`` row
            per entry of ``feature_units`` and is always shaped ``(1, N, 3)``,
            also when no units are present.
        """
        screen = np.array(obs.observation.feature_screen.player_relative)
        rows = [[unit.x, unit.y, unit.alliance]
                for unit in obs.observation.feature_units]
        # Without the reshape an empty unit list collapses to shape (0,),
        # which would make the batched result (1, 0) instead of (1, 0, 3).
        units = np.array(rows).reshape(-1, 3)
        return np.expand_dims(screen, 0), np.expand_dims(units, 0)

    def step(self, obs):
        """Run the base-class bookkeeping and return the brain's next action."""
        super(Agent, self).step(obs)
        screen_info, units_info = self.get_state(obs)
        # TODO: call self.brain.train() here once training is implemented.
        return self.brain.action(obs, screen_info, units_info)


def main(args):
    """Run the agent in the SC2 environment until the last timestep or Ctrl-C.

    Args:
        args: Positional argument supplied by ``app.run``; not used here.
    """
    agent = Agent()
    try:
        with sc2_env.SC2Env(
            map_name=MAPNAME,
            players=players,
            agent_interface_format=interface,
            step_mul=APM,
            game_steps_per_episode=UNLIMIT,
            visualize=VISUALIZE,
            realtime=REALTIME,
        ) as env:
            agent.setup(env.observation_spec(), env.action_spec())

            timestep = env.reset()
            agent.reset()
            while True:
                current = timestep[0]
                # Mineral count before acting; the reward is the per-step gain.
                minerals_before = current.observation.player.minerals
                step_actions = [agent.step(current)]
                if current.last():
                    break
                timestep = env.step(step_actions)
                minerals_after = timestep[0].observation.player.minerals
                reward = minerals_after - minerals_before
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly.
        pass


if __name__ == "__main__":
    # Start the absl app only when executed as a script, not on import.
    app.run(main)
```

완료입니다!

이후에는 CollectMineralsAndGas 맵에서 MultiObjective로 미네랄과 가스에 weights를 주어, weights에 따라 미네랄과 가스의 채취량을 조절할 수 있는지를 개인적으로 테스트해 볼 예정입니다.

번외로, units 정보로 screen에 보이지 않는 유닛까지 확인할 수 있는지도 확인해 볼 생각입니다.


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://kilmya1.gitbook.io/startcraft2-rl-agent-tutorial/solve-mini-game/mini-game/collectmineralshards-backup.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
