Skills API

Skill Configuration

from composabl_core.config import SkillConfig

# Configuration for a skill named "temperature-control" whose type is the
# learning-based "SkillTeacher".
# NOTE(review): the entries under `config` look like training
# hyperparameters (optimizer step size, network shape, activation function);
# confirm the accepted keys against the SkillConfig reference.
skill_config = SkillConfig(
    name="temperature-control",
    type="SkillTeacher",
    config={
        "learning_rate": 0.001,
        # Presumably two hidden layers of 128 units each - TODO confirm.
        "hidden_layers": [128, 128],
        "activation": "tanh"
    }
)

Skills API

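Skills define agent behaviors. Each skill is implemented with one of the strategies described below: a learning-based teacher, a programmatic controller, a selector, or a set of coordinated skills.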

Skill Types

1. SkillTeacher (Learning-based)

import numpy as np

from composabl import Skill, SkillTeacher

class CustomTeacher(SkillTeacher):
    """Example learning-based skill that drives ``position`` to a target.

    ``transform_sensors`` rescales the raw ``position`` reading by
    ``POSITION_SCALE``.  Every other method receives ``transformed_obs`` and
    therefore converts the position back to raw units before comparing it
    with ``target`` or with the boundary thresholds, all of which are
    expressed in raw units.

    NOTE(review): this assumes the runtime passes the output of
    ``transform_sensors`` as ``transformed_obs`` - confirm against the
    SkillTeacher reference.
    """

    # Divisor applied to the raw position in transform_sensors; also the raw
    # boundary at which compute_action_mask starts restricting actions.
    POSITION_SCALE = 50.0
    # Raw-unit distance from the target that counts as "reached".
    SUCCESS_TOLERANCE = 0.1
    # Raw-unit absolute position beyond which the episode is aborted.
    OUT_OF_BOUNDS = 100.0
    # Maximum number of compute_termination calls per episode.
    MAX_EPISODE_STEPS = 1000

    def __init__(self, target_position=10.0):
        """Store the target position (raw units) and start the step counter."""
        self.target = target_position
        self.episode_steps = 0

    def _raw_position(self, transformed_obs):
        """Undo the scaling applied by transform_sensors."""
        return transformed_obs["position"] * self.POSITION_SCALE

    def _distance_to_target(self, transformed_obs):
        """Return the absolute raw-unit distance between position and target."""
        return abs(self._raw_position(transformed_obs) - self.target)

    async def compute_reward(self, transformed_obs, action, sim_reward):
        """Calculate reward for reinforcement learning.

        The reward is the negative distance to the target, plus a bonus of
        100 inside the success tolerance, minus an energy penalty
        proportional to the magnitude of ``action[0]``.  ``sim_reward`` is
        accepted for interface compatibility but not used.
        """
        distance = self._distance_to_target(transformed_obs)

        # Shaped reward: closer to the target is better.
        reward = -distance

        # Bonus for reaching target
        if distance < self.SUCCESS_TOLERANCE:
            reward += 100

        # Penalty for energy usage
        reward -= 0.1 * abs(action[0])

        return reward

    async def compute_success_criteria(self, transformed_obs, action):
        """Return True when the position is within tolerance of the target."""
        return self._distance_to_target(transformed_obs) < self.SUCCESS_TOLERANCE

    async def compute_termination(self, transformed_obs, action):
        """Return True when the episode should end.

        The episode ends on success, when the raw position leaves
        ``[-OUT_OF_BOUNDS, OUT_OF_BOUNDS]``, or after ``MAX_EPISODE_STEPS``
        calls.  The step counter is reset whenever the episode ends so that
        a reused teacher instance does not time out immediately on the next
        episode.
        """
        self.episode_steps += 1

        succeeded = await self.compute_success_criteria(transformed_obs, action)
        out_of_bounds = abs(self._raw_position(transformed_obs)) > self.OUT_OF_BOUNDS
        timed_out = self.episode_steps >= self.MAX_EPISODE_STEPS

        done = bool(succeeded or out_of_bounds or timed_out)
        if done:
            # Start the next episode with a fresh step budget.
            self.episode_steps = 0
        return done

    async def transform_sensors(self, sensors, action):
        """Return a copy of ``sensors`` with ``position`` rescaled.

        Raw positions in ``[-POSITION_SCALE, POSITION_SCALE]`` map to
        ``[-1, 1]``.  The input mapping is not modified, and a missing
        ``position`` key is tolerated.
        """
        transformed = dict(sensors)
        if "position" in transformed:
            transformed["position"] = transformed["position"] / self.POSITION_SCALE
        return transformed

    async def transform_action(self, transformed_obs, action):
        """Transform action to simulator space by clipping it to [-1, 1]."""
        return np.clip(action, -1, 1)

    async def filtered_sensor_space(self):
        """Specify which sensors this skill needs."""
        return ["position", "velocity", "target"]

    async def compute_action_mask(self, transformed_obs, action):
        """Optional: define which actions are valid at the boundaries.

        Returns ``[True, False]`` (can only go forward) at or below the
        lower raw boundary, ``[False, True]`` (can only go backward) at or
        above the upper raw boundary, and ``None`` (all actions valid)
        otherwise.
        """
        position = self._raw_position(transformed_obs)
        if position <= -self.POSITION_SCALE:
            return [True, False]  # Can only go forward
        if position >= self.POSITION_SCALE:
            return [False, True]  # Can only go backward
        return None  # All actions valid

# Create skill with teacher: the first argument ("reach-target") is
# presumably the skill's name, and the teacher's target position of 25.0
# overrides the CustomTeacher constructor default of 10.0.
skill = Skill("reach-target", CustomTeacher(target_position=25.0))

2. SkillController (Programmatic)

3. SkillSelector

4. Coordinated Skills

Skill Composition Patterns

Per Skill Configuration

Algorithms

PPO (Proximal Policy Optimization)

SAC (Soft Actor-Critic)

DQN (Deep Q-Network)

IMPALA

Custom Algorithm

Last updated