Skills API
Skill Configuration
from composabl_core.config import SkillConfig
skill_config = SkillConfig(
name="temperature-control",
type="SkillTeacher",
config={
"learning_rate": 0.001,
"hidden_layers": [128, 128],
"activation": "tanh"
}
)
Skills API
Skill Types
import numpy as np
from composabl import Skill, SkillTeacher
class CustomTeacher(SkillTeacher):
    """Teacher that trains a skill to drive ``position`` to a fixed target.

    ``transform_sensors`` rescales ``position`` by ``POSITION_SCALE`` before
    the policy sees it. The target, success tolerance, out-of-bounds limit and
    track limits are all expressed in raw (simulator) units, so every method
    that receives ``transformed_obs`` converts the position back to raw units
    through ``_raw_position`` before comparing.

    NOTE(review): this assumes the framework hands the output of
    ``transform_sensors`` to the other callbacks as ``transformed_obs`` --
    confirm against the SkillTeacher contract.
    """

    # Divisor applied to the raw position in transform_sensors; raw positions
    # in [-50, 50] map onto [-1, 1].
    POSITION_SCALE = 50.0
    # Raw-unit distance to the target below which the episode is a success.
    SUCCESS_TOLERANCE = 0.1
    # Raw-unit |position| beyond which the episode is aborted.
    OUT_OF_BOUNDS = 100
    # Raw-unit |position| at which the action mask disables one direction.
    TRACK_LIMIT = 50
    # Maximum number of compute_termination calls per episode.
    MAX_EPISODE_STEPS = 1000

    def __init__(self, target_position=10.0):
        """Store the raw-unit target position and reset the step counter."""
        self.target = target_position
        self.episode_steps = 0

    def _raw_position(self, transformed_obs):
        """Return ``position`` converted back from normalized to raw units."""
        return transformed_obs["position"] * self.POSITION_SCALE

    async def compute_reward(self, transformed_obs, action, sim_reward):
        """Calculate reward for reinforcement learning.

        The reward is the negative raw-unit distance to the target, plus a
        bonus of 100 inside the success tolerance, minus an energy penalty of
        ``0.1 * abs(action[0])``. ``sim_reward`` is not used.
        """
        distance = abs(self._raw_position(transformed_obs) - self.target)
        # Shaped reward: closer to the target is better.
        reward = -distance
        # Bonus for reaching the target.
        if distance < self.SUCCESS_TOLERANCE:
            reward += 100
        # Penalty for energy usage.
        reward -= 0.1 * abs(action[0])
        return reward

    async def compute_success_criteria(self, transformed_obs, action):
        """Return True when the raw position is within tolerance of the target."""
        distance = abs(self._raw_position(transformed_obs) - self.target)
        return distance < self.SUCCESS_TOLERANCE

    async def compute_termination(self, transformed_obs, action):
        """Return True when the episode should end.

        The episode ends on success, when the raw position leaves
        ``[-OUT_OF_BOUNDS, OUT_OF_BOUNDS]``, or after ``MAX_EPISODE_STEPS``
        calls. The step counter is reset whenever the episode ends so that a
        reused teacher instance starts the next episode from zero instead of
        terminating immediately.
        """
        self.episode_steps += 1
        succeeded = await self.compute_success_criteria(transformed_obs, action)
        out_of_bounds = abs(self._raw_position(transformed_obs)) > self.OUT_OF_BOUNDS
        timed_out = self.episode_steps >= self.MAX_EPISODE_STEPS
        done = bool(succeeded or out_of_bounds or timed_out)
        if done:
            self.episode_steps = 0
        return done

    async def transform_sensors(self, sensors, action):
        """Return a copy of ``sensors`` with ``position`` divided by POSITION_SCALE.

        The input mapping is not mutated. Sensors other than ``position`` are
        passed through unchanged, and a missing ``position`` key is tolerated.
        """
        transformed = dict(sensors)
        if "position" in transformed:
            transformed["position"] = transformed["position"] / self.POSITION_SCALE
        return transformed

    async def transform_action(self, transformed_obs, action):
        """Transform action to simulator space by clipping it to [-1, 1]."""
        return np.clip(action, -1, 1)

    async def filtered_sensor_space(self):
        """Specify which sensors this skill needs."""
        return ["position", "velocity", "target"]

    async def compute_action_mask(self, transformed_obs, action):
        """Optional: return a mask of valid actions, or None if all are valid.

        At or beyond the lower track limit only the first action stays enabled
        ("forward"); at or beyond the upper limit only the second ("backward").
        NOTE(review): the two-entry mask presumably matches a two-action
        discrete space -- confirm against the skill's action space.
        """
        position = self._raw_position(transformed_obs)
        if position <= -self.TRACK_LIMIT:
            return [True, False]  # Can only go forward
        if position >= self.TRACK_LIMIT:
            return [False, True]  # Can only go backward
        return None  # All actions valid
# Create skill with teacher
skill = Skill("reach-target", CustomTeacher(target_position=25.0))
Skill Composition Patterns
Per Skill Configuration
Algorithms
PPO (Proximal Policy Optimization)
SAC (Soft Actor-Critic)
DQN (Deep Q-Network)
IMPALA
Custom Algorithm
Last updated