Hidden Modes — Teleport, Voice, Fast Mode, Effort, Ultraplan¶

Claude Code has several modes beyond the standard REPL. Understanding these patterns lets you build similar capabilities into your own harness.

Why this chapter exists¶

Most developers interact with Claude Code through the basic prompt-response loop. Beneath the surface, however, there are specialized modes that change how the harness communicates with the model, what tools are available, and where a session can run. Each mode is a self-contained pattern you can lift into your own agent.

1. Teleport Mode (Cross-Machine Session Transfer)¶

Sessions can be transferred between machines. The flow:

Serialize conversation messages to JSON
Create a git bundle of the current branch (committed history only; uncommitted working-tree changes are not included)
Upload session data via API
On the target machine: download session, apply git bundle, resume conversation

When to use

Teleport is useful for handing off long-running sessions from a laptop to a cloud VM, or sharing in-progress work with a teammate who needs full conversation context.

from dataclasses import dataclass


@dataclass(frozen=True)
class TeleportPayload:
    session_id: str
    messages: list[dict]
    branch_name: str
    git_bundle_path: str | None


async def teleport_export(session, working_dir: str) -> TeleportPayload:
    """Export the current session for transfer to another machine.

    Bundles the currently checked-out branch of ``working_dir`` into a git
    bundle file and pairs it with a snapshot of the conversation.

    Note that a git bundle carries committed history only: uncommitted
    working-tree changes are NOT part of the export.

    Args:
        session: Object exposing ``session_id`` and ``messages``.
        working_dir: Path of the git repository to bundle.

    Returns:
        A ``TeleportPayload`` whose ``git_bundle_path`` points at the bundle
        file written into the system temporary directory.

    Raises:
        subprocess.CalledProcessError: If either git command fails.
    """
    import subprocess
    import tempfile
    from pathlib import Path

    # Name of the checked-out branch; git prints "HEAD" when detached.
    branch = subprocess.check_output(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
        cwd=working_dir,
        text=True,
    ).strip()

    # Use the platform temp dir (honours TMPDIR, works on Windows) rather
    # than a hard-coded /tmp.
    bundle_path = str(
        Path(tempfile.gettempdir()) / f"teleport-{session.session_id}.bundle"
    )
    subprocess.run(
        ["git", "bundle", "create", bundle_path, branch],
        cwd=working_dir,
        check=True,
    )

    return TeleportPayload(
        session_id=session.session_id,
        # Snapshot the list so later appends to the live session do not
        # leak into the (frozen) payload.
        messages=list(session.messages),
        branch_name=branch,
        git_bundle_path=bundle_path,
    )


async def teleport_import(
    payload: TeleportPayload, working_dir: str
) -> list[dict]:
    """Import a teleported session on the target machine.

    When the payload carries a bundle, the bundled branch is fetched into the
    repository at ``working_dir`` and checked out. The local branch of the
    same name is created, or moved if it already exists, to the bundled
    commit, so the transferred state takes precedence.

    Args:
        payload: Session data produced by the export step.
        working_dir: Path of an existing git repository on this machine.

    Returns:
        A new message list: the transferred messages followed by a note that
        the session was continued from another machine. ``payload.messages``
        itself is not modified.

    Raises:
        subprocess.CalledProcessError: If a git command fails.
    """
    import subprocess

    if payload.git_bundle_path:
        # `git bundle unbundle` is plumbing: it stores the objects but does
        # not create or update any ref, so a following `git checkout <branch>`
        # would fail or check out a stale local branch. Fetching from the
        # bundle records the bundled tip in FETCH_HEAD instead.
        subprocess.run(
            ["git", "fetch", payload.git_bundle_path, payload.branch_name],
            cwd=working_dir,
            check=True,
        )
        if payload.branch_name == "HEAD":
            # "HEAD" is what rev-parse reports for a detached checkout on the
            # source machine; it is not a usable branch name.
            checkout_cmd = ["git", "checkout", "--detach", "FETCH_HEAD"]
        else:
            checkout_cmd = [
                "git", "checkout", "-B", payload.branch_name, "FETCH_HEAD",
            ]
        subprocess.run(checkout_cmd, cwd=working_dir, check=True)

    # Resume with a user-role note telling the model about the transfer.
    resume_message = {
        "role": "user",
        "content": (
            "This session was continued from another machine. "
            f"Working directory: {working_dir}"
        ),
    }
    return [*payload.messages, resume_message]

2. Fast Mode (Priority Processing)¶

Fast mode provides faster inference at 6x the cost (Opus 4.6: $30/$150 per Mtok vs $5/$25). The harness manages a cooldown state so rate-limit errors degrade gracefully.

from dataclasses import dataclass, replace
import time


@dataclass(frozen=True)
class FastModeState:
    """Immutable fast-mode state; every transition returns a new instance."""

    enabled: bool = False
    status: str = "active"  # either "active" or "cooldown"
    cooldown_until: float = 0.0  # compared against time.time()
    cooldown_reason: str = ""

    def is_available(self) -> bool:
        """Tell whether fast mode may be used right now.

        Disabled state is never available. A state in cooldown becomes
        available again once the current time reaches ``cooldown_until``.
        """
        if not self.enabled:
            return False
        return self.status != "cooldown" or time.time() >= self.cooldown_until

    def with_cooldown(
        self, duration_seconds: float, reason: str
    ) -> "FastModeState":
        """Return a copy placed in cooldown for ``duration_seconds`` from now."""
        expires_at = time.time() + duration_seconds
        return replace(
            self,
            status="cooldown",
            cooldown_until=expires_at,
            cooldown_reason=reason,
        )

    def check_and_reset(self) -> "FastModeState":
        """Return an "active" copy if the cooldown has run out, else ``self``."""
        cooldown_over = (
            self.status == "cooldown" and time.time() >= self.cooldown_until
        )
        if not cooldown_over:
            return self
        # cooldown_until is left as-is; only status and reason are cleared.
        return replace(self, status="active", cooldown_reason="")

Use with either SDK:

# Priority processing is requested through a beta header on the request.
model = "claude-opus-4-6"

def build_fast_headers(fast_mode: FastModeState) -> dict[str, str]:
    """Build the extra request headers for the given fast-mode state.

    Returns the Anthropic beta header when ``fast_mode.is_available()`` is
    true, and an empty dict otherwise.
    """
    headers: dict[str, str] = {}
    if fast_mode.is_available():
        # Fast mode is billed at $30/$150 per Mtok instead of $5/$25.
        headers["anthropic-beta"] = "fast-mode-2025-04-01"
    return headers

3. Effort Control (Thinking Budget)¶

Effort levels control how much "thinking" the model does before responding. Higher effort means deeper reasoning at the cost of latency and tokens.

from enum import Enum


class EffortLevel(Enum):
    """Named reasoning-effort tiers, from least to most thinking."""

    # Quick answers with minimal overhead.
    LOW = "low"
    # Balanced; the default for most models.
    MEDIUM = "medium"
    # Comprehensive, thorough reasoning.
    HIGH = "high"
    # Maximum reasoning; Opus 4.6 only.
    MAX = "max"


def resolve_effort(
    model: str, user_preference: str | None
) -> EffortLevel | None:
    """Pick the effort level to use for ``model``.

    With no user preference, Opus 4.6 models default to MEDIUM and every
    other model gets ``None`` (no effort parameter is sent). An explicit
    preference is parsed into an ``EffortLevel``; MAX falls back to HIGH on
    models other than Opus 4.6.

    Raises:
        ValueError: If ``user_preference`` is not a valid ``EffortLevel`` value.
    """

    def is_opus_4_6() -> bool:
        return "opus-4-6" in model

    if user_preference is not None:
        requested = EffortLevel(user_preference)
        # MAX is only supported by Opus 4.6; step down instead of failing.
        must_downgrade = requested == EffortLevel.MAX and not is_opus_4_6()
        return EffortLevel.HIGH if must_downgrade else requested

    # No preference given: fall back to the model-specific default.
    return EffortLevel.MEDIUM if is_opus_4_6() else None


def apply_effort_to_params(
    params: dict, effort: EffortLevel | None
) -> dict:
    """Return a new params dict with effort-related fields set.

    Args:
        params: Request parameters; never mutated.
        effort: Effort level to apply, or ``None`` to leave thinking untouched.

    Returns:
        A new dict. When ``effort`` is ``None`` it is a shallow copy of
        ``params``; otherwise it additionally carries a ``"thinking"`` entry
        with the token budget for that effort level.
    """
    if effort is None:
        # Copy so the "new dict" contract holds on this path too and callers
        # can mutate the result without touching their input.
        return dict(params)

    budget_by_effort = {
        EffortLevel.LOW: 1024,
        EffortLevel.MEDIUM: 8192,
        EffortLevel.HIGH: 16384,
        EffortLevel.MAX: 31999,
    }
    budget = budget_by_effort[effort]

    # The API requires budget_tokens to be strictly less than max_tokens;
    # a budget at or above the caller's max_tokens is rejected. Clamp
    # instead of sending a request that is known to fail. If this pushes the
    # budget below the API minimum, the API error surfaces to the caller.
    max_tokens = params.get("max_tokens")
    if (
        isinstance(max_tokens, int)
        and not isinstance(max_tokens, bool)
        and budget >= max_tokens
    ):
        budget = max_tokens - 1

    return {
        **params,
        "thinking": {
            "type": "enabled",
            "budget_tokens": budget,
        },
    }

Budget tokens are not output tokens

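The budget_tokens field caps the model's internal reasoning tokens, not the length of its final answer: the model may still produce a long response regardless of the thinking budget. The API also requires budget_tokens to be smaller than max_tokens.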

4. Ultraplan (Keyword-Triggered Planning)¶

When a user types a specific keyword (e.g., "ultraplan"), the harness intercepts the input and switches into a structured planning mode before sending anything to the model.

import re

ULTRAPLAN_KEYWORDS = frozenset({"ultraplan", "ultrareview"})


def find_trigger_keyword(text: str) -> str | None:
    """Detect planning keywords while skipping quoted strings and paths."""
    # Strip content inside quotes
    stripped = re.sub(r'["\'].*?["\']', "", text)
    # Strip file paths
    stripped = re.sub(r"/\S+", "", stripped)

    words = stripped.lower().split()
    for word in words:
        if word in ULTRAPLAN_KEYWORDS:
            return word
    return None


def handle_ultraplan(keyword: str, user_input: str, session) -> str:
    """Switch the session into the mode selected by ``keyword``.

    "ultraplan" calls ``session.enter_plan_mode()`` and "ultrareview" calls
    ``session.enter_review_mode()``; each returns a status line that embeds
    ``user_input``.

    Raises:
        ValueError: If ``keyword`` is neither of the two known triggers.
    """
    wants_plan = keyword == "ultraplan"
    if not wants_plan and keyword != "ultrareview":
        raise ValueError(f"Unknown trigger keyword: {keyword}")

    if wants_plan:
        session.enter_plan_mode()
        return f"Entering structured planning mode for: {user_input}"

    session.enter_review_mode()
    return f"Starting deep code review for: {user_input}"

5. Plan Mode (Structured Planning Before Implementation)¶

Plan mode restricts the model to read-only operations until a plan is approved. This prevents the agent from making changes before the user has reviewed its approach.

from dataclasses import dataclass, field, replace


@dataclass(frozen=True)
class PlanMode:
    """Immutable plan-mode state; each transition returns a new instance."""

    active: bool = False
    plan_text: str = ""
    approved: bool = False

    def enter(self) -> "PlanMode":
        """Return a copy with plan mode on and any earlier plan/approval cleared."""
        return replace(self, plan_text="", approved=False, active=True)

    def exit_with_plan(self, plan: str) -> "PlanMode":
        """Return a copy carrying ``plan`` for user approval.

        Only ``plan_text`` changes; ``active`` keeps its current value.
        """
        return replace(self, plan_text=plan)

    def approve(self) -> "PlanMode":
        """Return a copy marked approved with plan mode switched off."""
        return replace(self, active=False, approved=True)

    def get_restricted_tools(self, all_tools: list) -> list:
        """Return the tools usable in the current state.

        While plan mode is active, only tools whose ``is_read_only()`` is
        truthy are kept; otherwise ``all_tools`` is returned as-is.
        """
        if self.active:
            return [tool for tool in all_tools if tool.is_read_only()]
        return all_tools

Combine with Ultraplan

The ultraplan keyword from section 4 calls session.enter_plan_mode(), which activates the PlanMode guard above. Together they form a two-step pipeline: keyword detection triggers plan mode, and plan mode restricts tool access until the user approves.

6. Voice Mode (Audio Input)¶

Voice mode captures audio from a microphone, detects silence to know when the user has stopped speaking, and transcribes the result into text that feeds the normal query loop.

import asyncio
from dataclasses import dataclass


@dataclass(frozen=True)
class VoiceConfig:
    """Immutable settings for voice capture."""

    # Capture sample rate in Hz (16 kHz by default).
    sample_rate: int = 16_000
    # Length of silence, in milliseconds, used for end-of-speech detection.
    silence_threshold_ms: int = 1500
    # Upper bound on a single recording, in seconds.
    max_recording_seconds: int = 120


async def capture_voice_input(config: VoiceConfig) -> str:
    """Capture audio input and transcribe it using STT.

    Tries the native capture path first and falls back to the SoX-based
    capture when the native backend cannot be loaded.

    Args:
        config: Capture settings passed through to the capture backend.

    Returns:
        The transcript produced by ``_transcribe``.
    """
    try:
        audio_data = await _native_capture(config)
    except (ImportError, OSError):
        # ImportError: sounddevice or numpy is not installed.
        # OSError: sounddevice is installed but raised at import time because
        # the PortAudio library could not be found. Either way the native
        # backend is unusable, so use the SoX fallback.
        audio_data = await _sox_capture(config)

    return await _transcribe(audio_data)


async def _sox_capture(config: VoiceConfig) -> bytes:
    """Fallback audio capture using the SoX ``rec`` command.

    Records mono 16-bit audio at ``config.sample_rate`` and returns whatever
    ``rec`` wrote to stdout (WAV-formatted, per ``-t wav``).

    Args:
        config: Supplies the sample rate, the trailing-silence duration that
            ends the recording, and the maximum recording length.

    Returns:
        The bytes read from the ``rec`` process's stdout.
    """
    silence_secs = str(config.silence_threshold_ms / 1000)
    max_secs = str(config.max_recording_seconds)
    proc = await asyncio.create_subprocess_exec(
        "rec", "-q",
        "-r", str(config.sample_rate),
        "-c", "1", "-b", "16", "-t", "wav", "-",
        # Hard cap on recording time. Without it max_recording_seconds was
        # ignored and `rec` would run indefinitely if speech never started
        # or never went quiet. Placed before `silence` so the cap applies to
        # wall-clock input, including any leading wait.
        "trim", "0", max_secs,
        # Start once sound exceeds 1% for 0.1s; stop after silence_secs of
        # audio below 1%.
        "silence", "1", "0.1", "1%",
        "1", silence_secs, "1%",
        stdout=asyncio.subprocess.PIPE,
    )
    audio_data, _ = await proc.communicate()
    return audio_data


async def _native_capture(config: VoiceConfig) -> bytes:
    """Platform-native audio capture (requires sounddevice and numpy).

    Records mono int16 audio until either the speaker has gone quiet for
    ``config.silence_threshold_ms`` after having been heard, or
    ``config.max_recording_seconds`` of audio has been captured.

    Args:
        config: Supplies the sample rate, silence duration and maximum length.

    Returns:
        The captured samples as raw int16 bytes (empty if nothing was
        captured).
        NOTE(review): this is headerless PCM, whereas the SoX fallback emits
        WAV (`-t wav`); confirm the STT step handles both.

    Raises:
        ImportError: If sounddevice or numpy is not installed.
    """
    import sounddevice as sd  # type: ignore[import-untyped]
    import numpy as np

    frames: list[np.ndarray] = []
    max_samples = config.sample_rate * config.max_recording_seconds
    silence_samples = config.sample_rate * config.silence_threshold_ms // 1000
    # Same 1%-of-full-scale loudness threshold the SoX fallback uses.
    loud_level = 0.01 * np.iinfo(np.int16).max

    captured = 0        # total samples received so far
    heard_speech = False
    quiet_run = 0       # consecutive quiet samples since the last loud chunk

    def callback(indata, _frame_count, _time_info, _status):
        nonlocal captured, heard_speech, quiet_run
        chunk = indata.copy()
        frames.append(chunk)
        captured += len(chunk)
        # Widen before abs(): abs(-32768) overflows in int16.
        peak = np.abs(chunk.astype(np.int32)).max(initial=0)
        if peak >= loud_level:
            heard_speech = True
            quiet_run = 0
        elif heard_speech:
            quiet_run += len(chunk)

    def finished() -> bool:
        if captured >= max_samples:
            return True
        # Only stop on silence once the user has actually started speaking.
        return heard_speech and quiet_run >= silence_samples

    with sd.InputStream(
        samplerate=config.sample_rate,
        channels=1,
        dtype="int16",
        callback=callback,
    ):
        while not finished():
            await asyncio.sleep(0.1)

    if not frames:
        # np.concatenate raises on an empty list (e.g. a zero-length cap).
        return b""
    # Trim the overshoot from the final callback so the cap is exact.
    return np.concatenate(frames)[:max_samples].tobytes()


async def _transcribe(audio_data: bytes) -> str:
    """Speech-to-text hook, intentionally left unimplemented.

    Replace the body with a call to your STT provider.

    Raises:
        NotImplementedError: Always, until a provider is plugged in.
    """
    message = "Plug in Whisper, Deepgram, or another STT API"
    raise NotImplementedError(message)

Build it yourself¶

Pick the modes that match your product:

Mode	Core idea	Key mechanism
Teleport	Serialize session + git state	`git bundle` + JSON export
Fast mode	Priority tier with higher pricing	Beta header + cooldown FSM
Effort	Map effort levels to model params	Thinking budget tokens
Ultraplan	Keyword detection	Intercept input, enter plan mode
Plan mode	Read-only until approved	Tool filtering by `is_read_only()`
Voice	Audio capture to text	SoX / sounddevice + STT

Each mode is independent — you can adopt one without the others, or compose them (e.g., voice input triggers ultraplan, which activates plan mode with low effort).