Skip to main content

Signature

synkro.create_pipeline(
    model: str = "gpt-4o-mini",
    grading_model: str = "gpt-4o",
    dataset_type: DatasetType = DatasetType.CONVERSATION,
    max_iterations: int = 3,
    skip_grading: bool = False,
    reporter: ProgressReporter | None = None,
    tools: list[ToolDefinition] | None = None,
    checkpoint_dir: str | None = None,
    enable_hitl: bool = True,
    base_url: str | None = None,
    temperature: float = 0.7,
    thinking: bool = False,
) -> Pipeline

Parameters

model
str
default:"gpt-4o-mini"
Model for generating responses
grading_model
str
default:"gpt-4o"
Model for grading responses (use a stronger model than the generation model)
dataset_type
DatasetType
default:"CONVERSATION"
Type of dataset to generate
max_iterations
int
default:"3"
Max refinement iterations per trace
skip_grading
bool
default:"False"
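Skip grading phase for faster generation
reporter
ProgressReporter | None
default:"None"
Progress reporter for pipeline output (e.g. SilentReporter() for silent mode)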
tools
list[ToolDefinition] | None
default:"None"
Tool definitions for the TOOL_CALL dataset type
checkpoint_dir
str | None
default:"None"
Directory for checkpointing (resume interrupted generations)
enable_hitl
bool
default:"True"
Enable Human-in-the-Loop editing
base_url
str | None
default:"None"
Custom base URL for the model API endpoint
temperature
float
default:"0.7"
Sampling temperature for generation
thinking
bool
default:"False"
Enable thinking mode with <think> tags

Returns

A Pipeline object that exposes a generate() method.

Examples

Basic Pipeline

from synkro import create_pipeline

pipeline = create_pipeline()
dataset = pipeline.generate(policy, traces=100)

Custom Models

from synkro import create_pipeline
from synkro.models import Google

pipeline = create_pipeline(
    model=Google.GEMINI_25_FLASH,
    grading_model=Google.GEMINI_25_PRO,
)

Tool Calling Pipeline

from synkro import create_pipeline, ToolDefinition, DatasetType

tools = [
    ToolDefinition(
        name="search",
        description="Search the web",
        parameters={"type": "object", "properties": {...}},
    )
]

pipeline = create_pipeline(
    dataset_type=DatasetType.TOOL_CALL,
    tools=tools,
)

With Checkpointing

pipeline = create_pipeline(checkpoint_dir="./checkpoints")

# If interrupted, will resume from last checkpoint
dataset = pipeline.generate(policy, traces=500)

Thinking Mode

pipeline = create_pipeline(thinking=True)
dataset = pipeline.generate(policy, traces=50)

# Output includes <think> tags with reasoning

Silent Mode

from synkro import create_pipeline, SilentReporter

pipeline = create_pipeline(reporter=SilentReporter())

Fast Generation (Skip Grading)

pipeline = create_pipeline(skip_grading=True)

# Faster but no quality verification
dataset = pipeline.generate(policy, traces=100)