How to Create Data Augmentation Agents Using the Python SDK
Below are examples of how to create various Data Augmentation (DA) agents using the Python SDK. Each example includes the necessary imports, configuration, and code to start a task for the specific agent.
Create Labeler Agent
This example sets up a Labeler agent that applies a predefined label ("Science") to resources, based on a description ("Content related to science"). The task is configured to operate on all resources (both existing and future ones).
from nuclia import sdk
from nuclia_models.worker.tasks import TaskName, ApplyOptions, DataAugmentation
from nuclia_models.worker.proto import (
ApplyTo,
Filter,
LLMConfig,
Operation,
LabelOperation,
Label,
)
# URL of the target Knowledge Box; replace <your-kb-id> with your own KB id.
KNOWLEDGE_BOX_URL = (
    "https://europe-1.nuclia.cloud/api/v1/kb/<your-kb-id>"
)

# Configure SDK access to the Knowledge Box; replace the token placeholder
# with your API key.
sdk.NucliaAuth().kb(url=KNOWLEDGE_BOX_URL, token="<your-api-key>")
kb = sdk.NucliaKB()

# Start the Labeler task and keep the returned task description.
output = kb.task.start(
    task_name=TaskName.LABELER,
    apply=ApplyOptions.ALL,  # existing and future resources
    parameters=DataAugmentation(
        name="my-labeler-task",
        on=ApplyTo.FIELD,
        # Empty filter; populate it to restrict which resources the task
        # applies to.
        filter=Filter(),
        operations=[
            Operation(
                label=LabelOperation(
                    # Each Label pairs the label to apply with the description
                    # it is based on.
                    labels=[
                        Label(
                            label="Science",
                            description="Content related to science",
                        ),
                    ],
                    ident="category",
                )
            )
        ],
        llm=LLMConfig(model="chatgpt-azure-4o-mini"),
    ),
)
print(output)
>> name=<TaskName.LABELER: 'labeler'> status=<JobStatus.STARTED: 'started'> id='e64c8225-4eb1-4954-a9e5-b46dfd32a37b'
Create Generator Agent
This example configures a Generator agent to create short summaries for newly added documents. The task specifies a custom destination field to store the generated summaries.
from nuclia import sdk
from nuclia_models.worker.tasks import TaskName, ApplyOptions, DataAugmentation
from nuclia_models.worker.proto import (
ApplyTo,
Filter,
LLMConfig,
Operation,
AskOperation,
)
# URL of the target Knowledge Box; replace <your-kb-id> with your own KB id.
KNOWLEDGE_BOX_URL = (
    "https://europe-1.nuclia.cloud/api/v1/kb/<your-kb-id>"
)

# Configure SDK access to the Knowledge Box; replace the token placeholder
# with your API key.
sdk.NucliaAuth().kb(url=KNOWLEDGE_BOX_URL, token="<your-api-key>")
kb = sdk.NucliaKB()

# Start the Generator (ask) task and keep the returned task description.
output = kb.task.start(
    task_name=TaskName.ASK,
    apply=ApplyOptions.NEW,  # only newly added resources
    parameters=DataAugmentation(
        name="my-generator-task",
        on=ApplyTo.FIELD,
        filter=Filter(),  # empty filter: no restriction configured here
        operations=[
            Operation(
                ask=AskOperation(
                    question="Make a short summary of the document",
                    # Custom field in which the generated summary is stored.
                    destination="summarized_field_id",
                    json=False,
                )
            )
        ],
        llm=LLMConfig(model="chatgpt-azure-4o-mini"),
    ),
)
print(output)
>> name=<TaskName.ASK: 'ask'> status=<JobStatus.NOT_RUNNING: 'not_running'> id='d3e7980a-16b6-4ba7-80c6-1c870d8c1c5a'
Create Graph Extraction Agent
This example demonstrates how to create a Graph Extraction agent that extracts entities ("Developer" and "CTO") from resources. The task builds a graph structure with these entities and their relationships.
from nuclia import sdk
from nuclia_models.worker.tasks import TaskName, ApplyOptions, DataAugmentation
from nuclia_models.worker.proto import (
ApplyTo,
Filter,
LLMConfig,
Operation,
GraphOperation,
EntityDefinition,
)
# URL of the target Knowledge Box; replace <your-kb-id> with your own KB id.
KNOWLEDGE_BOX_URL = (
    "https://europe-1.nuclia.cloud/api/v1/kb/<your-kb-id>"
)

# Configure SDK access to the Knowledge Box; replace the token placeholder
# with your API key.
sdk.NucliaAuth().kb(url=KNOWLEDGE_BOX_URL, token="<your-api-key>")
kb = sdk.NucliaKB()

# Start the Graph Extraction task and keep the returned task description.
output = kb.task.start(
    task_name=TaskName.LLM_GRAPH,
    apply=ApplyOptions.NEW,
    parameters=DataAugmentation(
        name="my-graph-task",
        on=ApplyTo.FIELD,
        filter=Filter(),  # empty filter: no restriction configured here
        operations=[
            Operation(
                graph=GraphOperation(
                    # One definition per entity type to extract; each has a
                    # label and a natural-language description.
                    entity_defs=[
                        EntityDefinition(
                            label="Developer",
                            description="Person that implements software solutions",
                        ),
                        EntityDefinition(
                            label="CTO",
                            description=(
                                "The highest technology executive position "
                                "within a company and leads the technology or engineering department"
                            ),
                        ),
                    ],
                    ident="my-graph-task_213d",
                )
            )
        ],
        llm=LLMConfig(model="chatgpt-azure-4o-mini"),
    ),
)
print(output)
>> name=<TaskName.LLM_GRAPH: 'llm-graph'> status=<JobStatus.NOT_RUNNING: 'not_running'> id='751db63c-0a41-44ce-b5ed-43ebc699b878'
Create Question & Answer Generation Agent
This example sets up an agent to generate synthetic questions and answers for new resources.
from nuclia import sdk
from nuclia_models.worker.tasks import TaskName, ApplyOptions, DataAugmentation
from nuclia_models.worker.proto import (
ApplyTo,
Filter,
LLMConfig,
Operation,
QAOperation,
EntityDefinition,
)
# URL of the target Knowledge Box; replace <your-kb-id> with your own KB id.
KNOWLEDGE_BOX_URL = (
    "https://europe-1.nuclia.cloud/api/v1/kb/<your-kb-id>"
)

# Configure SDK access to the Knowledge Box; replace the token placeholder
# with your API key.
sdk.NucliaAuth().kb(url=KNOWLEDGE_BOX_URL, token="<your-api-key>")
kb = sdk.NucliaKB()

# Start the synthetic questions & answers task and keep the returned task
# description.
output = kb.task.start(
    task_name=TaskName.SYNTHETIC_QUESTIONS,
    apply=ApplyOptions.NEW,
    parameters=DataAugmentation(
        name="my-q&a-task",
        on=ApplyTo.FIELD,
        filter=Filter(),  # empty filter: no restriction configured here
        # QAOperation is constructed without arguments in this example.
        operations=[Operation(qa=QAOperation())],
        llm=LLMConfig(model="chatgpt-azure-4o-mini"),
    ),
)
print(output)
>> name=<TaskName.SYNTHETIC_QUESTIONS: 'synthetic-questions'> status=<JobStatus.NOT_RUNNING: 'not_running'> id='e9f5fc13-4289-44d2-b841-c16df1ade404'