Skip to content
GitHubDiscord

Spy on Internal Calls

Open In Colab

WithSpy is an InteractionSpec wrapper that temporarily patches a target function — identified by its Python import path — with a MagicMock while the wrapped interaction generator runs. After each interaction completes, the mock's call history (call_count, call_args, call_args_list, mock_calls) is injected into Interaction.metadata under the target key, and the mock is reset before the next interaction.

The typical use case is verifying that an agent passes the right arguments to an internal call — for example, confirming that a database query triggered by a tool call used the correct filter parameters.

We have an order-support agent. When a user asks about their orders, the agent calls fetch_orders to retrieve them from the database and then formats a reply. We want to verify two things:

  1. The agent's final answer mentions the order count.
  2. fetch_orders was called with the correct customer_id and status filter — i.e., the agent didn't accidentally query the wrong customer or drop the status filter.
# This is the internal DB call we want to spy on.
def fetch_orders(customer_id: str, status: str = "all") -> list:
    """Retrieve orders from the database.

    Stand-in for a real database query: it always returns the same two
    hard-coded orders, each stamped with the requested ``status``.

    Args:
        customer_id: Customer whose orders are requested. This stub does
            not read it; it exists so a spy can record the value passed.
        status: Status filter; copied into every returned order.

    Returns:
        A list of two order dicts with ``order_id``, ``status`` and
        ``total`` keys.
    """
    # In production this would hit a real DB.
    return [
        {"order_id": "ORD-1", "status": status, "total": 49.99},
        {"order_id": "ORD-2", "status": status, "total": 12.50},
    ]
def order_support_agent(inputs: str) -> str:
    """Answer an order-related question.

    In a real system an LLM would decide to call fetch_orders as a tool.
    Here we simulate that decision with a simple heuristic.

    Args:
        inputs: The user's question. This simulation does not read it.

    Returns:
        A reply stating how many shipped orders were found and for which
        customer.
    """
    customer_id = "CUST-001"  # extracted from session / question by the LLM
    # fetch_orders is looked up by name at call time, which is what lets a
    # patch on the module attribute intercept this call.
    orders = fetch_orders(customer_id, status="shipped")
    return f"You have {len(orders)} shipped order(s), {customer_id}."

target is the Python import path that mock.patch will use to replace fetch_orders for the duration of the interaction. Use the same dotted path you would pass to unittest.mock.patch.

from giskard.checks import Scenario, Interact, WithSpy, FnCheck
# The plain interaction: one user question answered by the agent.
interaction_spec = Interact(
    inputs="What are my shipped orders?",
    outputs=order_support_agent,
)

# The same interaction, with fetch_orders spied on while it runs.
spied_spec = WithSpy(
    interaction_generator=interaction_spec,
    # Python import path — same as mock.patch
    target="__main__.fetch_orders",
)

Use .add_interaction() instead of .interact() when passing a WithSpy (or any raw InteractionSpec). The scenario check verifies the agent's output; the spy check is done separately after run() using spy_data.

# Run the spied interaction, then check that the agent's reply names the
# expected customer.
scenario = (
    Scenario("verify_order_query")
    .add_interaction(spied_spec)
    .check(
        FnCheck(
            name="references_correct_customer",
            fn=lambda trace: "CUST-001" in trace.last.outputs,
        )
    )
)

After run(), the mock's call history is available in result.final_trace.last.metadata under the same key as target.

result = await scenario.run()
target = "__main__.fetch_orders"
spy_data = result.final_trace.last.metadata.get(target)
print(spy_data)

Output

{'call_args_list': [call('CUST-001', status='shipped')], 'call_count': 1, 'call_args': call('CUST-001', status='shipped'), 'mock_calls': [call('CUST-001', status='shipped'), call().__len__()]}
# Fail early with a clear hint when nothing was recorded under the target key.
assert spy_data is not None, "No spy data — check the target import path"
assert spy_data["call_count"] == 1, "fetch_orders should be called exactly once"

# call_args describes the most recent call: positional args in .args,
# keyword args in .kwargs.
call_args = spy_data["call_args"]
assert call_args.args[0] == "CUST-001", "Wrong customer_id passed to fetch_orders"
assert call_args.kwargs.get("status") == "shipped", "Status filter was not 'shipped'"

print(
    f"fetch_orders called with customer_id={call_args.args[0]!r}, "
    f"status={call_args.kwargs['status']!r}"
)
print(f"Scenario passed: {result.passed}")

Output

fetch_orders called with customer_id='CUST-001', status='shipped'
Scenario passed: True

import asyncio
from giskard.checks import Scenario, Interact, WithSpy, FnCheck
# --- system under test ---
def fetch_orders_complete(customer_id: str, status: str = "all") -> list:
    """Return two hard-coded orders stamped with the requested status.

    Args:
        customer_id: Customer whose orders are requested. This stub does
            not read it; it exists so a spy can record the value passed.
        status: Status filter; copied into every returned order.

    Returns:
        A list of two order dicts with ``order_id``, ``status`` and
        ``total`` keys.
    """
    return [
        {"order_id": "ORD-1", "status": status, "total": 49.99},
        {"order_id": "ORD-2", "status": status, "total": 12.50},
    ]
def order_agent_complete(inputs: str) -> str:
    """Answer an order question by fetching the customer's shipped orders.

    Args:
        inputs: The user's question. This simulation does not read it.

    Returns:
        A reply stating how many shipped orders were found and for which
        customer.
    """
    customer_id = "CUST-001"
    # Looked up by name at call time, so a patch on the module attribute
    # intercepts this call.
    orders = fetch_orders_complete(customer_id, status="shipped")
    return f"You have {len(orders)} shipped order(s), {customer_id}."
# --- scenario ---
async def run_spy_scenario():
    """Run the order-query scenario with a spy on fetch_orders_complete.

    Builds a spied interaction around order_agent_complete, runs a scenario
    that checks the reply mentions the customer, then prints what the spy
    recorded and whether the scenario passed.

    Returns:
        The result object returned by ``scenario.run()``.

    Raises:
        RuntimeError: If no spy data is found under the target key.
    """
    # Single source of truth for the patch target and the metadata key.
    target = "__main__.fetch_orders_complete"

    spied_spec = WithSpy(
        interaction_generator=Interact(
            inputs="What are my shipped orders?",
            outputs=order_agent_complete,
        ),
        target=target,
    )
    scenario = (
        Scenario("verify_order_query")
        .add_interaction(spied_spec)
        .check(
            FnCheck(
                fn=lambda trace: "CUST-001" in trace.last.outputs,
                name="references_correct_customer",
            )
        )
    )
    result = await scenario.run()

    spy_data = result.final_trace.last.metadata.get(target)
    if spy_data is None:
        # Without this guard the subscript below fails with an opaque
        # "'NoneType' object is not subscriptable".
        raise RuntimeError("No spy data — check the target import path")

    call_args = spy_data["call_args"]
    print(f"fetch_orders called {spy_data['call_count']} time(s)")
    print(f" customer_id = {call_args.args[0]!r}")
    print(f" status = {call_args.kwargs['status']!r}")
    print(f"Scenario passed: {result.passed}")
    return result
# Run only when executed as the main module: the spy target above is addressed
# as "__main__.fetch_orders_complete", so an import must not trigger the run.
if __name__ == "__main__":
    asyncio.run(run_spy_scenario())

Output

fetch_orders called 1 time(s)
 customer_id = 'CUST-001'
 status = 'shipped'
Scenario passed: True

──────────────────────────────────────────────────── ✅ PASSED ────────────────────────────────────────────────────
references_correct_customer PASS
────────────────────────────────────────────────────── Trace ──────────────────────────────────────────────────────
────────────────────────────────────────────────── Interaction 1 ──────────────────────────────────────────────────
Inputs: 'What are my shipped orders?'
Outputs: 'You have 0 shipped order(s), CUST-001.'
────────────────────────────────────────────────── 1 step in 0ms ──────────────────────────────────────────────────