Cache-first
Enable the semantic cache for repeated prompts. Tune cache_similarity_threshold (a value between 0 and 1) to match the behavior configured in the console.
python
from tokensaver_sdk import TokenSaver

ts = TokenSaver(api_key="ts_...")
prompt = "Summarize the Q1 support incidents in exactly 4 bullets."

# First call: expected cache miss — the response should be stored in the
# semantic cache so later similar prompts can reuse it.
first = ts.ask(
    prompt,
    provider="openai",
    model="gpt-4o",
    use_cache=True,
    cache_similarity_threshold=0.85,
)

# Second call with the identical prompt: expected to be served from the
# cache (cache_hit=True), avoiding a second provider charge.
second = ts.ask(
    prompt,
    provider="openai",
    model="gpt-4o",
    use_cache=True,
    cache_similarity_threshold=0.85,
)

# NOTE: these were originally fused onto one line (a syntax error);
# they must be separate statements.
print("first cache_hit:", first.metrics.cache_hit)
print("second cache_hit:", second.metrics.cache_hit)
print("second cost:", second.metrics.cost_usd)