Cache-first
Enable the semantic cache for repeated prompts. Tune cache_similarity_threshold (a value between 0 and 1) to match the behavior configured in the console.
python
from tokensaver_sdk import TokenSaver

ts = TokenSaver(api_key="ts_...")
prompt = "Summarize the Q1 support incidents in exactly 4 bullets."

# First call: expected cache miss — the response should be stored in the
# semantic cache so later similar prompts can reuse it.
first = ts.ask(
    prompt,
    provider="openai",
    model="gpt-4o",
    use_cache=True,
    cache_similarity_threshold=0.85,
)

# Second call with the identical prompt: expected to be served from the
# cache (cache_hit=True), avoiding a second provider charge.
second = ts.ask(
    prompt,
    provider="openai",
    model="gpt-4o",
    use_cache=True,
    cache_similarity_threshold=0.85,
)

# NOTE: these were originally fused onto one line (a syntax error);
# they must be separate statements.
print("first cache_hit:", first.metrics.cache_hit)
print("second cache_hit:", second.metrics.cache_hit)
print("second cost:", second.metrics.cost_usd)