/**
 * Options that may be supplied on a single LlamaCpp call.
 *
 * Every field is optional; omit a field to fall back to the default
 * behaviour described on that field.
 */
interface LlamaCppCallOptions {
  /**
   * Callbacks for this call and any sub-calls (e.g., a Chain calling an LLM).
   * Tags are passed to all callbacks; metadata is passed to `handle*Start`
   * callbacks.
   */
  callbacks?: Callbacks;

  /**
   * Runtime values for attributes previously made configurable on this
   * Runnable, or sub-Runnables.
   */
  configurable?: Record<string, any>;

  /** Maximum number of parallel calls to make. */
  maxConcurrency?: number;

  /** The maximum number of tokens the response should contain. */
  maxTokens?: number;

  /**
   * Metadata for this call and any sub-calls. Keys should be strings and
   * values should be JSON-serializable.
   */
  metadata?: Record<string, unknown>;

  /**
   * Callback that receives an array of numeric tokens.
   * NOTE(review): presumably invoked as the model generates tokens; confirm
   * against the underlying llama.cpp binding.
   */
  onToken?: (tokens: number[]) => void;

  /**
   * Maximum number of times a call can recurse. If not provided, defaults
   * to 25.
   */
  recursionLimit?: number;

  /**
   * Unique identifier for the tracer run for this call. If not provided, a
   * new UUID will be generated.
   */
  runId?: string;

  /**
   * Name for the tracer run for this call. Defaults to the name of the
   * class.
   */
  runName?: string;

  /**
   * Abort signal for this call. If provided, the call will be aborted when
   * the signal is aborted.
   */
  signal?: AbortSignal;

  /**
   * Stop tokens to use for this call. If not provided, the default stop
   * tokens for the model will be used.
   */
  stop?: string[];

  /**
   * Tags for this call and any sub-calls. You can use these to filter
   * calls.
   */
  tags?: string[];

  /** Timeout for this call in milliseconds. */
  timeout?: number;
}

Hierarchy

  • BaseLLMCallOptions
    • LlamaCppCallOptions

Properties

callbacks?: Callbacks

Callbacks for this call and any sub-calls (e.g., a Chain calling an LLM). Tags are passed to all callbacks; metadata is passed to handle*Start callbacks.

configurable?: Record<string, any>

Runtime values for attributes previously made configurable on this Runnable, or sub-Runnables.

maxConcurrency?: number

Maximum number of parallel calls to make.

maxTokens?: number

The maximum number of tokens the response should contain.

metadata?: Record<string, unknown>

Metadata for this call and any sub-calls (e.g., a Chain calling an LLM). Keys should be strings, and values should be JSON-serializable.

onToken?: ((tokens: number[]) => void)

A callback function invoked with an array of token IDs as the model generates tokens.

recursionLimit?: number

Maximum number of times a call can recurse. If not provided, defaults to 25.

runId?: string

Unique identifier for the tracer run for this call. If not provided, a new UUID will be generated.

runName?: string

Name for the tracer run for this call. Defaults to the name of the class.

signal?: AbortSignal

Abort signal for this call. If provided, the call will be aborted when the signal is aborted.

stop?: string[]

Stop tokens to use for this call. If not provided, the default stop tokens for the model will be used.

tags?: string[]

Tags for this call and any sub-calls (e.g., a Chain calling an LLM). You can use these to filter calls.

timeout?: number

Timeout for this call in milliseconds.