Add comprehensive rustdoc comments for all public API items

- Add crate-level docs with usage examples for lib.rs
- Document OllamaClient, OllamaClientBuilder, and all public methods
- Document the error module, including the OllamaError variants and OllamaResult
- Document the types module with an endpoint mapping table
- Document all types in common, chat, generate, ps, pull, tags, version
- Add doc examples for builders, constructors, and key types
- All 26 doc-tests pass
This commit is contained in:
21
src/error.rs
21
src/error.rs
@@ -1,13 +1,34 @@
|
||||
//! Error types for the Ollama client.
|
||||
//!
|
||||
//! This module defines [`OllamaError`], the unified error type returned by all
|
||||
//! client operations, and the [`OllamaResult<T>`] type alias for convenience.
|
||||
|
||||
use std::{error::Error, fmt::Display};
|
||||
|
||||
use tokio_util::codec::LinesCodecError;
|
||||
|
||||
/// A type alias for `Result<T, OllamaError>`.
///
/// Used throughout the crate as the standard return type for fallible operations,
/// so signatures can name only the success type `T`.
pub type OllamaResult<T> = Result<T, OllamaError>;
|
||||
|
||||
/// Errors that can occur when communicating with the Ollama server.
///
/// This enum covers three failure categories:
///
/// - **Network** -- connection failures, timeouts, or HTTP error status codes.
/// - **Parsing** -- the server returned a response that could not be deserialized as JSON.
/// - **Streaming** -- an error occurred while reading a streaming response line-by-line.
///
/// All variants wrap their underlying error and implement [`std::error::Error`],
/// [`Display`], and the relevant [`From`] conversions so they work seamlessly with `?`.
#[derive(Debug)]
pub enum OllamaError {
    /// An HTTP or connection-level error; wraps a [`reqwest::Error`].
    NetworkError(reqwest::Error),
    /// A JSON error; wraps a [`serde_json::Error`].
    ///
    /// Besides failed deserialization of server responses, this variant is also
    /// what `?` produces when serializing a value with `serde_json` fails.
    ResponseParseError(serde_json::Error),
    /// An error from the line-delimited streaming codec; wraps a
    /// `tokio_util::codec::LinesCodecError`.
    LinesCodecError(LinesCodecError),
}
|
||||
|
||||
|
||||
254
src/lib.rs
254
src/lib.rs
@@ -1,3 +1,83 @@
|
||||
//! # ollama-rs
|
||||
//!
|
||||
//! An async Rust client library for the [Ollama](https://ollama.com/) API.
|
||||
//!
|
||||
//! This crate provides a streaming-first interface for interacting with Ollama,
|
||||
//! supporting text generation, multi-turn chat conversations, model management,
|
||||
//! structured JSON output, and tool calling.
|
||||
//!
|
||||
//! ## Quick Start
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use ollama_rs::OllamaClient;
|
||||
//! use ollama_rs::types::generate::GenerateRequest;
|
||||
//! use futures_util::StreamExt;
|
||||
//!
|
||||
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
//! let client = OllamaClient::default();
|
||||
//!
|
||||
//! let request = GenerateRequest::builder("llama3")
|
||||
//! .prompt("Why is the sky blue?")
|
||||
//! .build();
|
||||
//!
|
||||
//! let mut stream = client.generate(request);
|
||||
//! while let Some(chunk) = stream.next().await {
|
||||
//! let response = chunk?;
|
||||
//! print!("{}", response.response);
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! ## Chat Conversations
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use ollama_rs::OllamaClient;
|
||||
//! use ollama_rs::types::chat::{ChatRequest, Message};
|
||||
//! use futures_util::StreamExt;
|
||||
//!
|
||||
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
//! let client = OllamaClient::default();
|
||||
//!
|
||||
//! let request = ChatRequest::builder("llama3")
|
||||
//! .messages(vec![
|
||||
//! Message::system("You are a helpful assistant."),
|
||||
//! Message::user("Hello!"),
|
||||
//! ])
|
||||
//! .build();
|
||||
//!
|
||||
//! let mut stream = client.chat(request);
|
||||
//! while let Some(chunk) = stream.next().await {
|
||||
//! let response = chunk?;
|
||||
//! print!("{}", response.message.content);
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
//!
|
||||
//! ## Custom Client Configuration
|
||||
//!
|
||||
//! ```
|
||||
//! use ollama_rs::OllamaClient;
|
||||
//! use std::time::Duration;
|
||||
//!
|
||||
//! // Connect to a remote server with a custom timeout
|
||||
//! let client = OllamaClient::builder("http://my-server:11434")
|
||||
//! .connection_timeout(Duration::from_secs(60))
|
||||
//! .build();
|
||||
//! ```
|
||||
//!
|
||||
//! ## Features
|
||||
//!
|
||||
//! - **Streaming responses** -- all generation endpoints return [`futures_util::Stream`]s,
|
||||
//! allowing token-by-token processing.
|
||||
//! - **Builder pattern** -- all request types use builders for ergonomic construction.
|
||||
//! - **Multi-turn chat** -- maintain conversation context with [`types::chat::ChatRequest`].
|
||||
//! - **Structured output** -- request JSON responses conforming to a schema via the `format` field.
|
||||
//! - **Tool calling** -- let the model invoke functions with [`types::chat::Tool`] definitions.
|
||||
//! - **Thinking mode** -- enable extended reasoning with [`types::common::Think`].
|
||||
//! - **Model management** -- list, pull, and inspect models.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use async_stream::stream;
|
||||
@@ -24,8 +104,33 @@ use crate::{
|
||||
pub mod error;
|
||||
pub mod types;
|
||||
|
||||
/// Default connection timeout applied when no custom timeout is specified (30 seconds).
|
||||
const DEFAULT_CONNECTION_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
/// An async client for communicating with an Ollama server.
|
||||
///
|
||||
/// `OllamaClient` is the primary entry point for this crate. It wraps an HTTP client
|
||||
/// and provides methods for every Ollama API endpoint.
|
||||
///
|
||||
/// # Construction
|
||||
///
|
||||
/// There are three ways to create a client:
|
||||
///
|
||||
/// - [`OllamaClient::default()`] -- connects to `http://localhost:11434` with a 30-second timeout.
|
||||
/// - [`OllamaClient::new()`] -- connects to a custom address with a 30-second timeout.
|
||||
/// - [`OllamaClient::builder()`] -- full control over address and connection timeout.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::OllamaClient;
|
||||
///
|
||||
/// // Default local connection
|
||||
/// let client = OllamaClient::default();
|
||||
///
|
||||
/// // Custom server address
|
||||
/// let client = OllamaClient::new("http://my-server:11434");
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
pub struct OllamaClient {
|
||||
server_address: String,
|
||||
@@ -39,6 +144,18 @@ impl Default for OllamaClient {
|
||||
}
|
||||
|
||||
impl OllamaClient {
|
||||
/// Creates a new client connected to the given server address.
|
||||
///
|
||||
/// Uses the default connection timeout of 30 seconds. For custom timeouts,
|
||||
/// use [`OllamaClient::builder()`] instead.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::OllamaClient;
|
||||
///
|
||||
/// let client = OllamaClient::new("http://localhost:11434");
|
||||
/// ```
|
||||
pub fn new<S: AsRef<str>>(server_address: S) -> Self {
|
||||
Self {
|
||||
server_address: server_address.as_ref().to_string(),
|
||||
@@ -49,6 +166,18 @@ impl OllamaClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an [`OllamaClientBuilder`] for constructing a client with custom settings.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::OllamaClient;
|
||||
/// use std::time::Duration;
|
||||
///
|
||||
/// let client = OllamaClient::builder("http://localhost:11434")
|
||||
/// .connection_timeout(Duration::from_secs(60))
|
||||
/// .build();
|
||||
/// ```
|
||||
pub fn builder<S: AsRef<str>>(server_address: S) -> OllamaClientBuilder {
|
||||
OllamaClientBuilder {
|
||||
server_address: server_address.as_ref().to_string(),
|
||||
@@ -56,7 +185,14 @@ impl OllamaClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve the version of the Ollama
|
||||
/// Retrieves the version of the connected Ollama server.
|
||||
///
|
||||
/// Calls `GET /api/version`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
|
||||
/// a non-success status code.
|
||||
pub async fn version(&self) -> OllamaResult<VersionResponse> {
|
||||
let request_address = format!("{}/api/version", self.server_address);
|
||||
Ok(self
|
||||
@@ -69,7 +205,14 @@ impl OllamaClient {
|
||||
.await?)
|
||||
}
|
||||
|
||||
/// Fetch a list of models and their details
|
||||
/// Lists all models available on the Ollama server along with their details.
|
||||
///
|
||||
/// Calls `GET /api/tags`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
|
||||
/// a non-success status code.
|
||||
pub async fn tags(&self) -> OllamaResult<TagsResponse> {
|
||||
let request_address = format!("{}/api/tags", self.server_address);
|
||||
info!("List models: {}", request_address);
|
||||
@@ -83,7 +226,14 @@ impl OllamaClient {
|
||||
.await?)
|
||||
}
|
||||
|
||||
/// Retrieve a list of models that are currently running
|
||||
/// Lists models that are currently loaded and running on the Ollama server.
|
||||
///
|
||||
/// Calls `GET /api/ps`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
|
||||
/// a non-success status code.
|
||||
pub async fn ps(&self) -> OllamaResult<PsResponse> {
|
||||
let request_address = format!("{}/api/ps", self.server_address);
|
||||
info!("List models: {}", request_address);
|
||||
@@ -135,7 +285,33 @@ impl OllamaClient {
|
||||
})
|
||||
}
|
||||
|
||||
/// Generates a response for the provided prompt
|
||||
/// Generates a text completion for the given prompt.
|
||||
///
|
||||
/// Returns a stream of [`GenerateResponse`] chunks. Each chunk contains a
|
||||
/// fragment of the generated text. The final chunk has `done` set to `true`
|
||||
/// and includes timing and token count statistics.
|
||||
///
|
||||
/// Calls `POST /api/generate`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use ollama_rs::OllamaClient;
|
||||
/// # use ollama_rs::types::generate::GenerateRequest;
|
||||
/// # use futures_util::StreamExt;
|
||||
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
/// let client = OllamaClient::default();
|
||||
/// let request = GenerateRequest::builder("llama3")
|
||||
/// .prompt("Explain quantum computing in one sentence.")
|
||||
/// .build();
|
||||
///
|
||||
/// let mut stream = client.generate(request);
|
||||
/// while let Some(chunk) = stream.next().await {
|
||||
/// print!("{}", chunk?.response);
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn generate(
|
||||
&self,
|
||||
request: GenerateRequest,
|
||||
@@ -144,30 +320,96 @@ impl OllamaClient {
|
||||
self.stream_response(request_address, request)
|
||||
}
|
||||
|
||||
/// Generate the next chat message in a conversation between a user and an assistant.
|
||||
/// Generates the next message in a multi-turn chat conversation.
|
||||
///
|
||||
/// Returns a stream of [`ChatResponse`] chunks. Each chunk contains a partial
|
||||
/// [`Message`](types::chat::Message) from the assistant. The final chunk has
|
||||
/// `done` set to `true`.
|
||||
///
|
||||
/// Calls `POST /api/chat`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use ollama_rs::OllamaClient;
|
||||
/// # use ollama_rs::types::chat::{ChatRequest, Message};
|
||||
/// # use futures_util::StreamExt;
|
||||
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
/// let client = OllamaClient::default();
|
||||
/// let request = ChatRequest::builder("llama3")
|
||||
/// .messages(vec![Message::user("What is 2 + 2?")])
|
||||
/// .build();
|
||||
///
|
||||
/// let mut stream = client.chat(request);
|
||||
/// while let Some(chunk) = stream.next().await {
|
||||
/// print!("{}", chunk?.message.content);
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn chat(&self, request: ChatRequest) -> impl Stream<Item = OllamaResult<ChatResponse>> {
|
||||
let request_address = format!("{}/api/chat", self.server_address);
|
||||
self.stream_response(request_address, request)
|
||||
}
|
||||
|
||||
/// Pull a model
|
||||
/// Downloads (pulls) a model from the Ollama registry.
|
||||
///
|
||||
/// Returns a stream of [`PullResponse`] chunks reporting the download status.
|
||||
///
|
||||
/// Calls `POST /api/pull`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use ollama_rs::OllamaClient;
|
||||
/// # use ollama_rs::types::pull::PullRequest;
|
||||
/// # use futures_util::StreamExt;
|
||||
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
/// let client = OllamaClient::default();
|
||||
/// let request = PullRequest::builder("llama3").build();
|
||||
///
|
||||
/// let mut stream = client.pull(request);
|
||||
/// while let Some(chunk) = stream.next().await {
|
||||
/// println!("{}", chunk?.status);
|
||||
/// }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn pull(&self, request: PullRequest) -> impl Stream<Item = OllamaResult<PullResponse>> {
|
||||
let request_address = format!("{}/api/pull", self.server_address);
|
||||
self.stream_response(request_address, request)
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing an [`OllamaClient`] with custom configuration.
///
/// Obtain a builder via [`OllamaClient::builder()`], adjust settings with the
/// chained setter methods, and finish with [`build()`](OllamaClientBuilder::build).
///
/// # Examples
///
/// ```
/// use ollama_rs::OllamaClient;
/// use std::time::Duration;
///
/// let client = OllamaClient::builder("http://localhost:11434")
///     .connection_timeout(Duration::from_secs(10))
///     .build();
/// ```
pub struct OllamaClientBuilder {
    // Server address handed to the built client, which prefixes it to every
    // endpoint path (e.g. `{server_address}/api/chat`).
    server_address: String,
    // Timeout value configured through `connection_timeout()`.
    connection_timeout: Duration,
}
|
||||
|
||||
impl OllamaClientBuilder {
|
||||
/// Sets the TCP connection timeout for the underlying HTTP client.
|
||||
///
|
||||
/// Defaults to 30 seconds if not specified.
|
||||
pub fn connection_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.connection_timeout = timeout;
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder and returns a configured [`OllamaClient`].
|
||||
pub fn build(self) -> OllamaClient {
|
||||
OllamaClient {
|
||||
server_address: self.server_address,
|
||||
|
||||
@@ -1,28 +1,84 @@
|
||||
//! Types for the chat conversation endpoint (`POST /api/chat`).
|
||||
//!
|
||||
//! Use [`ChatRequest::builder()`] to construct a request and pass it to
|
||||
//! [`OllamaClient::chat()`](crate::OllamaClient::chat). The response is
|
||||
//! streamed as a sequence of [`ChatResponse`] chunks.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```no_run
|
||||
//! # use ollama_rs::OllamaClient;
|
||||
//! # use ollama_rs::types::chat::{ChatRequest, Message};
|
||||
//! # use futures_util::StreamExt;
|
||||
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
//! let client = OllamaClient::default();
|
||||
//!
|
||||
//! let request = ChatRequest::builder("llama3")
|
||||
//! .messages(vec![
|
||||
//! Message::system("You are a helpful assistant."),
|
||||
//! Message::user("What is Rust?"),
|
||||
//! ])
|
||||
//! .build();
|
||||
//!
|
||||
//! let mut stream = client.chat(request);
|
||||
//! while let Some(chunk) = stream.next().await {
|
||||
//! print!("{}", chunk?.message.content);
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::OllamaResult;
|
||||
use crate::types::common::{Options, Think};
|
||||
|
||||
/// The role of a participant in a chat conversation.
|
||||
///
|
||||
/// Serialized as a lowercase string: `"user"`, `"system"`, `"assistant"`, `"tool"`.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Role {
|
||||
/// A human user.
|
||||
User,
|
||||
/// A system prompt that sets the assistant's behavior.
|
||||
System,
|
||||
/// The AI assistant.
|
||||
Assistant,
|
||||
/// A tool response providing data back to the model.
|
||||
Tool,
|
||||
}
|
||||
|
||||
/// A single message in a chat conversation.
///
/// Use the convenience constructors [`Message::system()`], [`Message::user()`],
/// and [`Message::tool_response()`] to create messages for the common roles.
/// Assistant messages are typically received from the model via [`ChatResponse`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Message {
    /// The text content of the message.
    pub content: String,
    /// The role of the message sender.
    pub role: Role,
    /// Tool calls requested by the assistant, if any.
    ///
    /// Omitted from serialization when empty, and deserialized as an empty
    /// vector when the field is missing from the input.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    #[serde(default)]
    pub tool_calls: Vec<ToolCall>,
}
|
||||
|
||||
impl Message {
|
||||
/// Creates a system message that sets the assistant's behavior.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::chat::Message;
|
||||
///
|
||||
/// let msg = Message::system("You are a helpful assistant.");
|
||||
/// ```
|
||||
pub fn system<T: Into<String>>(content: T) -> Self {
|
||||
Self {
|
||||
content: content.into(),
|
||||
@@ -31,6 +87,15 @@ impl Message {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a user message.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::chat::Message;
|
||||
///
|
||||
/// let msg = Message::user("Hello, how are you?");
|
||||
/// ```
|
||||
pub fn user<T: Into<String>>(content: T) -> Self {
|
||||
Self {
|
||||
content: content.into(),
|
||||
@@ -39,6 +104,23 @@ impl Message {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a tool response message from a JSON value.
|
||||
///
|
||||
/// The value is serialized to a JSON string and placed in the message content.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns [`OllamaError::ResponseParseError`](crate::error::OllamaError::ResponseParseError)
|
||||
/// if the value cannot be serialized.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::chat::Message;
|
||||
/// use serde_json::json;
|
||||
///
|
||||
/// let msg = Message::tool_response(&json!({"temperature": 22.0})).unwrap();
|
||||
/// ```
|
||||
pub fn tool_response(content: &Value) -> OllamaResult<Self> {
|
||||
Ok(Message {
|
||||
content: serde_json::to_string(content)?,
|
||||
@@ -48,45 +130,89 @@ impl Message {
|
||||
}
|
||||
}
|
||||
|
||||
/// A request to the chat endpoint (`POST /api/chat`).
|
||||
///
|
||||
/// Construct via [`ChatRequest::builder()`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::chat::{ChatRequest, Message};
|
||||
///
|
||||
/// let request = ChatRequest::builder("llama3")
|
||||
/// .messages(vec![Message::user("Hello!")])
|
||||
/// .stream(true)
|
||||
/// .build();
|
||||
/// ```
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ChatRequest {
|
||||
/// The model name to use for generation (e.g., `"llama3"`).
|
||||
pub model: String,
|
||||
|
||||
/// The conversation history as a list of messages.
|
||||
pub messages: Vec<Message>,
|
||||
|
||||
/// Whether to stream the response. When `None`, the server default applies.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stream: Option<bool>,
|
||||
|
||||
/// Runtime options that control text generation
|
||||
/// Runtime options that control text generation behavior.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub options: Option<Options>,
|
||||
|
||||
/// Tool definitions available for the model to call.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub tools: Vec<Tool>,
|
||||
|
||||
/// A JSON schema to constrain the response format for structured output.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub format: Option<Value>,
|
||||
|
||||
/// When set, returns separate thinking output in addition to content. Can be a boolean
|
||||
/// (true/false) or a string ("high", "medium", "low") for supported models.
|
||||
/// Controls extended-thinking (reasoning) mode. Can be a boolean
|
||||
/// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
|
||||
/// supported models.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub think: Option<Think>,
|
||||
}
|
||||
|
||||
impl ChatRequest {
|
||||
/// Returns a [`ChatRequestBuilder`] for the given model.
|
||||
pub fn builder<M: Into<String>>(model: M) -> ChatRequestBuilder {
|
||||
ChatRequestBuilder::new(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single chunk of a streaming chat response.
///
/// When streaming, each chunk contains a partial [`Message`]. The final chunk
/// has [`done`](ChatResponse::done) set to `true`.
#[derive(Debug, Serialize, Deserialize)]
pub struct ChatResponse {
    /// The model that generated this response.
    pub model: String,
    /// Timestamp of when this chunk was created, kept as the string sent by
    /// the server (presumably ISO 8601 -- confirm against the Ollama API docs).
    pub created_at: String,
    /// The (partial) assistant message for this chunk.
    pub message: Message,
    /// `true` when this is the final chunk of the response.
    pub done: bool,
}
|
||||
|
||||
/// A builder for constructing a [`ChatRequest`].
///
/// Obtain a builder via [`ChatRequest::builder()`]. Each setter consumes the
/// builder and returns it, so calls can be chained.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::{ChatRequest, Message};
/// use ollama_rs::types::common::Options;
///
/// let request = ChatRequest::builder("llama3")
///     .messages(vec![Message::user("Hi")])
///     .options(Options::builder().temperature(0.5).build())
///     .build();
/// ```
pub struct ChatRequestBuilder {
    // The request being assembled; `build()` returns it as-is.
    chat_request: ChatRequest,
}
|
||||
@@ -106,70 +232,114 @@ impl ChatRequestBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the conversation history.
|
||||
pub fn messages(mut self, messages: Vec<Message>) -> Self {
|
||||
self.chat_request.messages = messages;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets runtime generation options.
|
||||
pub fn options(mut self, options: Options) -> Self {
|
||||
self.chat_request.options = Some(options);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the tools available for the model to call.
|
||||
pub fn tools(mut self, tools: Vec<Tool>) -> Self {
|
||||
self.chat_request.tools = tools;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether to stream the response.
|
||||
pub fn stream(mut self, stream: bool) -> Self {
|
||||
self.chat_request.stream = Some(stream);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets a JSON schema to constrain the response format.
|
||||
pub fn format(mut self, json_schema: Value) -> Self {
|
||||
self.chat_request.format = Some(json_schema);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables or configures extended-thinking mode.
|
||||
pub fn think(mut self, think: Think) -> Self {
|
||||
self.chat_request.think = Some(think);
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder and returns the configured [`ChatRequest`].
|
||||
pub fn build(self) -> ChatRequest {
|
||||
self.chat_request
|
||||
}
|
||||
}
|
||||
|
||||
/// A tool definition that the model can invoke during a chat.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::{Tool, ToolType, Function};
/// use serde_json::json;
///
/// let tool = Tool {
///     tool_type: ToolType::Function,
///     function: Function {
///         name: "get_weather".to_string(),
///         description: "Get current weather for a city".to_string(),
///         parameters: json!({
///             "type": "object",
///             "properties": {
///                 "city": { "type": "string" }
///             },
///             "required": ["city"]
///         }),
///     },
/// };
/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Tool {
    /// The type of tool (currently only `Function`).
    ///
    /// Appears as the `"type"` key in JSON.
    #[serde(rename = "type")]
    pub tool_type: ToolType,
    /// The function definition.
    pub function: Function,
}
|
||||
|
||||
/// The kind of tool. Currently only [`Function`](ToolType::Function) is supported.
///
/// Serialized as a lowercase string (`"function"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    /// A callable function.
    Function,
}
|
||||
|
||||
/// A function definition for tool calling.
///
/// Embedded in a [`Tool`] to describe one function the model may invoke.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Function {
    /// The function name the model will use to invoke this tool.
    pub name: String,
    /// A JSON Schema describing the function's parameters.
    pub parameters: Value,
    /// A description of what the function does, to guide the model.
    pub description: String,
}
|
||||
|
||||
/// A tool call requested by the model in an assistant message.
///
/// Carried in [`Message::tool_calls`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ToolCall {
    /// The function the model wants to invoke.
    pub function: ToolCallFunction,
}
|
||||
|
||||
/// Details of a specific function call requested by the model.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct ToolCallFunction {
|
||||
/// The name of the function to call.
|
||||
pub name: String,
|
||||
/// The arguments to pass, as a JSON value.
|
||||
pub arguments: Value,
|
||||
/// The index of this tool call within the message (for parallel calls).
|
||||
pub index: usize,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,65 +1,132 @@
|
||||
//! Types shared across multiple Ollama API endpoints.
|
||||
//!
|
||||
//! This module provides:
|
||||
//!
|
||||
//! - [`Options`] / [`OptionsBuilder`] -- sampling and generation parameters.
|
||||
//! - [`Think`] / [`ThinkLevel`] -- controls for extended-thinking (reasoning) mode.
|
||||
//! - [`Stop`] -- stop-sequence configuration.
|
||||
//! - [`ModelDetails`] -- metadata returned when listing models.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Detailed metadata about a model, returned by the tags and ps endpoints.
#[derive(Debug, Serialize, Deserialize)]
pub struct ModelDetails {
    /// The model file format (e.g., `"gguf"`).
    pub format: String,
    /// The primary model family (e.g., `"llama"`).
    pub family: String,
    /// Additional model families, if any (e.g., `["llama", "clip"]`).
    pub families: Option<Vec<String>>,
    /// Human-readable parameter count (e.g., `"8B"`).
    pub parameter_size: String,
    /// Quantization level (e.g., `"Q4_0"`).
    pub quantization_level: String,
}
|
||||
|
||||
/// Controls extended-thinking (reasoning) mode for supported models.
|
||||
///
|
||||
/// Can be a simple boolean toggle or a named level. Serialized as an untagged
|
||||
/// enum so `true`, `false`, `"high"`, `"medium"`, and `"low"` are all valid JSON
|
||||
/// representations.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::common::Think;
|
||||
///
|
||||
/// // Enable thinking
|
||||
/// let think = Think::Bool(true);
|
||||
///
|
||||
/// // Use a specific thinking level
|
||||
/// use ollama_rs::types::common::ThinkLevel;
|
||||
/// let think = Think::Level(ThinkLevel::High);
|
||||
/// ```
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Think {
|
||||
/// Enable (`true`) or disable (`false`) thinking mode.
|
||||
Bool(bool),
|
||||
/// Use a named thinking intensity level.
|
||||
Level(ThinkLevel),
|
||||
}
|
||||
|
||||
/// Named intensity levels for extended-thinking mode.
|
||||
///
|
||||
/// Serialized as lowercase strings: `"high"`, `"medium"`, `"low"`.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ThinkLevel {
|
||||
/// Maximum reasoning depth.
|
||||
High,
|
||||
/// Balanced reasoning depth.
|
||||
Medium,
|
||||
/// Minimal reasoning depth.
|
||||
Low,
|
||||
}
|
||||
|
||||
/// Runtime options that control text generation behavior.
///
/// All fields are optional. Only fields set to `Some` are included in the
/// serialized JSON request, letting the server apply its own defaults for
/// omitted parameters.
///
/// Use [`Options::builder()`] for ergonomic construction.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::{Options, Stop};
///
/// let options = Options::builder()
///     .temperature(0.7)
///     .top_k(40)
///     .stop(Stop::Single("END".to_string()))
///     .build();
/// ```
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Options {
    /// Random seed for reproducible outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<u64>,

    /// Controls randomness in generation. Higher values (e.g., `1.5`) produce
    /// more creative output; lower values (e.g., `0.2`) produce more
    /// deterministic output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

    /// Limits the next-token selection to the *K* most likely tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,

    /// Cumulative probability threshold for nucleus sampling.
    /// A value of `0.9` means only the smallest set of tokens whose cumulative
    /// probability exceeds 90% are considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

    /// Minimum probability threshold for token selection.
    /// Tokens with probability below this value are discarded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_p: Option<f32>,

    /// One or more stop sequences that will halt generation when produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Stop>,

    /// Context window size in tokens. Determines how many tokens the model
    /// can attend to at once.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_ctx: Option<u32>,

    /// Maximum number of tokens to generate in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_predict: Option<u32>,
}
|
||||
|
||||
impl Options {
|
||||
/// Returns an [`OptionsBuilder`] for constructing an `Options` value.
|
||||
pub fn builder() -> OptionsBuilder {
|
||||
OptionsBuilder {
|
||||
options: Options::default(),
|
||||
@@ -67,60 +134,98 @@ impl Options {
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing [`Options`] with only the desired parameters set.
///
/// Obtain a builder via [`Options::builder()`]. Parameters that are never set
/// stay `None` in the resulting [`Options`].
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::Options;
///
/// let options = Options::builder()
///     .seed(42)
///     .temperature(0.8)
///     .num_predict(256)
///     .build();
/// ```
pub struct OptionsBuilder {
    // The options value being assembled; `build()` returns it as-is.
    options: Options,
}
|
||||
|
||||
impl OptionsBuilder {
|
||||
/// Sets the random seed for reproducible outputs.
|
||||
pub fn seed(mut self, seed: u64) -> Self {
|
||||
self.options.seed = Some(seed);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the temperature for generation randomness.
|
||||
pub fn temperature(mut self, temperature: f32) -> Self {
|
||||
self.options.temperature = Some(temperature);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the top-K sampling parameter.
|
||||
pub fn top_k(mut self, top_k: u32) -> Self {
|
||||
self.options.top_k = Some(top_k);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the nucleus sampling probability threshold.
|
||||
pub fn top_p(mut self, top_p: f32) -> Self {
|
||||
self.options.top_p = Some(top_p);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the minimum probability threshold for token selection.
|
||||
pub fn min_p(mut self, min_p: f32) -> Self {
|
||||
self.options.min_p = Some(min_p);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets one or more stop sequences.
|
||||
pub fn stop(mut self, stop: Stop) -> Self {
|
||||
self.options.stop = Some(stop);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the context window size in tokens.
|
||||
pub fn num_ctx(mut self, num_ctx: u32) -> Self {
|
||||
self.options.num_ctx = Some(num_ctx);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the maximum number of tokens to generate.
|
||||
pub fn num_predict(mut self, num_predict: u32) -> Self {
|
||||
self.options.num_predict = Some(num_predict);
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder and returns the configured [`Options`].
|
||||
pub fn build(self) -> Options {
|
||||
self.options
|
||||
}
|
||||
}
|
||||
|
||||
/// Stop sequences that halt text generation when produced by the model.
|
||||
///
|
||||
/// Serialized as an untagged enum: a single string or an array of strings.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::common::Stop;
|
||||
///
|
||||
/// let single = Stop::Single("END".to_string());
|
||||
/// let multiple = Stop::Multiple(vec!["END".to_string(), "STOP".to_string()]);
|
||||
/// ```
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Stop {
|
||||
/// A single stop sequence.
|
||||
Single(String),
|
||||
/// Multiple stop sequences.
|
||||
Multiple(Vec<String>),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,55 +1,113 @@
|
||||
//! Types for the text generation endpoint (`POST /api/generate`).
|
||||
//!
|
||||
//! Use [`GenerateRequest::builder()`] to construct a request and pass it to
|
||||
//! [`OllamaClient::generate()`](crate::OllamaClient::generate). The response
|
||||
//! is streamed as a sequence of [`GenerateResponse`] chunks.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```no_run
|
||||
//! # use ollama_rs::OllamaClient;
|
||||
//! # use ollama_rs::types::generate::GenerateRequest;
|
||||
//! # use futures_util::StreamExt;
|
||||
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
//! let client = OllamaClient::default();
|
||||
//!
|
||||
//! let request = GenerateRequest::builder("llama3")
|
||||
//! .prompt("Why is the sky blue?")
|
||||
//! .system_prompt("Answer in one sentence.")
|
||||
//! .build();
|
||||
//!
|
||||
//! let mut stream = client.generate(request);
|
||||
//! while let Some(chunk) = stream.next().await {
|
||||
//! print!("{}", chunk?.response);
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::types::common::{Options, Think};
|
||||
|
||||
/// A request to the text generation endpoint (`POST /api/generate`).
|
||||
///
|
||||
/// Construct via [`GenerateRequest::builder()`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::generate::GenerateRequest;
|
||||
///
|
||||
/// let request = GenerateRequest::builder("llama3")
|
||||
/// .prompt("Hello, world!")
|
||||
/// .build();
|
||||
/// ```
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GenerateRequest {
|
||||
/// Model name
|
||||
/// The model name to use for generation (e.g., `"llama3"`).
|
||||
pub model: String,
|
||||
|
||||
/// Text for the model to generate a response from
|
||||
/// The prompt text for the model to generate a response from.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub prompt: Option<String>,
|
||||
|
||||
/// Used for fill-in-the-middle models, text that appears after the user prompt and before the
|
||||
/// model response
|
||||
/// Text that appears after the user prompt and before the model response.
|
||||
/// Used for fill-in-the-middle models.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub suffix: Option<String>,
|
||||
|
||||
/// System prompt for the model to generate a response from
|
||||
/// A system prompt that sets the model's behavior.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub system: Option<String>,
|
||||
|
||||
/// When true, returns a stream of partial responses
|
||||
/// Whether to stream the response. When `None`, the server default applies.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stream: Option<bool>,
|
||||
|
||||
/// Base64-encoded images for models that support image input
|
||||
/// Base64-encoded images for multimodal models that support image input.
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub images: Vec<String>,
|
||||
|
||||
/// Structured output format for the model to generate a response from.
|
||||
/// Supports either the string "json" or a JSON schema object
|
||||
/// A structured output format constraint. Accepts the string `"json"` for
|
||||
/// free-form JSON or a JSON Schema object for strict validation.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub format: Option<Value>,
|
||||
|
||||
/// When true, returns separate thinking output in addition to content. Can be a boolean
|
||||
/// (true/false) or a string ("high", "medium", "low") for supported models.
|
||||
/// Controls extended-thinking (reasoning) mode. Can be a boolean
|
||||
/// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
|
||||
/// supported models.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub think: Option<Think>,
|
||||
|
||||
/// Runtime options that control text generation
|
||||
/// Runtime options that control text generation behavior.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub options: Option<Options>,
|
||||
}
|
||||
|
||||
impl GenerateRequest {
|
||||
/// Returns a [`GenerateRequestBuilder`] for the given model.
|
||||
pub fn builder<M: Into<String>>(model: M) -> GenerateRequestBuilder {
|
||||
GenerateRequestBuilder::new(model)
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing a [`GenerateRequest`].
|
||||
///
|
||||
/// Obtain a builder via [`GenerateRequest::builder()`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::generate::GenerateRequest;
|
||||
/// use ollama_rs::types::common::Options;
|
||||
///
|
||||
/// let request = GenerateRequest::builder("llama3")
|
||||
/// .prompt("Tell me a joke")
|
||||
/// .options(Options::builder().temperature(1.0).build())
|
||||
/// .build();
|
||||
/// ```
|
||||
pub struct GenerateRequestBuilder {
|
||||
generate_request: GenerateRequest,
|
||||
}
|
||||
@@ -71,87 +129,109 @@ impl GenerateRequestBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the system prompt that guides the model's behavior.
|
||||
pub fn system_prompt<P: Into<String>>(mut self, system_prompt: P) -> Self {
|
||||
self.generate_request.system = Some(system_prompt.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the prompt text to generate a response from.
|
||||
pub fn prompt<P: Into<String>>(mut self, prompt: P) -> Self {
|
||||
self.generate_request.prompt = Some(prompt.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether to stream the response.
|
||||
pub fn stream(mut self, stream: bool) -> Self {
|
||||
self.generate_request.stream = Some(stream);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the suffix for fill-in-the-middle generation.
|
||||
pub fn suffix(mut self, suffix: String) -> Self {
|
||||
self.generate_request.suffix = Some(suffix);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets base64-encoded images for multimodal generation.
|
||||
pub fn images(mut self, images: Vec<String>) -> Self {
|
||||
self.generate_request.images = images;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets a structured output format constraint (JSON or a JSON Schema).
|
||||
pub fn format<T: Into<Value>>(mut self, value: T) -> Self {
|
||||
self.generate_request.format = Some(value.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables or configures extended-thinking mode.
|
||||
pub fn think(mut self, think: Think) -> Self {
|
||||
self.generate_request.think = Some(think);
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets runtime generation options.
|
||||
pub fn options(mut self, options: Options) -> Self {
|
||||
self.generate_request.options = Some(options);
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder and returns the configured [`GenerateRequest`].
|
||||
pub fn build(self) -> GenerateRequest {
|
||||
self.generate_request
|
||||
}
|
||||
}
|
||||
|
||||
/// A single chunk of a streaming text generation response.
|
||||
///
|
||||
/// When streaming, each chunk contains a fragment of the generated text in the
|
||||
/// [`response`](GenerateResponse::response) field. The final chunk has
|
||||
/// [`done`](GenerateResponse::done) set to `true` and includes performance
|
||||
/// statistics.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct GenerateResponse {
|
||||
/// Model name
|
||||
/// The model that generated this response.
|
||||
pub model: String,
|
||||
|
||||
/// ISO 8601 timestamp of response creation
|
||||
/// ISO 8601 timestamp of when this chunk was created.
|
||||
pub created_at: String,
|
||||
|
||||
/// The model's generated text response
|
||||
/// The generated text fragment for this chunk.
|
||||
pub response: String,
|
||||
|
||||
/// The model's generated thinking output
|
||||
/// The model's thinking/reasoning output, if thinking mode was enabled.
|
||||
pub thinking: Option<String>,
|
||||
|
||||
/// Indicates whether generation has finished
|
||||
/// `true` when this is the final chunk of the response.
|
||||
pub done: bool,
|
||||
|
||||
/// Reason the generation stopped
|
||||
/// The reason generation stopped (e.g., `"stop"`). Only present in the
|
||||
/// final chunk.
|
||||
pub done_reason: Option<String>,
|
||||
|
||||
/// Time spent generating the response in nanoseconds
|
||||
/// Total time spent generating the response, in nanoseconds. Only present
|
||||
/// in the final chunk.
|
||||
pub total_duration: Option<u64>,
|
||||
|
||||
/// Time spent loading the model in nanoseconds
|
||||
/// Time spent loading the model, in nanoseconds. Only present in the final
|
||||
/// chunk.
|
||||
pub load_duration: Option<u64>,
|
||||
|
||||
/// Number of input tokens in the prompt
|
||||
/// Number of tokens in the evaluated prompt. Only present in the final
|
||||
/// chunk.
|
||||
pub prompt_eval_count: Option<u64>,
|
||||
|
||||
/// Time spent evaluating the prompt in nanoseconds
|
||||
/// Time spent evaluating the prompt, in nanoseconds. Only present in the
|
||||
/// final chunk.
|
||||
pub prompt_eval_duration: Option<u64>,
|
||||
|
||||
/// Number of output tokens generated in the response
|
||||
/// Number of tokens generated in the response. Only present in the final
|
||||
/// chunk.
|
||||
pub eval_count: Option<u64>,
|
||||
|
||||
/// Time spent generating tokens in nanoseconds
|
||||
/// Time spent generating output tokens, in nanoseconds. Only present in
|
||||
/// the final chunk.
|
||||
pub eval_duration: Option<u64>,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,20 @@
|
||||
//! Request and response types for the Ollama API.
|
||||
//!
|
||||
//! Each submodule corresponds to an API endpoint:
|
||||
//!
|
||||
//! | Module       | Endpoint             | Description                          |
//! |--------------|----------------------|--------------------------------------|
//! | [`chat`]     | `POST /api/chat`     | Multi-turn chat conversations        |
//! | [`generate`] | `POST /api/generate` | Single-prompt text generation        |
//! | [`pull`]     | `POST /api/pull`     | Download models from the registry    |
//! | [`tags`]     | `GET /api/tags`      | List available models                |
//! | [`ps`]       | `GET /api/ps`        | List currently loaded/running models |
//! | [`version`]  | `GET /api/version`   | Query the server version             |
|
||||
//!
|
||||
//! The [`common`] module contains types shared across multiple endpoints, such as
|
||||
//! [`Options`](common::Options) for generation parameters, [`Think`](common::Think)
|
||||
//! for reasoning mode, and [`ModelDetails`](common::ModelDetails).
|
||||
|
||||
pub mod chat;
|
||||
pub mod common;
|
||||
pub mod generate;
|
||||
|
||||
@@ -1,21 +1,39 @@
|
||||
//! Types for the running-models endpoint (`GET /api/ps`).
|
||||
//!
|
||||
//! The response from [`OllamaClient::ps()`](crate::OllamaClient::ps) is
|
||||
//! deserialized into a [`PsResponse`].
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::types::common::ModelDetails;
|
||||
|
||||
/// Response from the `GET /api/ps` endpoint.
|
||||
///
|
||||
/// Contains a list of models currently loaded in memory on the Ollama server.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PsResponse {
|
||||
/// The currently loaded models.
|
||||
pub models: Vec<RunningModel>,
|
||||
}
|
||||
|
||||
/// A model that is currently loaded and running on the server.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct RunningModel {
|
||||
/// The model name including tag (e.g., `"llama3:latest"`).
|
||||
pub name: String,
|
||||
/// The model identifier.
|
||||
pub model: String,
|
||||
/// Total model size in bytes.
|
||||
pub size: u64,
|
||||
/// The SHA-256 digest of the model.
|
||||
pub digest: String,
|
||||
/// Detailed model metadata.
|
||||
pub details: ModelDetails,
|
||||
/// ISO 8601 timestamp of when the model will be unloaded from memory.
|
||||
pub expires_at: String,
|
||||
/// Amount of VRAM the model occupies, in bytes.
|
||||
pub size_vram: u64,
|
||||
/// The context length configured for this model instance.
|
||||
pub context_length: u32,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,56 @@
|
||||
//! Types for the model pull (download) endpoint (`POST /api/pull`).
|
||||
//!
|
||||
//! Use [`PullRequest::builder()`] to construct a request and pass it to
|
||||
//! [`OllamaClient::pull()`](crate::OllamaClient::pull). The response is
|
||||
//! streamed as a sequence of [`PullResponse`] status updates.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```no_run
|
||||
//! # use ollama_rs::OllamaClient;
|
||||
//! # use ollama_rs::types::pull::PullRequest;
|
||||
//! # use futures_util::StreamExt;
|
||||
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
|
||||
//! let client = OllamaClient::default();
|
||||
//! let request = PullRequest::builder("llama3").build();
|
||||
//!
|
||||
//! let mut stream = client.pull(request);
|
||||
//! while let Some(chunk) = stream.next().await {
|
||||
//! println!("{}", chunk?.status);
|
||||
//! }
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A request to download a model from the Ollama registry (`POST /api/pull`).
|
||||
///
|
||||
/// Construct via [`PullRequest::builder()`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ollama_rs::types::pull::PullRequest;
|
||||
///
|
||||
/// let request = PullRequest::builder("llama3")
|
||||
/// .stream(true)
|
||||
/// .build();
|
||||
/// ```
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PullRequest {
|
||||
/// The model name to pull (e.g., `"llama3"`, `"llama3:latest"`).
|
||||
pub model: String,
|
||||
/// Allow insecure (HTTP) connections to the registry.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub insecure: Option<bool>,
|
||||
/// Whether to stream status updates. When `None`, the server default applies.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub stream: Option<bool>,
|
||||
}
|
||||
|
||||
impl PullRequest {
|
||||
/// Returns a [`PullRequestBuilder`] for the given model name.
|
||||
pub fn builder<M: Into<String>>(model: M) -> PullRequestBuilder {
|
||||
PullRequestBuilder {
|
||||
pull_request: PullRequest {
|
||||
@@ -21,28 +62,37 @@ impl PullRequest {
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for constructing a [`PullRequest`].
|
||||
///
|
||||
/// Obtain a builder via [`PullRequest::builder()`].
|
||||
pub struct PullRequestBuilder {
|
||||
pull_request: PullRequest,
|
||||
}
|
||||
|
||||
impl PullRequestBuilder {
|
||||
/// Sets whether to stream status updates.
|
||||
pub fn stream(mut self, stream: bool) -> Self {
|
||||
self.pull_request.stream = Some(stream);
|
||||
self
|
||||
}
|
||||
|
||||
/// Allows insecure (HTTP) connections to the model registry.
|
||||
pub fn insecure(mut self, insecure: bool) -> Self {
|
||||
self.pull_request.insecure = Some(insecure);
|
||||
self
|
||||
}
|
||||
|
||||
/// Consumes the builder and returns the configured [`PullRequest`].
|
||||
pub fn build(self) -> PullRequest {
|
||||
self.pull_request
|
||||
}
|
||||
}
|
||||
|
||||
/// A streaming status update from the model pull operation.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PullResponse {
|
||||
/// A human-readable status message (e.g., `"pulling manifest"`,
|
||||
/// `"downloading sha256:..."`).
|
||||
pub status: String,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,35 @@
|
||||
//! Types for the model listing endpoint (`GET /api/tags`).
|
||||
//!
|
||||
//! The response from [`OllamaClient::tags()`](crate::OllamaClient::tags) is
|
||||
//! deserialized into a [`TagsResponse`].
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::types::common::ModelDetails;
|
||||
|
||||
/// Response from the `GET /api/tags` endpoint.
|
||||
///
|
||||
/// Contains a list of all models available on the Ollama server.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct TagsResponse {
|
||||
/// The available models.
|
||||
pub models: Vec<Model>,
|
||||
}
|
||||
|
||||
/// An available model on the Ollama server.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct Model {
|
||||
/// The model name including tag (e.g., `"llama3:latest"`).
|
||||
pub name: String,
|
||||
/// The model identifier.
|
||||
pub model: String,
|
||||
/// ISO 8601 timestamp of when the model was last modified.
|
||||
pub modified_at: String,
|
||||
/// Total model size in bytes.
|
||||
pub size: u64,
|
||||
/// The SHA-256 digest of the model.
|
||||
pub digest: String,
|
||||
/// Detailed model metadata.
|
||||
pub details: ModelDetails,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,14 @@
|
||||
//! Types for the server version endpoint (`GET /api/version`).
|
||||
//!
|
||||
//! The response from [`OllamaClient::version()`](crate::OllamaClient::version)
|
||||
//! is deserialized into a [`VersionResponse`].
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Response from the `GET /api/version` endpoint.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct VersionResponse {
|
||||
/// The Ollama server version string (e.g., `"0.6.2"`).
|
||||
pub version: String,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user