From f15a2c53d6091d0a052afa66980027816d500644 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Cipriani=20Bandarra?= Date: Fri, 30 Jan 2026 20:16:15 +0000 Subject: [PATCH] Add comprehensive RustDocs for all public API items - Add crate-level docs with usage examples for lib.rs - Document OllamaClient, OllamaClientBuilder, and all public methods - Document error module with OllamaError variants and OllamaResult - Document types module with endpoint mapping table - Document all types in common, chat, generate, ps, pull, tags, version - Add doc examples for builders, constructors, and key types - All 26 doc-tests pass --- src/error.rs | 21 ++++ src/lib.rs | 254 +++++++++++++++++++++++++++++++++++++++++- src/types/chat.rs | 176 ++++++++++++++++++++++++++++- src/types/common.rs | 121 ++++++++++++++++++-- src/types/generate.rs | 128 +++++++++++++++++---- src/types/mod.rs | 17 +++ src/types/ps.rs | 18 +++ src/types/pull.rs | 50 +++++++++ src/types/tags.rs | 16 +++ src/types/version.rs | 7 ++ 10 files changed, 767 insertions(+), 41 deletions(-) diff --git a/src/error.rs b/src/error.rs index 36c0c0e..73f80a2 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,13 +1,34 @@ +//! Error types for the Ollama client. +//! +//! This module defines [`OllamaError`], the unified error type returned by all +//! client operations, and the [`OllamaResult`] type alias for convenience. + use std::{error::Error, fmt::Display}; use tokio_util::codec::LinesCodecError; +/// A type alias for `Result<T, OllamaError>`. +/// +/// Used throughout the crate as the standard return type for fallible operations. pub type OllamaResult = Result; +/// Errors that can occur when communicating with the Ollama server. +/// +/// This enum covers three failure categories: +/// +/// - **Network** -- connection failures, timeouts, or HTTP error status codes. +/// - **Parsing** -- the server returned a response that could not be deserialized as JSON. 
+/// - **Streaming** -- an error occurred while reading a streaming response line-by-line. +/// +/// All variants wrap their underlying error and implement [`std::error::Error`], +/// [`Display`], and the relevant [`From`] conversions so they work seamlessly with `?`. #[derive(Debug)] pub enum OllamaError { + /// An HTTP or connection-level error from [`reqwest`]. NetworkError(reqwest::Error), + /// A JSON deserialization error from [`serde_json`]. ResponseParseError(serde_json::Error), + /// An error from the line-delimited streaming codec. LinesCodecError(LinesCodecError), } diff --git a/src/lib.rs b/src/lib.rs index 69a73b5..83d940f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,83 @@ +//! # ollama-rs +//! +//! An async Rust client library for the [Ollama](https://ollama.com/) API. +//! +//! This crate provides a streaming-first interface for interacting with Ollama, +//! supporting text generation, multi-turn chat conversations, model management, +//! structured JSON output, and tool calling. +//! +//! ## Quick Start +//! +//! ```no_run +//! use ollama_rs::OllamaClient; +//! use ollama_rs::types::generate::GenerateRequest; +//! use futures_util::StreamExt; +//! +//! # async fn run() -> ollama_rs::error::OllamaResult<()> { +//! let client = OllamaClient::default(); +//! +//! let request = GenerateRequest::builder("llama3") +//! .prompt("Why is the sky blue?") +//! .build(); +//! +//! let mut stream = client.generate(request); +//! while let Some(chunk) = stream.next().await { +//! let response = chunk?; +//! print!("{}", response.response); +//! } +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Chat Conversations +//! +//! ```no_run +//! use ollama_rs::OllamaClient; +//! use ollama_rs::types::chat::{ChatRequest, Message}; +//! use futures_util::StreamExt; +//! +//! # async fn run() -> ollama_rs::error::OllamaResult<()> { +//! let client = OllamaClient::default(); +//! +//! let request = ChatRequest::builder("llama3") +//! .messages(vec![ +//! 
Message::system("You are a helpful assistant."), +//! Message::user("Hello!"), +//! ]) +//! .build(); +//! +//! let mut stream = client.chat(request); +//! while let Some(chunk) = stream.next().await { +//! let response = chunk?; +//! print!("{}", response.message.content); +//! } +//! # Ok(()) +//! # } +//! ``` +//! +//! ## Custom Client Configuration +//! +//! ``` +//! use ollama_rs::OllamaClient; +//! use std::time::Duration; +//! +//! // Connect to a remote server with a custom timeout +//! let client = OllamaClient::builder("http://my-server:11434") +//! .connection_timeout(Duration::from_secs(60)) +//! .build(); +//! ``` +//! +//! ## Features +//! +//! - **Streaming responses** -- all generation endpoints return [`futures_util::Stream`]s, +//! allowing token-by-token processing. +//! - **Builder pattern** -- all request types use builders for ergonomic construction. +//! - **Multi-turn chat** -- maintain conversation context with [`types::chat::ChatRequest`]. +//! - **Structured output** -- request JSON responses conforming to a schema via the `format` field. +//! - **Tool calling** -- let the model invoke functions with [`types::chat::Tool`] definitions. +//! - **Thinking mode** -- enable extended reasoning with [`types::common::Think`]. +//! - **Model management** -- list, pull, and inspect models. + use std::time::Duration; use async_stream::stream; @@ -24,8 +104,33 @@ use crate::{ pub mod error; pub mod types; +/// Default connection timeout applied when no custom timeout is specified (30 seconds). const DEFAULT_CONNECTION_TIMEOUT: Duration = Duration::from_secs(30); +/// An async client for communicating with an Ollama server. +/// +/// `OllamaClient` is the primary entry point for this crate. It wraps an HTTP client +/// and provides methods for every Ollama API endpoint. +/// +/// # Construction +/// +/// There are three ways to create a client: +/// +/// - [`OllamaClient::default()`] -- connects to `http://localhost:11434` with a 30-second timeout. 
+/// - [`OllamaClient::new()`] -- connects to a custom address with a 30-second timeout. +/// - [`OllamaClient::builder()`] -- full control over address and connection timeout. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::OllamaClient; +/// +/// // Default local connection +/// let client = OllamaClient::default(); +/// +/// // Custom server address +/// let client = OllamaClient::new("http://my-server:11434"); +/// ``` #[derive(Clone)] pub struct OllamaClient { server_address: String, @@ -39,6 +144,18 @@ impl Default for OllamaClient { } impl OllamaClient { + /// Creates a new client connected to the given server address. + /// + /// Uses the default connection timeout of 30 seconds. For custom timeouts, + /// use [`OllamaClient::builder()`] instead. + /// + /// # Examples + /// + /// ``` + /// use ollama_rs::OllamaClient; + /// + /// let client = OllamaClient::new("http://localhost:11434"); + /// ``` pub fn new>(server_address: S) -> Self { Self { server_address: server_address.as_ref().to_string(), @@ -49,6 +166,18 @@ impl OllamaClient { } } + /// Returns an [`OllamaClientBuilder`] for constructing a client with custom settings. + /// + /// # Examples + /// + /// ``` + /// use ollama_rs::OllamaClient; + /// use std::time::Duration; + /// + /// let client = OllamaClient::builder("http://localhost:11434") + /// .connection_timeout(Duration::from_secs(60)) + /// .build(); + /// ``` pub fn builder>(server_address: S) -> OllamaClientBuilder { OllamaClientBuilder { server_address: server_address.as_ref().to_string(), @@ -56,7 +185,14 @@ impl OllamaClient { } } - /// Retrieve the version of the Ollama + /// Retrieves the version of the connected Ollama server. + /// + /// Calls `GET /api/version`. + /// + /// # Errors + /// + /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns + /// a non-success status code. 
pub async fn version(&self) -> OllamaResult { let request_address = format!("{}/api/version", self.server_address); Ok(self @@ -69,7 +205,14 @@ impl OllamaClient { .await?) } - /// Fetch a list of models and their details + /// Lists all models available on the Ollama server along with their details. + /// + /// Calls `GET /api/tags`. + /// + /// # Errors + /// + /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns + /// a non-success status code. pub async fn tags(&self) -> OllamaResult { let request_address = format!("{}/api/tags", self.server_address); info!("List models: {}", request_address); @@ -83,7 +226,14 @@ impl OllamaClient { .await?) } - /// Retrieve a list of models that are currently running + /// Lists models that are currently loaded and running on the Ollama server. + /// + /// Calls `GET /api/ps`. + /// + /// # Errors + /// + /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns + /// a non-success status code. pub async fn ps(&self) -> OllamaResult { let request_address = format!("{}/api/ps", self.server_address); info!("List models: {}", request_address); @@ -135,7 +285,33 @@ impl OllamaClient { }) } - /// Generates a response for the provided prompt + /// Generates a text completion for the given prompt. + /// + /// Returns a stream of [`GenerateResponse`] chunks. Each chunk contains a + /// fragment of the generated text. The final chunk has `done` set to `true` + /// and includes timing and token count statistics. + /// + /// Calls `POST /api/generate`. 
+ /// + /// # Examples + /// + /// ```no_run + /// # use ollama_rs::OllamaClient; + /// # use ollama_rs::types::generate::GenerateRequest; + /// # use futures_util::StreamExt; + /// # async fn run() -> ollama_rs::error::OllamaResult<()> { + /// let client = OllamaClient::default(); + /// let request = GenerateRequest::builder("llama3") + /// .prompt("Explain quantum computing in one sentence.") + /// .build(); + /// + /// let mut stream = client.generate(request); + /// while let Some(chunk) = stream.next().await { + /// print!("{}", chunk?.response); + /// } + /// # Ok(()) + /// # } + /// ``` pub fn generate( &self, request: GenerateRequest, @@ -144,30 +320,96 @@ impl OllamaClient { self.stream_response(request_address, request) } - /// Generate the next chat message in a conversation between a user and an assistant. + /// Generates the next message in a multi-turn chat conversation. + /// + /// Returns a stream of [`ChatResponse`] chunks. Each chunk contains a partial + /// [`Message`](types::chat::Message) from the assistant. The final chunk has + /// `done` set to `true`. + /// + /// Calls `POST /api/chat`. + /// + /// # Examples + /// + /// ```no_run + /// # use ollama_rs::OllamaClient; + /// # use ollama_rs::types::chat::{ChatRequest, Message}; + /// # use futures_util::StreamExt; + /// # async fn run() -> ollama_rs::error::OllamaResult<()> { + /// let client = OllamaClient::default(); + /// let request = ChatRequest::builder("llama3") + /// .messages(vec![Message::user("What is 2 + 2?")]) + /// .build(); + /// + /// let mut stream = client.chat(request); + /// while let Some(chunk) = stream.next().await { + /// print!("{}", chunk?.message.content); + /// } + /// # Ok(()) + /// # } + /// ``` pub fn chat(&self, request: ChatRequest) -> impl Stream> { let request_address = format!("{}/api/chat", self.server_address); self.stream_response(request_address, request) } - /// Pull a model + /// Downloads (pulls) a model from the Ollama registry. 
+ /// + /// Returns a stream of [`PullResponse`] chunks reporting the download status. + /// + /// Calls `POST /api/pull`. + /// + /// # Examples + /// + /// ```no_run + /// # use ollama_rs::OllamaClient; + /// # use ollama_rs::types::pull::PullRequest; + /// # use futures_util::StreamExt; + /// # async fn run() -> ollama_rs::error::OllamaResult<()> { + /// let client = OllamaClient::default(); + /// let request = PullRequest::builder("llama3").build(); + /// + /// let mut stream = client.pull(request); + /// while let Some(chunk) = stream.next().await { + /// println!("{}", chunk?.status); + /// } + /// # Ok(()) + /// # } + /// ``` pub fn pull(&self, request: PullRequest) -> impl Stream> { let request_address = format!("{}/api/pull", self.server_address); self.stream_response(request_address, request) } } +/// A builder for constructing an [`OllamaClient`] with custom configuration. +/// +/// Obtain a builder via [`OllamaClient::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::OllamaClient; +/// use std::time::Duration; +/// +/// let client = OllamaClient::builder("http://localhost:11434") +/// .connection_timeout(Duration::from_secs(10)) +/// .build(); +/// ``` pub struct OllamaClientBuilder { server_address: String, connection_timeout: Duration, } impl OllamaClientBuilder { + /// Sets the TCP connection timeout for the underlying HTTP client. + /// + /// Defaults to 30 seconds if not specified. pub fn connection_timeout(mut self, timeout: Duration) -> Self { self.connection_timeout = timeout; self } + /// Consumes the builder and returns a configured [`OllamaClient`]. pub fn build(self) -> OllamaClient { OllamaClient { server_address: self.server_address, diff --git a/src/types/chat.rs b/src/types/chat.rs index 957423e..f67c27c 100644 --- a/src/types/chat.rs +++ b/src/types/chat.rs @@ -1,28 +1,84 @@ +//! Types for the chat conversation endpoint (`POST /api/chat`). +//! +//! 
Use [`ChatRequest::builder()`] to construct a request and pass it to +//! [`OllamaClient::chat()`](crate::OllamaClient::chat). The response is +//! streamed as a sequence of [`ChatResponse`] chunks. +//! +//! # Examples +//! +//! ```no_run +//! # use ollama_rs::OllamaClient; +//! # use ollama_rs::types::chat::{ChatRequest, Message}; +//! # use futures_util::StreamExt; +//! # async fn run() -> ollama_rs::error::OllamaResult<()> { +//! let client = OllamaClient::default(); +//! +//! let request = ChatRequest::builder("llama3") +//! .messages(vec![ +//! Message::system("You are a helpful assistant."), +//! Message::user("What is Rust?"), +//! ]) +//! .build(); +//! +//! let mut stream = client.chat(request); +//! while let Some(chunk) = stream.next().await { +//! print!("{}", chunk?.message.content); +//! } +//! # Ok(()) +//! # } +//! ``` + use serde::{Deserialize, Serialize}; use serde_json::Value; use crate::error::OllamaResult; use crate::types::common::{Options, Think}; +/// The role of a participant in a chat conversation. +/// +/// Serialized as a lowercase string: `"user"`, `"system"`, `"assistant"`, `"tool"`. #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum Role { + /// A human user. User, + /// A system prompt that sets the assistant's behavior. System, + /// The AI assistant. Assistant, + /// A tool response providing data back to the model. Tool, } +/// A single message in a chat conversation. +/// +/// Use the convenience constructors [`Message::system()`], [`Message::user()`], +/// and [`Message::tool_response()`] to create messages for the common roles. +/// Assistant messages are typically received from the model via [`ChatResponse`]. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Message { + /// The text content of the message. pub content: String, + /// The role of the message sender. pub role: Role, + /// Tool calls requested by the assistant, if any. + /// + /// Empty for non-assistant messages. 
Omitted from serialization when empty. #[serde(skip_serializing_if = "Vec::is_empty")] #[serde(default)] pub tool_calls: Vec, } impl Message { + /// Creates a system message that sets the assistant's behavior. + /// + /// # Examples + /// + /// ``` + /// use ollama_rs::types::chat::Message; + /// + /// let msg = Message::system("You are a helpful assistant."); + /// ``` pub fn system>(content: T) -> Self { Self { content: content.into(), @@ -31,6 +87,15 @@ impl Message { } } + /// Creates a user message. + /// + /// # Examples + /// + /// ``` + /// use ollama_rs::types::chat::Message; + /// + /// let msg = Message::user("Hello, how are you?"); + /// ``` pub fn user>(content: T) -> Self { Self { content: content.into(), @@ -39,6 +104,23 @@ impl Message { } } + /// Creates a tool response message from a JSON value. + /// + /// The value is serialized to a JSON string and placed in the message content. + /// + /// # Errors + /// + /// Returns [`OllamaError::ResponseParseError`](crate::error::OllamaError::ResponseParseError) + /// if the value cannot be serialized. + /// + /// # Examples + /// + /// ``` + /// use ollama_rs::types::chat::Message; + /// use serde_json::json; + /// + /// let msg = Message::tool_response(&json!({"temperature": 22.0})).unwrap(); + /// ``` pub fn tool_response(content: &Value) -> OllamaResult { Ok(Message { content: serde_json::to_string(content)?, @@ -48,45 +130,89 @@ impl Message { } } +/// A request to the chat endpoint (`POST /api/chat`). +/// +/// Construct via [`ChatRequest::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::chat::{ChatRequest, Message}; +/// +/// let request = ChatRequest::builder("llama3") +/// .messages(vec![Message::user("Hello!")]) +/// .stream(true) +/// .build(); +/// ``` #[derive(Debug, Serialize, Deserialize)] pub struct ChatRequest { + /// The model name to use for generation (e.g., `"llama3"`). pub model: String, + /// The conversation history as a list of messages. 
pub messages: Vec, + /// Whether to stream the response. When `None`, the server default applies. #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, - /// Runtime options that control text generation + /// Runtime options that control text generation behavior. #[serde(skip_serializing_if = "Option::is_none")] pub options: Option, + /// Tool definitions available for the model to call. #[serde(skip_serializing_if = "Vec::is_empty")] pub tools: Vec, + /// A JSON schema to constrain the response format for structured output. #[serde(skip_serializing_if = "Option::is_none")] pub format: Option, - /// When set, returns separate thinking output in addition to content. Can be a boolean - /// (true/false) or a string ("high", "medium", "low") for supported models. + /// Controls extended-thinking (reasoning) mode. Can be a boolean + /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for + /// supported models. #[serde(skip_serializing_if = "Option::is_none")] pub think: Option, } impl ChatRequest { + /// Returns a [`ChatRequestBuilder`] for the given model. pub fn builder>(model: M) -> ChatRequestBuilder { ChatRequestBuilder::new(model) } } +/// A single chunk of a streaming chat response. +/// +/// When streaming, each chunk contains a partial [`Message`]. The final chunk +/// has [`done`](ChatResponse::done) set to `true`. #[derive(Debug, Serialize, Deserialize)] pub struct ChatResponse { + /// The model that generated this response. pub model: String, + /// ISO 8601 timestamp of when this chunk was created. pub created_at: String, + /// The (partial) assistant message for this chunk. pub message: Message, + /// `true` when this is the final chunk of the response. pub done: bool, } +/// A builder for constructing a [`ChatRequest`]. +/// +/// Obtain a builder via [`ChatRequest::builder()`]. 
+/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::chat::{ChatRequest, Message}; +/// use ollama_rs::types::common::Options; +/// +/// let request = ChatRequest::builder("llama3") +/// .messages(vec![Message::user("Hi")]) +/// .options(Options::builder().temperature(0.5).build()) +/// .build(); +/// ``` pub struct ChatRequestBuilder { chat_request: ChatRequest, } @@ -106,70 +232,114 @@ impl ChatRequestBuilder { } } + /// Sets the conversation history. pub fn messages(mut self, messages: Vec) -> Self { self.chat_request.messages = messages; self } + /// Sets runtime generation options. pub fn options(mut self, options: Options) -> Self { self.chat_request.options = Some(options); self } + /// Sets the tools available for the model to call. pub fn tools(mut self, tools: Vec) -> Self { self.chat_request.tools = tools; self } + /// Sets whether to stream the response. pub fn stream(mut self, stream: bool) -> Self { self.chat_request.stream = Some(stream); self } + /// Sets a JSON schema to constrain the response format. pub fn format(mut self, json_schema: Value) -> Self { self.chat_request.format = Some(json_schema); self } + /// Enables or configures extended-thinking mode. pub fn think(mut self, think: Think) -> Self { self.chat_request.think = Some(think); self } + /// Consumes the builder and returns the configured [`ChatRequest`]. pub fn build(self) -> ChatRequest { self.chat_request } } +/// A tool definition that the model can invoke during a chat. 
+/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::chat::{Tool, ToolType, Function}; +/// use serde_json::json; +/// +/// let tool = Tool { +/// tool_type: ToolType::Function, +/// function: Function { +/// name: "get_weather".to_string(), +/// description: "Get current weather for a city".to_string(), +/// parameters: json!({ +/// "type": "object", +/// "properties": { +/// "city": { "type": "string" } +/// }, +/// "required": ["city"] +/// }), +/// }, +/// }; +/// ``` #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Tool { + /// The type of tool (currently only `Function`). #[serde(rename = "type")] pub tool_type: ToolType, + /// The function definition. pub function: Function, } +/// The kind of tool. Currently only [`Function`](ToolType::Function) is supported. #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum ToolType { + /// A callable function. Function, } +/// A function definition for tool calling. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Function { + /// The function name the model will use to invoke this tool. pub name: String, + /// A JSON Schema describing the function's parameters. pub parameters: Value, + /// A description of what the function does, to guide the model. pub description: String, } +/// A tool call requested by the model in an assistant message. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ToolCall { + /// The function the model wants to invoke. pub function: ToolCallFunction, } +/// Details of a specific function call requested by the model. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct ToolCallFunction { + /// The name of the function to call. pub name: String, + /// The arguments to pass, as a JSON value. pub arguments: Value, + /// The index of this tool call within the message (for parallel calls). 
pub index: usize, } diff --git a/src/types/common.rs b/src/types/common.rs index 1cf52cc..85df597 100644 --- a/src/types/common.rs +++ b/src/types/common.rs @@ -1,65 +1,132 @@ +//! Types shared across multiple Ollama API endpoints. +//! +//! This module provides: +//! +//! - [`Options`] / [`OptionsBuilder`] -- sampling and generation parameters. +//! - [`Think`] / [`ThinkLevel`] -- controls for extended-thinking (reasoning) mode. +//! - [`Stop`] -- stop-sequence configuration. +//! - [`ModelDetails`] -- metadata returned when listing models. + use serde::{Deserialize, Serialize}; +/// Detailed metadata about a model, returned by the tags and ps endpoints. #[derive(Debug, Serialize, Deserialize)] pub struct ModelDetails { + /// The model file format (e.g., `"gguf"`). pub format: String, + /// The primary model family (e.g., `"llama"`). pub family: String, + /// Additional model families, if any (e.g., `["llama", "clip"]`). pub families: Option>, + /// Human-readable parameter count (e.g., `"8B"`). pub parameter_size: String, + /// Quantization level (e.g., `"Q4_0"`). pub quantization_level: String, } +/// Controls extended-thinking (reasoning) mode for supported models. +/// +/// Can be a simple boolean toggle or a named level. Serialized as an untagged +/// enum so `true`, `false`, `"high"`, `"medium"`, and `"low"` are all valid JSON +/// representations. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::common::Think; +/// +/// // Enable thinking +/// let think = Think::Bool(true); +/// +/// // Use a specific thinking level +/// use ollama_rs::types::common::ThinkLevel; +/// let think = Think::Level(ThinkLevel::High); +/// ``` #[derive(Debug, Serialize, Deserialize)] #[serde(untagged)] pub enum Think { + /// Enable (`true`) or disable (`false`) thinking mode. Bool(bool), + /// Use a named thinking intensity level. Level(ThinkLevel), } +/// Named intensity levels for extended-thinking mode. 
+/// +/// Serialized as lowercase strings: `"high"`, `"medium"`, `"low"`. #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum ThinkLevel { + /// Maximum reasoning depth. High, + /// Balanced reasoning depth. Medium, + /// Minimal reasoning depth. Low, } +/// Runtime options that control text generation behavior. +/// +/// All fields are optional. Only fields set to `Some` are included in the +/// serialized JSON request, letting the server apply its own defaults for +/// omitted parameters. +/// +/// Use [`Options::builder()`] for ergonomic construction. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::common::{Options, Stop}; +/// +/// let options = Options::builder() +/// .temperature(0.7) +/// .top_k(40) +/// .stop(Stop::Single("END".to_string())) +/// .build(); +/// ``` #[derive(Debug, Default, Serialize, Deserialize)] pub struct Options { - /// Random seed used for reproducible outputs + /// Random seed for reproducible outputs. #[serde(skip_serializing_if = "Option::is_none")] pub seed: Option, - /// Controls randomness in generation (higher = more random) + /// Controls randomness in generation. Higher values (e.g., `1.5`) produce + /// more creative output; lower values (e.g., `0.2`) produce more + /// deterministic output. #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, - /// Limits next token selection to the K most likely + /// Limits the next-token selection to the *K* most likely tokens. #[serde(skip_serializing_if = "Option::is_none")] pub top_k: Option, - /// Cumulative probability threshold for nucleus sampling + /// Cumulative probability threshold for nucleus sampling. + /// A value of `0.9` means only the smallest set of tokens whose cumulative + /// probability exceeds 90% are considered. #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, - /// Minimum probability threshold for token selection + /// Minimum probability threshold for token selection. 
+ /// Tokens with probability below this value are discarded. #[serde(skip_serializing_if = "Option::is_none")] pub min_p: Option, - /// Stop sequences that will halt generation + /// One or more stop sequences that will halt generation when produced. #[serde(skip_serializing_if = "Option::is_none")] pub stop: Option, - /// Context length size (number of tokens) + /// Context window size in tokens. Determines how many tokens the model + /// can attend to at once. #[serde(skip_serializing_if = "Option::is_none")] pub num_ctx: Option, - /// Maximum number of tokens to generate + /// Maximum number of tokens to generate in the response. #[serde(skip_serializing_if = "Option::is_none")] pub num_predict: Option, } impl Options { + /// Returns an [`OptionsBuilder`] for constructing an `Options` value. pub fn builder() -> OptionsBuilder { OptionsBuilder { options: Options::default(), @@ -67,60 +134,98 @@ impl Options { } } +/// A builder for constructing [`Options`] with only the desired parameters set. +/// +/// Obtain a builder via [`Options::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::common::Options; +/// +/// let options = Options::builder() +/// .seed(42) +/// .temperature(0.8) +/// .num_predict(256) +/// .build(); +/// ``` pub struct OptionsBuilder { options: Options, } impl OptionsBuilder { + /// Sets the random seed for reproducible outputs. pub fn seed(mut self, seed: u64) -> Self { self.options.seed = Some(seed); self } + /// Sets the temperature for generation randomness. pub fn temperature(mut self, temperature: f32) -> Self { self.options.temperature = Some(temperature); self } + /// Sets the top-K sampling parameter. pub fn top_k(mut self, top_k: u32) -> Self { self.options.top_k = Some(top_k); self } + /// Sets the nucleus sampling probability threshold. pub fn top_p(mut self, top_p: f32) -> Self { self.options.top_p = Some(top_p); self } + /// Sets the minimum probability threshold for token selection. 
pub fn min_p(mut self, min_p: f32) -> Self { self.options.min_p = Some(min_p); self } + /// Sets one or more stop sequences. pub fn stop(mut self, stop: Stop) -> Self { self.options.stop = Some(stop); self } + /// Sets the context window size in tokens. pub fn num_ctx(mut self, num_ctx: u32) -> Self { self.options.num_ctx = Some(num_ctx); self } + /// Sets the maximum number of tokens to generate. pub fn num_predict(mut self, num_predict: u32) -> Self { self.options.num_predict = Some(num_predict); self } + /// Consumes the builder and returns the configured [`Options`]. pub fn build(self) -> Options { self.options } } +/// Stop sequences that halt text generation when produced by the model. +/// +/// Serialized as an untagged enum: a single string or an array of strings. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::common::Stop; +/// +/// let single = Stop::Single("END".to_string()); +/// let multiple = Stop::Multiple(vec!["END".to_string(), "STOP".to_string()]); +/// ``` #[derive(Debug, Serialize, Deserialize)] #[serde(untagged)] pub enum Stop { + /// A single stop sequence. Single(String), + /// Multiple stop sequences. Multiple(Vec), } diff --git a/src/types/generate.rs b/src/types/generate.rs index 2602953..5b1326d 100644 --- a/src/types/generate.rs +++ b/src/types/generate.rs @@ -1,55 +1,113 @@ +//! Types for the text generation endpoint (`POST /api/generate`). +//! +//! Use [`GenerateRequest::builder()`] to construct a request and pass it to +//! [`OllamaClient::generate()`](crate::OllamaClient::generate). The response +//! is streamed as a sequence of [`GenerateResponse`] chunks. +//! +//! # Examples +//! +//! ```no_run +//! # use ollama_rs::OllamaClient; +//! # use ollama_rs::types::generate::GenerateRequest; +//! # use futures_util::StreamExt; +//! # async fn run() -> ollama_rs::error::OllamaResult<()> { +//! let client = OllamaClient::default(); +//! +//! let request = GenerateRequest::builder("llama3") +//! 
.prompt("Why is the sky blue?") +//! .system_prompt("Answer in one sentence.") +//! .build(); +//! +//! let mut stream = client.generate(request); +//! while let Some(chunk) = stream.next().await { +//! print!("{}", chunk?.response); +//! } +//! # Ok(()) +//! # } +//! ``` + use serde::{Deserialize, Serialize}; use serde_json::Value; use crate::types::common::{Options, Think}; +/// A request to the text generation endpoint (`POST /api/generate`). +/// +/// Construct via [`GenerateRequest::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::generate::GenerateRequest; +/// +/// let request = GenerateRequest::builder("llama3") +/// .prompt("Hello, world!") +/// .build(); +/// ``` #[derive(Debug, Serialize, Deserialize)] pub struct GenerateRequest { - /// Model name + /// The model name to use for generation (e.g., `"llama3"`). pub model: String, - /// Text for the model to generate a response from + /// The prompt text for the model to generate a response from. #[serde(skip_serializing_if = "Option::is_none")] pub prompt: Option, - /// Used for fill-in-the-middle models, text that appears after the user prompt and before the - /// model response + /// Text that appears after the user prompt and before the model response. + /// Used for fill-in-the-middle models. #[serde(skip_serializing_if = "Option::is_none")] pub suffix: Option, - /// System prompt for the model to generate a response from + /// A system prompt that sets the model's behavior. #[serde(skip_serializing_if = "Option::is_none")] pub system: Option, - /// When true, returns a stream of partial responses + /// Whether to stream the response. When `None`, the server default applies. #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, - /// Base64-encoded images for models that support image input + /// Base64-encoded images for multimodal models that support image input. 
#[serde(skip_serializing_if = "Vec::is_empty")] pub images: Vec, - /// Structured output format for the model to generate a response from. - /// Supports either the string "json" or a JSON schema object + /// A structured output format constraint. Accepts the string `"json"` for + /// free-form JSON or a JSON Schema object for strict validation. #[serde(skip_serializing_if = "Option::is_none")] pub format: Option, - /// When true, returns separate thinking output in addition to content. Can be a boolean - /// (true/false) or a string ("high", "medium", "low") for supported models. + /// Controls extended-thinking (reasoning) mode. Can be a boolean + /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for + /// supported models. #[serde(skip_serializing_if = "Option::is_none")] pub think: Option, - /// Runtime options that control text generation + /// Runtime options that control text generation behavior. #[serde(skip_serializing_if = "Option::is_none")] pub options: Option, } impl GenerateRequest { + /// Returns a [`GenerateRequestBuilder`] for the given model. pub fn builder>(model: M) -> GenerateRequestBuilder { GenerateRequestBuilder::new(model) } } +/// A builder for constructing a [`GenerateRequest`]. +/// +/// Obtain a builder via [`GenerateRequest::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::generate::GenerateRequest; +/// use ollama_rs::types::common::Options; +/// +/// let request = GenerateRequest::builder("llama3") +/// .prompt("Tell me a joke") +/// .options(Options::builder().temperature(1.0).build()) +/// .build(); +/// ``` pub struct GenerateRequestBuilder { generate_request: GenerateRequest, } @@ -71,87 +129,109 @@ impl GenerateRequestBuilder { } } + /// Sets the system prompt that guides the model's behavior. pub fn system_prompt>(mut self, system_prompt: P) -> Self { self.generate_request.system = Some(system_prompt.into()); self } + /// Sets the prompt text to generate a response from. 
pub fn prompt>(mut self, prompt: P) -> Self { self.generate_request.prompt = Some(prompt.into()); self } + /// Sets whether to stream the response. pub fn stream(mut self, stream: bool) -> Self { self.generate_request.stream = Some(stream); self } + /// Sets the suffix for fill-in-the-middle generation. pub fn suffix(mut self, suffix: String) -> Self { self.generate_request.suffix = Some(suffix); self } + /// Sets base64-encoded images for multimodal generation. pub fn images(mut self, images: Vec) -> Self { self.generate_request.images = images; self } + /// Sets a structured output format constraint (JSON or a JSON Schema). pub fn format>(mut self, value: T) -> Self { self.generate_request.format = Some(value.into()); self } + /// Enables or configures extended-thinking mode. pub fn think(mut self, think: Think) -> Self { self.generate_request.think = Some(think); self } + /// Sets runtime generation options. pub fn options(mut self, options: Options) -> Self { self.generate_request.options = Some(options); self } + /// Consumes the builder and returns the configured [`GenerateRequest`]. pub fn build(self) -> GenerateRequest { self.generate_request } } +/// A single chunk of a streaming text generation response. +/// +/// When streaming, each chunk contains a fragment of the generated text in the +/// [`response`](GenerateResponse::response) field. The final chunk has +/// [`done`](GenerateResponse::done) set to `true` and includes performance +/// statistics. #[derive(Debug, Serialize, Deserialize)] pub struct GenerateResponse { - /// Model name + /// The model that generated this response. pub model: String, - /// ISO 8601 timestamp of response creation + /// ISO 8601 timestamp of when this chunk was created. pub created_at: String, - /// The model's generated text response + /// The generated text fragment for this chunk. pub response: String, - /// The model's generated thinking output + /// The model's thinking/reasoning output, if thinking mode was enabled. 
pub thinking: Option, - /// Indicates whether generation has finished + /// `true` when this is the final chunk of the response. pub done: bool, - /// Reason the generation stopped + /// The reason generation stopped (e.g., `"stop"`). Only present in the + /// final chunk. pub done_reason: Option, - /// Time spent generating the response in nanoseconds + /// Total time spent generating the response, in nanoseconds. Only present + /// in the final chunk. pub total_duration: Option, - /// Time spent loading the model in nanoseconds + /// Time spent loading the model, in nanoseconds. Only present in the final + /// chunk. pub load_duration: Option, - /// Number of input tokens in the prompt + /// Number of tokens in the evaluated prompt. Only present in the final + /// chunk. pub prompt_eval_count: Option, - /// Time spent evaluating the prompt in nanoseconds + /// Time spent evaluating the prompt, in nanoseconds. Only present in the + /// final chunk. pub prompt_eval_duration: Option, - /// Number of output tokens generated in the response + /// Number of tokens generated in the response. Only present in the final + /// chunk. pub eval_count: Option, - /// Time spent generating tokens in nanoseconds + /// Time spent generating output tokens, in nanoseconds. Only present in + /// the final chunk. pub eval_duration: Option, } diff --git a/src/types/mod.rs b/src/types/mod.rs index 1054deb..734355c 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,3 +1,20 @@ +//! Request and response types for the Ollama API. +//! +//! Each submodule corresponds to an API endpoint: +//! +//! | Module | Endpoint | Description | +//! |--------------|-------------------|------------------------------------------| +//! | [`chat`] | `POST /api/chat` | Multi-turn chat conversations | +//! | [`generate`] | `POST /api/generate` | Single-prompt text generation | +//! | [`pull`] | `POST /api/pull` | Download models from the registry | +//! 
| [`tags`] | `GET /api/tags` | List available models | +//! | [`ps`] | `GET /api/ps` | List currently loaded/running models | +//! | [`version`] | `GET /api/version` | Query the server version | +//! +//! The [`common`] module contains types shared across multiple endpoints, such as +//! [`Options`](common::Options) for generation parameters, [`Think`](common::Think) +//! for reasoning mode, and [`ModelDetails`](common::ModelDetails). + pub mod chat; pub mod common; pub mod generate; diff --git a/src/types/ps.rs b/src/types/ps.rs index fe629c7..6db0146 100644 --- a/src/types/ps.rs +++ b/src/types/ps.rs @@ -1,21 +1,39 @@ +//! Types for the running-models endpoint (`GET /api/ps`). +//! +//! The response from [`OllamaClient::ps()`](crate::OllamaClient::ps) is +//! deserialized into a [`PsResponse`]. + use serde::{Deserialize, Serialize}; use crate::types::common::ModelDetails; +/// Response from the `GET /api/ps` endpoint. +/// +/// Contains a list of models currently loaded in memory on the Ollama server. #[derive(Debug, Serialize, Deserialize)] pub struct PsResponse { + /// The currently loaded models. pub models: Vec, } +/// A model that is currently loaded and running on the server. #[derive(Debug, Serialize, Deserialize)] pub struct RunningModel { + /// The model name including tag (e.g., `"llama3:latest"`). pub name: String, + /// The model identifier. pub model: String, + /// Total model size in bytes. pub size: u64, + /// The SHA-256 digest of the model. pub digest: String, + /// Detailed model metadata. pub details: ModelDetails, + /// ISO 8601 timestamp of when the model will be unloaded from memory. pub expires_at: String, + /// Amount of VRAM the model occupies, in bytes. pub size_vram: u64, + /// The context length configured for this model instance. pub context_length: u32, } diff --git a/src/types/pull.rs b/src/types/pull.rs index df732fc..0da4bac 100644 --- a/src/types/pull.rs +++ b/src/types/pull.rs @@ -1,15 +1,56 @@ +//! 
Types for the model pull (download) endpoint (`POST /api/pull`). +//! +//! Use [`PullRequest::builder()`] to construct a request and pass it to +//! [`OllamaClient::pull()`](crate::OllamaClient::pull). The response is +//! streamed as a sequence of [`PullResponse`] status updates. +//! +//! # Examples +//! +//! ```no_run +//! # use ollama_rs::OllamaClient; +//! # use ollama_rs::types::pull::PullRequest; +//! # use futures_util::StreamExt; +//! # async fn run() -> ollama_rs::error::OllamaResult<()> { +//! let client = OllamaClient::default(); +//! let request = PullRequest::builder("llama3").build(); +//! +//! let mut stream = client.pull(request); +//! while let Some(chunk) = stream.next().await { +//! println!("{}", chunk?.status); +//! } +//! # Ok(()) +//! # } +//! ``` + use serde::{Deserialize, Serialize}; +/// A request to download a model from the Ollama registry (`POST /api/pull`). +/// +/// Construct via [`PullRequest::builder()`]. +/// +/// # Examples +/// +/// ``` +/// use ollama_rs::types::pull::PullRequest; +/// +/// let request = PullRequest::builder("llama3") +/// .stream(true) +/// .build(); +/// ``` #[derive(Debug, Serialize, Deserialize)] pub struct PullRequest { + /// The model name to pull (e.g., `"llama3"`, `"llama3:latest"`). pub model: String, + /// Allow insecure (HTTP) connections to the registry. #[serde(skip_serializing_if = "Option::is_none")] pub insecure: Option, + /// Whether to stream status updates. When `None`, the server default applies. #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, } impl PullRequest { + /// Returns a [`PullRequestBuilder`] for the given model name. pub fn builder>(model: M) -> PullRequestBuilder { PullRequestBuilder { pull_request: PullRequest { @@ -21,28 +62,37 @@ impl PullRequest { } } +/// A builder for constructing a [`PullRequest`]. +/// +/// Obtain a builder via [`PullRequest::builder()`]. 
pub struct PullRequestBuilder { pull_request: PullRequest, } impl PullRequestBuilder { + /// Sets whether to stream status updates. pub fn stream(mut self, stream: bool) -> Self { self.pull_request.stream = Some(stream); self } + /// Allows insecure (HTTP) connections to the model registry. pub fn insecure(mut self, insecure: bool) -> Self { self.pull_request.insecure = Some(insecure); self } + /// Consumes the builder and returns the configured [`PullRequest`]. pub fn build(self) -> PullRequest { self.pull_request } } +/// A streaming status update from the model pull operation. #[derive(Debug, Serialize, Deserialize)] pub struct PullResponse { + /// A human-readable status message (e.g., `"pulling manifest"`, + /// `"downloading sha256:..."`). pub status: String, } diff --git a/src/types/tags.rs b/src/types/tags.rs index 4c861de..06e9e27 100644 --- a/src/types/tags.rs +++ b/src/types/tags.rs @@ -1,19 +1,35 @@ +//! Types for the model listing endpoint (`GET /api/tags`). +//! +//! The response from [`OllamaClient::tags()`](crate::OllamaClient::tags) is +//! deserialized into a [`TagsResponse`]. + use serde::{Deserialize, Serialize}; use crate::types::common::ModelDetails; +/// Response from the `GET /api/tags` endpoint. +/// +/// Contains a list of all models available on the Ollama server. #[derive(Debug, Serialize, Deserialize)] pub struct TagsResponse { + /// The available models. pub models: Vec, } +/// An available model on the Ollama server. #[derive(Debug, Serialize, Deserialize)] pub struct Model { + /// The model name including tag (e.g., `"llama3:latest"`). pub name: String, + /// The model identifier. pub model: String, + /// ISO 8601 timestamp of when the model was last modified. pub modified_at: String, + /// Total model size in bytes. pub size: u64, + /// The SHA-256 digest of the model. pub digest: String, + /// Detailed model metadata. 
pub details: ModelDetails, } diff --git a/src/types/version.rs b/src/types/version.rs index ec92427..8dc69f1 100644 --- a/src/types/version.rs +++ b/src/types/version.rs @@ -1,7 +1,14 @@ +//! Types for the server version endpoint (`GET /api/version`). +//! +//! The response from [`OllamaClient::version()`](crate::OllamaClient::version) +//! is deserialized into a [`VersionResponse`]. + use serde::{Deserialize, Serialize}; +/// Response from the `GET /api/version` endpoint. #[derive(Debug, Serialize, Deserialize)] pub struct VersionResponse { + /// The Ollama server version string (e.g., `"0.6.2"`). pub version: String, }