Add comprehensive RustDocs for all public API items

- Add crate-level docs with usage examples for lib.rs
- Document OllamaClient, OllamaClientBuilder, and all public methods
- Document error module with OllamaError variants and OllamaResult
- Document types module with endpoint mapping table
- Document all types in common, chat, generate, ps, pull, tags, version
- Add doc examples for builders, constructors, and key types
- All 26 doc-tests pass
This commit is contained in:
2026-01-30 20:16:15 +00:00
parent c6450b774a
commit f15a2c53d6
10 changed files with 767 additions and 41 deletions

View File

@@ -1,13 +1,34 @@
//! Error types for the Ollama client.
//!
//! This module defines [`OllamaError`], the unified error type returned by all
//! client operations, and the [`OllamaResult<T>`] type alias for convenience.
use std::{error::Error, fmt::Display};
use tokio_util::codec::LinesCodecError;
/// A type alias for `Result<T, OllamaError>`.
///
/// Used throughout the crate as the standard return type for fallible
/// operations, so signatures can say `OllamaResult<T>` instead of spelling
/// out the [`OllamaError`] error type each time.
pub type OllamaResult<T> = Result<T, OllamaError>;
/// Errors that can occur when communicating with the Ollama server.
///
/// This enum covers three failure categories:
///
/// - **Network** -- connection failures, timeouts, or HTTP error status codes.
/// - **Parsing** -- the server returned a response that could not be deserialized as JSON.
/// - **Streaming** -- an error occurred while reading a streaming response line-by-line.
///
/// Every variant is a thin wrapper around the error value produced by the
/// underlying library; no additional context is attached.
///
/// All variants wrap their underlying error and implement [`std::error::Error`],
/// [`Display`], and the relevant [`From`] conversions so they work seamlessly with `?`.
#[derive(Debug)]
pub enum OllamaError {
/// An HTTP or connection-level error from [`reqwest`].
///
/// Wraps the original [`reqwest::Error`].
NetworkError(reqwest::Error),
/// A JSON deserialization error from [`serde_json`].
///
/// Wraps the original [`serde_json::Error`].
ResponseParseError(serde_json::Error),
/// An error from the line-delimited streaming codec.
///
/// Wraps the original `tokio_util::codec::LinesCodecError`.
LinesCodecError(LinesCodecError),
}

View File

@@ -1,3 +1,83 @@
//! # ollama-rs
//!
//! An async Rust client library for the [Ollama](https://ollama.com/) API.
//!
//! This crate provides a streaming-first interface for interacting with Ollama,
//! supporting text generation, multi-turn chat conversations, model management,
//! structured JSON output, and tool calling.
//!
//! ## Quick Start
//!
//! ```no_run
//! use ollama_rs::OllamaClient;
//! use ollama_rs::types::generate::GenerateRequest;
//! use futures_util::StreamExt;
//!
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
//! let client = OllamaClient::default();
//!
//! let request = GenerateRequest::builder("llama3")
//! .prompt("Why is the sky blue?")
//! .build();
//!
//! let mut stream = client.generate(request);
//! while let Some(chunk) = stream.next().await {
//! let response = chunk?;
//! print!("{}", response.response);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ## Chat Conversations
//!
//! ```no_run
//! use ollama_rs::OllamaClient;
//! use ollama_rs::types::chat::{ChatRequest, Message};
//! use futures_util::StreamExt;
//!
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
//! let client = OllamaClient::default();
//!
//! let request = ChatRequest::builder("llama3")
//! .messages(vec![
//! Message::system("You are a helpful assistant."),
//! Message::user("Hello!"),
//! ])
//! .build();
//!
//! let mut stream = client.chat(request);
//! while let Some(chunk) = stream.next().await {
//! let response = chunk?;
//! print!("{}", response.message.content);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! ## Custom Client Configuration
//!
//! ```
//! use ollama_rs::OllamaClient;
//! use std::time::Duration;
//!
//! // Connect to a remote server with a custom timeout
//! let client = OllamaClient::builder("http://my-server:11434")
//! .connection_timeout(Duration::from_secs(60))
//! .build();
//! ```
//!
//! ## Features
//!
//! - **Streaming responses** -- all generation endpoints return [`futures_util::Stream`]s,
//! allowing token-by-token processing.
//! - **Builder pattern** -- all request types use builders for ergonomic construction.
//! - **Multi-turn chat** -- maintain conversation context with [`types::chat::ChatRequest`].
//! - **Structured output** -- request JSON responses conforming to a schema via the `format` field.
//! - **Tool calling** -- let the model invoke functions with [`types::chat::Tool`] definitions.
//! - **Thinking mode** -- enable extended reasoning with [`types::common::Think`].
//! - **Model management** -- list, pull, and inspect models.
use std::time::Duration;
use async_stream::stream;
@@ -24,8 +104,33 @@ use crate::{
pub mod error;
pub mod types;
/// Default connection timeout applied when no custom timeout is specified.
///
/// The value is 30 seconds.
const DEFAULT_CONNECTION_TIMEOUT: Duration = Duration::from_secs(30);
/// An async client for communicating with an Ollama server.
///
/// `OllamaClient` is the primary entry point for this crate. It wraps an HTTP client
/// and provides methods for every Ollama API endpoint.
///
/// # Construction
///
/// There are three ways to create a client:
///
/// - [`OllamaClient::default()`] -- connects to `http://localhost:11434` with a 30-second timeout.
/// - [`OllamaClient::new()`] -- connects to a custom address with a 30-second timeout.
/// - [`OllamaClient::builder()`] -- full control over address and connection timeout.
///
/// # Examples
///
/// ```
/// use ollama_rs::OllamaClient;
///
/// // Default local connection
/// let client = OllamaClient::default();
///
/// // Custom server address
/// let client = OllamaClient::new("http://my-server:11434");
/// ```
#[derive(Clone)]
pub struct OllamaClient {
server_address: String,
@@ -39,6 +144,18 @@ impl Default for OllamaClient {
}
impl OllamaClient {
/// Creates a new client connected to the given server address.
///
/// Uses the default connection timeout of 30 seconds. For custom timeouts,
/// use [`OllamaClient::builder()`] instead.
///
/// # Examples
///
/// ```
/// use ollama_rs::OllamaClient;
///
/// let client = OllamaClient::new("http://localhost:11434");
/// ```
pub fn new<S: AsRef<str>>(server_address: S) -> Self {
Self {
server_address: server_address.as_ref().to_string(),
@@ -49,6 +166,18 @@ impl OllamaClient {
}
}
/// Returns an [`OllamaClientBuilder`] for constructing a client with custom settings.
///
/// # Examples
///
/// ```
/// use ollama_rs::OllamaClient;
/// use std::time::Duration;
///
/// let client = OllamaClient::builder("http://localhost:11434")
/// .connection_timeout(Duration::from_secs(60))
/// .build();
/// ```
pub fn builder<S: AsRef<str>>(server_address: S) -> OllamaClientBuilder {
OllamaClientBuilder {
server_address: server_address.as_ref().to_string(),
@@ -56,7 +185,14 @@ impl OllamaClient {
}
}
/// Retrieves the version of the connected Ollama server.
///
/// Calls `GET /api/version`.
///
/// # Errors
///
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
/// a non-success status code.
pub async fn version(&self) -> OllamaResult<VersionResponse> {
let request_address = format!("{}/api/version", self.server_address);
Ok(self
@@ -69,7 +205,14 @@ impl OllamaClient {
.await?)
}
/// Lists all models available on the Ollama server along with their details.
///
/// Calls `GET /api/tags`.
///
/// # Errors
///
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
/// a non-success status code.
pub async fn tags(&self) -> OllamaResult<TagsResponse> {
let request_address = format!("{}/api/tags", self.server_address);
info!("List models: {}", request_address);
@@ -83,7 +226,14 @@ impl OllamaClient {
.await?)
}
/// Lists models that are currently loaded and running on the Ollama server.
///
/// Calls `GET /api/ps`.
///
/// # Errors
///
/// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
/// a non-success status code.
pub async fn ps(&self) -> OllamaResult<PsResponse> {
let request_address = format!("{}/api/ps", self.server_address);
info!("List models: {}", request_address);
@@ -135,7 +285,33 @@ impl OllamaClient {
})
}
/// Generates a text completion for the given prompt.
///
/// Returns a stream of [`GenerateResponse`] chunks. Each chunk contains a
/// fragment of the generated text. The final chunk has `done` set to `true`
/// and includes timing and token count statistics.
///
/// Calls `POST /api/generate`.
///
/// # Examples
///
/// ```no_run
/// # use ollama_rs::OllamaClient;
/// # use ollama_rs::types::generate::GenerateRequest;
/// # use futures_util::StreamExt;
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
/// let client = OllamaClient::default();
/// let request = GenerateRequest::builder("llama3")
/// .prompt("Explain quantum computing in one sentence.")
/// .build();
///
/// let mut stream = client.generate(request);
/// while let Some(chunk) = stream.next().await {
/// print!("{}", chunk?.response);
/// }
/// # Ok(())
/// # }
/// ```
pub fn generate(
&self,
request: GenerateRequest,
@@ -144,30 +320,96 @@ impl OllamaClient {
self.stream_response(request_address, request)
}
/// Streams the next assistant message of a multi-turn chat conversation.
///
/// Calls `POST /api/chat` and returns a stream of [`ChatResponse`] chunks.
/// Each chunk carries a partial [`Message`](types::chat::Message) from the
/// assistant; the final chunk has `done` set to `true`.
///
/// # Examples
///
/// ```no_run
/// # use ollama_rs::OllamaClient;
/// # use ollama_rs::types::chat::{ChatRequest, Message};
/// # use futures_util::StreamExt;
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
/// let client = OllamaClient::default();
/// let request = ChatRequest::builder("llama3")
///     .messages(vec![Message::user("What is 2 + 2?")])
///     .build();
///
/// let mut stream = client.chat(request);
/// while let Some(chunk) = stream.next().await {
///     print!("{}", chunk?.message.content);
/// }
/// # Ok(())
/// # }
/// ```
pub fn chat(&self, request: ChatRequest) -> impl Stream<Item = OllamaResult<ChatResponse>> {
    // The endpoint URL is the configured server address plus the chat path.
    self.stream_response(format!("{}/api/chat", self.server_address), request)
}
/// Pulls (downloads) a model from the Ollama registry.
///
/// Calls `POST /api/pull` and returns a stream of [`PullResponse`] chunks
/// that report the download status.
///
/// # Examples
///
/// ```no_run
/// # use ollama_rs::OllamaClient;
/// # use ollama_rs::types::pull::PullRequest;
/// # use futures_util::StreamExt;
/// # async fn run() -> ollama_rs::error::OllamaResult<()> {
/// let client = OllamaClient::default();
/// let request = PullRequest::builder("llama3").build();
///
/// let mut stream = client.pull(request);
/// while let Some(chunk) = stream.next().await {
///     println!("{}", chunk?.status);
/// }
/// # Ok(())
/// # }
/// ```
pub fn pull(&self, request: PullRequest) -> impl Stream<Item = OllamaResult<PullResponse>> {
    // The endpoint URL is the configured server address plus the pull path.
    self.stream_response(format!("{}/api/pull", self.server_address), request)
}
}
/// A builder for constructing an [`OllamaClient`] with custom configuration.
///
/// Obtain a builder via [`OllamaClient::builder()`].
///
/// The builder is `Clone`, so a partially configured builder can be reused as
/// a template, and `Debug`, so its settings can be inspected in logs and
/// assertions.
///
/// # Examples
///
/// ```
/// use ollama_rs::OllamaClient;
/// use std::time::Duration;
///
/// let client = OllamaClient::builder("http://localhost:11434")
///     .connection_timeout(Duration::from_secs(10))
///     .build();
/// ```
#[derive(Debug, Clone)]
pub struct OllamaClientBuilder {
    /// Address of the Ollama server the built client will talk to.
    server_address: String,
    /// Connection timeout handed to the built client.
    connection_timeout: Duration,
}
impl OllamaClientBuilder {
/// Overrides the connection timeout used by the underlying HTTP client.
///
/// Defaults to 30 seconds if not specified.
pub fn connection_timeout(self, timeout: Duration) -> Self {
    let mut builder = self;
    builder.connection_timeout = timeout;
    builder
}
/// Consumes the builder and returns a configured [`OllamaClient`].
pub fn build(self) -> OllamaClient {
OllamaClient {
server_address: self.server_address,

View File

@@ -1,28 +1,84 @@
//! Types for the chat conversation endpoint (`POST /api/chat`).
//!
//! Use [`ChatRequest::builder()`] to construct a request and pass it to
//! [`OllamaClient::chat()`](crate::OllamaClient::chat). The response is
//! streamed as a sequence of [`ChatResponse`] chunks.
//!
//! # Examples
//!
//! ```no_run
//! # use ollama_rs::OllamaClient;
//! # use ollama_rs::types::chat::{ChatRequest, Message};
//! # use futures_util::StreamExt;
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
//! let client = OllamaClient::default();
//!
//! let request = ChatRequest::builder("llama3")
//! .messages(vec![
//! Message::system("You are a helpful assistant."),
//! Message::user("What is Rust?"),
//! ])
//! .build();
//!
//! let mut stream = client.chat(request);
//! while let Some(chunk) = stream.next().await {
//! print!("{}", chunk?.message.content);
//! }
//! # Ok(())
//! # }
//! ```
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::error::OllamaResult;
use crate::types::common::{Options, Think};
/// The role of a participant in a chat conversation.
///
/// Serialized as a lowercase string (`"user"`, `"system"`, `"assistant"`,
/// `"tool"`) via the `rename_all = "lowercase"` serde attribute.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Role {
/// A human user.
User,
/// A system prompt that sets the assistant's behavior.
System,
/// The AI assistant.
Assistant,
/// A tool response providing data back to the model.
Tool,
}
/// A single message in a chat conversation.
///
/// Use the convenience constructors [`Message::system()`], [`Message::user()`],
/// and [`Message::tool_response()`] to create messages for the common roles.
/// Assistant messages are typically received from the model via [`ChatResponse`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Message {
/// The text content of the message.
pub content: String,
/// The role of the message sender.
pub role: Role,
/// Tool calls requested by the assistant, if any.
///
/// Empty for non-assistant messages. Omitted from serialization when empty,
/// and filled with an empty list when the field is absent on deserialization.
#[serde(skip_serializing_if = "Vec::is_empty")]
#[serde(default)]
pub tool_calls: Vec<ToolCall>,
}
impl Message {
/// Creates a system message that sets the assistant's behavior.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::Message;
///
/// let msg = Message::system("You are a helpful assistant.");
/// ```
pub fn system<T: Into<String>>(content: T) -> Self {
Self {
content: content.into(),
@@ -31,6 +87,15 @@ impl Message {
}
}
/// Creates a user message.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::Message;
///
/// let msg = Message::user("Hello, how are you?");
/// ```
pub fn user<T: Into<String>>(content: T) -> Self {
Self {
content: content.into(),
@@ -39,6 +104,23 @@ impl Message {
}
}
/// Creates a tool response message from a JSON value.
///
/// The value is serialized to a JSON string and placed in the message content.
///
/// # Errors
///
/// Returns [`OllamaError::ResponseParseError`](crate::error::OllamaError::ResponseParseError)
/// if the value cannot be serialized.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::Message;
/// use serde_json::json;
///
/// let msg = Message::tool_response(&json!({"temperature": 22.0})).unwrap();
/// ```
pub fn tool_response(content: &Value) -> OllamaResult<Self> {
Ok(Message {
content: serde_json::to_string(content)?,
@@ -48,45 +130,89 @@ impl Message {
}
}
/// A request to the chat endpoint (`POST /api/chat`).
///
/// Construct via [`ChatRequest::builder()`].
///
/// Unset optional fields are left out of the serialized JSON. Every field
/// that can be left out is also optional when deserializing, so a serialized
/// request can always be read back.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::{ChatRequest, Message};
///
/// let request = ChatRequest::builder("llama3")
///     .messages(vec![Message::user("Hello!")])
///     .stream(true)
///     .build();
/// ```
#[derive(Debug, Serialize, Deserialize)]
pub struct ChatRequest {
    /// The model name to use for generation (e.g., `"llama3"`).
    pub model: String,
    /// The conversation history as a list of messages.
    pub messages: Vec<Message>,
    /// Whether to stream the response. When `None`, the server default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
    /// Runtime options that control text generation behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<Options>,
    /// Tool definitions available for the model to call.
    ///
    /// Omitted from serialization when empty. The matching `default` makes a
    /// missing field deserialize to an empty list, so a request without tools
    /// survives a serialize/deserialize round trip.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    #[serde(default)]
    pub tools: Vec<Tool>,
    /// A JSON schema to constrain the response format for structured output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<Value>,
    /// Controls extended-thinking (reasoning) mode. Can be a boolean
    /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
    /// supported models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub think: Option<Think>,
}
impl ChatRequest {
    /// Starts a [`ChatRequestBuilder`] for a request that targets `model`.
    pub fn builder<M>(model: M) -> ChatRequestBuilder
    where
        M: Into<String>,
    {
        ChatRequestBuilder::new(model)
    }
}
/// A single chunk of a streaming chat response.
///
/// When streaming, each chunk contains a partial [`Message`]. The final chunk
/// has [`done`](ChatResponse::done) set to `true`.
///
/// All four fields are required when deserializing a chunk.
#[derive(Debug, Serialize, Deserialize)]
pub struct ChatResponse {
/// The model that generated this response.
pub model: String,
/// ISO 8601 timestamp of when this chunk was created, kept as the raw string.
pub created_at: String,
/// The (partial) assistant message for this chunk.
pub message: Message,
/// `true` when this is the final chunk of the response.
pub done: bool,
}
/// A builder for constructing a [`ChatRequest`].
///
/// Obtain a builder via [`ChatRequest::builder()`]. Each setter consumes the
/// builder and returns it, so calls can be chained before `build()`.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::{ChatRequest, Message};
/// use ollama_rs::types::common::Options;
///
/// let request = ChatRequest::builder("llama3")
///     .messages(vec![Message::user("Hi")])
///     .options(Options::builder().temperature(0.5).build())
///     .build();
/// ```
pub struct ChatRequestBuilder {
// The request being assembled; setters write into it and `build()` returns it.
chat_request: ChatRequest,
}
@@ -106,70 +232,114 @@ impl ChatRequestBuilder {
}
}
/// Replaces the conversation history with `messages`.
pub fn messages(self, messages: Vec<Message>) -> Self {
    let mut builder = self;
    builder.chat_request.messages = messages;
    builder
}
/// Attaches runtime generation options to the request.
pub fn options(self, options: Options) -> Self {
    let mut builder = self;
    builder.chat_request.options = Some(options);
    builder
}
/// Replaces the list of tools the model is allowed to call.
pub fn tools(self, tools: Vec<Tool>) -> Self {
    let mut builder = self;
    builder.chat_request.tools = tools;
    builder
}
/// Chooses explicitly whether the response should be streamed.
pub fn stream(self, stream: bool) -> Self {
    let mut builder = self;
    builder.chat_request.stream = Some(stream);
    builder
}
/// Constrains the response format with the given JSON schema.
pub fn format(self, json_schema: Value) -> Self {
    let mut builder = self;
    builder.chat_request.format = Some(json_schema);
    builder
}
/// Sets the extended-thinking configuration for the request.
pub fn think(self, think: Think) -> Self {
    let mut builder = self;
    builder.chat_request.think = Some(think);
    builder
}
/// Consumes the builder and returns the configured [`ChatRequest`].
pub fn build(self) -> ChatRequest {
self.chat_request
}
}
/// A tool definition that the model can invoke during a chat.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::chat::{Tool, ToolType, Function};
/// use serde_json::json;
///
/// let tool = Tool {
///     tool_type: ToolType::Function,
///     function: Function {
///         name: "get_weather".to_string(),
///         description: "Get current weather for a city".to_string(),
///         parameters: json!({
///             "type": "object",
///             "properties": {
///                 "city": { "type": "string" }
///             },
///             "required": ["city"]
///         }),
///     },
/// };
/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Tool {
/// The type of tool (currently only `Function`).
///
/// Appears in JSON under the key `"type"`.
#[serde(rename = "type")]
pub tool_type: ToolType,
/// The function definition.
pub function: Function,
}
/// The kind of tool. Currently only [`Function`](ToolType::Function) is supported.
///
/// Serialized as a lowercase string (`"function"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
/// A callable function.
Function,
}
/// A function definition for tool calling.
///
/// All three fields are required when serializing and deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Function {
/// The function name the model will use to invoke this tool.
pub name: String,
/// A JSON Schema describing the function's parameters.
pub parameters: Value,
/// A description of what the function does, to guide the model.
pub description: String,
}
/// A tool call requested by the model in an assistant message.
///
/// Carried in the `tool_calls` list of a `Message`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ToolCall {
/// The function the model wants to invoke.
pub function: ToolCallFunction,
}
/// Details of a specific function call requested by the model.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ToolCallFunction {
/// The name of the function to call.
pub name: String,
/// The arguments to pass, as a JSON value.
pub arguments: Value,
/// The index of this tool call within the message (for parallel calls).
pub index: usize,
}

View File

@@ -1,65 +1,132 @@
//! Types shared across multiple Ollama API endpoints.
//!
//! This module provides:
//!
//! - [`Options`] / [`OptionsBuilder`] -- sampling and generation parameters.
//! - [`Think`] / [`ThinkLevel`] -- controls for extended-thinking (reasoning) mode.
//! - [`Stop`] -- stop-sequence configuration.
//! - [`ModelDetails`] -- metadata returned when listing models.
use serde::{Deserialize, Serialize};
/// Detailed metadata about a model, returned by the tags and ps endpoints.
///
/// Every field except `families` is required when deserializing.
#[derive(Debug, Serialize, Deserialize)]
pub struct ModelDetails {
/// The model file format (e.g., `"gguf"`).
pub format: String,
/// The primary model family (e.g., `"llama"`).
pub family: String,
/// Additional model families, if any (e.g., `["llama", "clip"]`).
pub families: Option<Vec<String>>,
/// Human-readable parameter count (e.g., `"8B"`).
pub parameter_size: String,
/// Quantization level (e.g., `"Q4_0"`).
pub quantization_level: String,
}
/// Controls extended-thinking (reasoning) mode for supported models.
///
/// Can be a simple boolean toggle or a named level. Serialized as an untagged
/// enum so `true`, `false`, `"high"`, `"medium"`, and `"low"` are all valid JSON
/// representations; no variant name appears in the JSON.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::Think;
///
/// // Enable thinking
/// let think = Think::Bool(true);
///
/// // Use a specific thinking level
/// use ollama_rs::types::common::ThinkLevel;
/// let think = Think::Level(ThinkLevel::High);
/// ```
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Think {
/// Enable (`true`) or disable (`false`) thinking mode.
Bool(bool),
/// Use a named thinking intensity level.
Level(ThinkLevel),
}
/// Named intensity levels for extended-thinking mode.
///
/// Serialized as lowercase strings (`"high"`, `"medium"`, `"low"`) via the
/// `rename_all = "lowercase"` serde attribute.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ThinkLevel {
/// Maximum reasoning depth.
High,
/// Balanced reasoning depth.
Medium,
/// Minimal reasoning depth.
Low,
}
/// Runtime options that control text generation behavior.
///
/// All fields are optional. Only fields set to `Some` are included in the
/// serialized JSON request, letting the server apply its own defaults for
/// omitted parameters.
///
/// Use [`Options::builder()`] for ergonomic construction.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::{Options, Stop};
///
/// let options = Options::builder()
///     .temperature(0.7)
///     .top_k(40)
///     .stop(Stop::Single("END".to_string()))
///     .build();
/// ```
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Options {
/// Random seed for reproducible outputs.
#[serde(skip_serializing_if = "Option::is_none")]
pub seed: Option<u64>,
/// Controls randomness in generation. Higher values (e.g., `1.5`) produce
/// more creative output; lower values (e.g., `0.2`) produce more
/// deterministic output.
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
/// Limits the next-token selection to the *K* most likely tokens.
#[serde(skip_serializing_if = "Option::is_none")]
pub top_k: Option<u32>,
/// Cumulative probability threshold for nucleus sampling.
/// A value of `0.9` means only the smallest set of tokens whose cumulative
/// probability exceeds 90% are considered.
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
/// Minimum probability threshold for token selection.
/// Tokens with probability below this value are discarded.
#[serde(skip_serializing_if = "Option::is_none")]
pub min_p: Option<f32>,
/// One or more stop sequences that will halt generation when produced.
#[serde(skip_serializing_if = "Option::is_none")]
pub stop: Option<Stop>,
/// Context window size in tokens. Determines how many tokens the model
/// can attend to at once.
#[serde(skip_serializing_if = "Option::is_none")]
pub num_ctx: Option<u32>,
/// Maximum number of tokens to generate in the response.
#[serde(skip_serializing_if = "Option::is_none")]
pub num_predict: Option<u32>,
}
impl Options {
/// Returns an [`OptionsBuilder`] for constructing an `Options` value.
pub fn builder() -> OptionsBuilder {
OptionsBuilder {
options: Options::default(),
@@ -67,60 +134,98 @@ impl Options {
}
}
/// A builder for constructing [`Options`] with only the desired parameters set.
///
/// Obtain a builder via [`Options::builder()`]. Each setter consumes the
/// builder and returns it, so calls can be chained before `build()`.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::Options;
///
/// let options = Options::builder()
///     .seed(42)
///     .temperature(0.8)
///     .num_predict(256)
///     .build();
/// ```
pub struct OptionsBuilder {
// The options being assembled; setters write into it and `build()` returns it.
options: Options,
}
impl OptionsBuilder {
    /// Applies `edit` to the options under construction and returns the builder.
    ///
    /// Shared by every public setter so each one only states which field it sets.
    fn update(mut self, edit: impl FnOnce(&mut Options)) -> Self {
        edit(&mut self.options);
        self
    }

    /// Fixes the random seed so outputs are reproducible.
    pub fn seed(self, seed: u64) -> Self {
        self.update(|opts| opts.seed = Some(seed))
    }

    /// Chooses the temperature, i.e. how random generation is.
    pub fn temperature(self, temperature: f32) -> Self {
        self.update(|opts| opts.temperature = Some(temperature))
    }

    /// Chooses the top-K sampling parameter.
    pub fn top_k(self, top_k: u32) -> Self {
        self.update(|opts| opts.top_k = Some(top_k))
    }

    /// Chooses the probability threshold for nucleus sampling.
    pub fn top_p(self, top_p: f32) -> Self {
        self.update(|opts| opts.top_p = Some(top_p))
    }

    /// Chooses the minimum probability a token needs to be selected.
    pub fn min_p(self, min_p: f32) -> Self {
        self.update(|opts| opts.min_p = Some(min_p))
    }

    /// Chooses the stop sequence(s) that end generation.
    pub fn stop(self, stop: Stop) -> Self {
        self.update(|opts| opts.stop = Some(stop))
    }

    /// Chooses the context window size, in tokens.
    pub fn num_ctx(self, num_ctx: u32) -> Self {
        self.update(|opts| opts.num_ctx = Some(num_ctx))
    }

    /// Caps the number of tokens to generate.
    pub fn num_predict(self, num_predict: u32) -> Self {
        self.update(|opts| opts.num_predict = Some(num_predict))
    }

    /// Finishes building and hands back the assembled [`Options`].
    pub fn build(self) -> Options {
        let Self { options } = self;
        options
    }
}
/// Stop sequences that halt text generation when produced by the model.
///
/// Serialized as an untagged enum: a single string or an array of strings,
/// with no variant name in the JSON.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::common::Stop;
///
/// let single = Stop::Single("END".to_string());
/// let multiple = Stop::Multiple(vec!["END".to_string(), "STOP".to_string()]);
/// ```
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Stop {
/// A single stop sequence.
Single(String),
/// Multiple stop sequences.
Multiple(Vec<String>),
}

View File

@@ -1,55 +1,113 @@
//! Types for the text generation endpoint (`POST /api/generate`).
//!
//! Use [`GenerateRequest::builder()`] to construct a request and pass it to
//! [`OllamaClient::generate()`](crate::OllamaClient::generate). The response
//! is streamed as a sequence of [`GenerateResponse`] chunks.
//!
//! # Examples
//!
//! ```no_run
//! # use ollama_rs::OllamaClient;
//! # use ollama_rs::types::generate::GenerateRequest;
//! # use futures_util::StreamExt;
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
//! let client = OllamaClient::default();
//!
//! let request = GenerateRequest::builder("llama3")
//! .prompt("Why is the sky blue?")
//! .system_prompt("Answer in one sentence.")
//! .build();
//!
//! let mut stream = client.generate(request);
//! while let Some(chunk) = stream.next().await {
//! print!("{}", chunk?.response);
//! }
//! # Ok(())
//! # }
//! ```
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::types::common::{Options, Think};
/// A request to the text generation endpoint (`POST /api/generate`).
///
/// Construct via [`GenerateRequest::builder()`].
///
/// Unset optional fields are left out of the serialized JSON. Every field
/// that can be left out is also optional when deserializing, so a serialized
/// request can always be read back.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::generate::GenerateRequest;
///
/// let request = GenerateRequest::builder("llama3")
///     .prompt("Hello, world!")
///     .build();
/// ```
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateRequest {
    /// The model name to use for generation (e.g., `"llama3"`).
    pub model: String,
    /// The prompt text for the model to generate a response from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt: Option<String>,
    /// Text that appears after the user prompt and before the model response.
    /// Used for fill-in-the-middle models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// A system prompt that sets the model's behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system: Option<String>,
    /// Whether to stream the response. When `None`, the server default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
    /// Base64-encoded images for multimodal models that support image input.
    ///
    /// Omitted from serialization when empty. The matching `default` makes a
    /// missing field deserialize to an empty list, so a request without
    /// images survives a serialize/deserialize round trip.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    #[serde(default)]
    pub images: Vec<String>,
    /// A structured output format constraint. Accepts the string `"json"` for
    /// free-form JSON or a JSON Schema object for strict validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<Value>,
    /// Controls extended-thinking (reasoning) mode. Can be a boolean
    /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
    /// supported models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub think: Option<Think>,
    /// Runtime options that control text generation behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<Options>,
}
impl GenerateRequest {
    /// Starts a [`GenerateRequestBuilder`] for a request that targets `model`.
    pub fn builder<M>(model: M) -> GenerateRequestBuilder
    where
        M: Into<String>,
    {
        GenerateRequestBuilder::new(model)
    }
}
/// A builder for constructing a [`GenerateRequest`].
///
/// Obtain a builder via [`GenerateRequest::builder()`]. Each setter consumes
/// the builder and returns it, so calls can be chained before `build()`.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::generate::GenerateRequest;
/// use ollama_rs::types::common::Options;
///
/// let request = GenerateRequest::builder("llama3")
///     .prompt("Tell me a joke")
///     .options(Options::builder().temperature(1.0).build())
///     .build();
/// ```
pub struct GenerateRequestBuilder {
// The request being assembled; setters write into it and `build()` returns it.
generate_request: GenerateRequest,
}
@@ -71,87 +129,109 @@ impl GenerateRequestBuilder {
}
}
/// Sets the system prompt that guides the model's behavior.
///
/// The converted text is stored in the request's `system` field, replacing
/// any value set earlier.
pub fn system_prompt<P>(mut self, system_prompt: P) -> Self
where
    P: Into<String>,
{
    let request = &mut self.generate_request;
    request.system = Some(system_prompt.into());
    self
}
/// Sets the prompt text to generate a response from.
///
/// The converted text is stored in the request's `prompt` field, replacing
/// any value set earlier.
pub fn prompt<P>(mut self, prompt: P) -> Self
where
    P: Into<String>,
{
    let request = &mut self.generate_request;
    request.prompt = Some(prompt.into());
    self
}
/// Sets whether to stream the response.
///
/// Stores `Some(stream)` in the request, so the choice is always sent
/// explicitly once this setter has been called.
pub fn stream(mut self, stream: bool) -> Self {
    let request = &mut self.generate_request;
    request.stream = Some(stream);
    self
}
/// Sets the suffix for fill-in-the-middle generation.
///
/// Accepts anything convertible into a `String` (for example `&str` or
/// `String`), in line with the other text setters on this builder such as
/// `prompt` and `system_prompt`. The converted text is stored in the
/// request's `suffix` field, replacing any value set earlier.
pub fn suffix<S: Into<String>>(mut self, suffix: S) -> Self {
    self.generate_request.suffix = Some(suffix.into());
    self
}
/// Sets base64-encoded images for multimodal generation.
///
/// The given list replaces the request's current `images` list wholesale;
/// it is not appended to.
pub fn images(mut self, images: Vec<String>) -> Self {
    let request = &mut self.generate_request;
    request.images = images;
    self
}
/// Sets a structured output format constraint (JSON or a JSON Schema).
///
/// `value` is converted into a [`Value`] and stored in the request's
/// `format` field, replacing any value set earlier.
pub fn format<T>(mut self, value: T) -> Self
where
    T: Into<Value>,
{
    let request = &mut self.generate_request;
    request.format = Some(value.into());
    self
}
/// Enables or configures extended-thinking mode.
///
/// Stores the given [`Think`] setting in the request's `think` field,
/// replacing any value set earlier.
pub fn think(mut self, think: Think) -> Self {
    let request = &mut self.generate_request;
    request.think = Some(think);
    self
}
/// Sets runtime generation options.
///
/// Stores the given [`Options`] in the request's `options` field,
/// replacing any value set earlier.
pub fn options(mut self, options: Options) -> Self {
    let request = &mut self.generate_request;
    request.options = Some(options);
    self
}
/// Consumes the builder and returns the configured [`GenerateRequest`].
pub fn build(self) -> GenerateRequest {
self.generate_request
}
}
/// A single chunk of a streaming text generation response.
///
/// When streaming, each chunk contains a fragment of the generated text in the
/// [`response`](GenerateResponse::response) field. The final chunk has
/// [`done`](GenerateResponse::done) set to `true` and includes performance
/// statistics.
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateResponse {
/// The name of the model that generated this response.
pub model: String,
/// ISO 8601 timestamp of when this chunk was created.
pub created_at: String,
/// The generated text fragment for this chunk.
pub response: String,
/// The model's thinking/reasoning output, if thinking mode was enabled.
pub thinking: Option<String>,
/// `true` when generation has finished and this is the final chunk of the
/// response.
pub done: bool,
/// The reason generation stopped (e.g., `"stop"`). Only present in the
/// final chunk.
pub done_reason: Option<String>,
/// Total time spent generating the response, in nanoseconds. Only present
/// in the final chunk.
pub total_duration: Option<u64>,
/// Time spent loading the model, in nanoseconds. Only present in the final
/// chunk.
pub load_duration: Option<u64>,
/// Number of input tokens in the evaluated prompt. Only present in the
/// final chunk.
pub prompt_eval_count: Option<u64>,
/// Time spent evaluating the prompt, in nanoseconds. Only present in the
/// final chunk.
pub prompt_eval_duration: Option<u64>,
/// Number of output tokens generated in the response. Only present in the
/// final chunk.
pub eval_count: Option<u64>,
/// Time spent generating output tokens, in nanoseconds. Only present in
/// the final chunk.
pub eval_duration: Option<u64>,
}

View File

@@ -1,3 +1,20 @@
//! Request and response types for the Ollama API.
//!
//! Each submodule corresponds to an API endpoint:
//!
//! | Module | Endpoint | Description |
//! |--------------|-------------------|------------------------------------------|
//! | [`chat`] | `POST /api/chat` | Multi-turn chat conversations |
//! | [`generate`] | `POST /api/generate` | Single-prompt text generation |
//! | [`pull`] | `POST /api/pull` | Download models from the registry |
//! | [`tags`] | `GET /api/tags` | List available models |
//! | [`ps`] | `GET /api/ps` | List currently loaded/running models |
//! | [`version`] | `GET /api/version` | Query the server version |
//!
//! The [`common`] module contains types shared across multiple endpoints, such as
//! [`Options`](common::Options) for generation parameters, [`Think`](common::Think)
//! for reasoning mode, and [`ModelDetails`](common::ModelDetails).
pub mod chat;
pub mod common;
pub mod generate;

View File

@@ -1,21 +1,39 @@
//! Types for the running-models endpoint (`GET /api/ps`).
//!
//! The response from [`OllamaClient::ps()`](crate::OllamaClient::ps) is
//! deserialized into a [`PsResponse`].
use serde::{Deserialize, Serialize};
use crate::types::common::ModelDetails;
/// Response from the `GET /api/ps` endpoint.
///
/// Contains a list of models currently loaded in memory on the Ollama server,
/// each described by a [`RunningModel`].
#[derive(Debug, Serialize, Deserialize)]
pub struct PsResponse {
/// The currently loaded models, one [`RunningModel`] per entry.
pub models: Vec<RunningModel>,
}
/// A model that is currently loaded and running on the server.
///
/// Appears as an element of [`PsResponse::models`].
// NOTE(review): every field below is a required (non-`Option`) value with no
// serde default, so deserialization fails if the server omits any of them --
// confirm that all supported server versions send each field (e.g.
// `context_length`).
#[derive(Debug, Serialize, Deserialize)]
pub struct RunningModel {
/// The model name including tag (e.g., `"llama3:latest"`).
pub name: String,
/// The model identifier.
pub model: String,
/// Total model size in bytes.
pub size: u64,
/// The SHA-256 digest of the model.
pub digest: String,
/// Detailed model metadata (see [`ModelDetails`]).
pub details: ModelDetails,
/// ISO 8601 timestamp of when the model will be unloaded from memory.
pub expires_at: String,
/// Amount of VRAM the model occupies, in bytes.
pub size_vram: u64,
/// The context length configured for this model instance.
pub context_length: u32,
}

View File

@@ -1,15 +1,56 @@
//! Types for the model pull (download) endpoint (`POST /api/pull`).
//!
//! Use [`PullRequest::builder()`] to construct a request and pass it to
//! [`OllamaClient::pull()`](crate::OllamaClient::pull). The response is
//! streamed as a sequence of [`PullResponse`] status updates.
//!
//! # Examples
//!
//! ```no_run
//! # use ollama_rs::OllamaClient;
//! # use ollama_rs::types::pull::PullRequest;
//! # use futures_util::StreamExt;
//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
//! let client = OllamaClient::default();
//! let request = PullRequest::builder("llama3").build();
//!
//! let mut stream = client.pull(request);
//! while let Some(chunk) = stream.next().await {
//! println!("{}", chunk?.status);
//! }
//! # Ok(())
//! # }
//! ```
use serde::{Deserialize, Serialize};
/// A request to download a model from the Ollama registry (`POST /api/pull`).
///
/// Construct via [`PullRequest::builder()`]. Optional fields that are left as
/// `None` are omitted from the serialized request body entirely.
///
/// # Examples
///
/// ```
/// use ollama_rs::types::pull::PullRequest;
///
/// let request = PullRequest::builder("llama3")
///     .stream(true)
///     .build();
/// ```
#[derive(Debug, Serialize, Deserialize)]
pub struct PullRequest {
/// The model name to pull (e.g., `"llama3"`, `"llama3:latest"`).
pub model: String,
/// Allow insecure (HTTP) connections to the registry. Omitted from the
/// serialized request when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub insecure: Option<bool>,
/// Whether to stream status updates. When `None`, the field is omitted
/// from the serialized request and the server default applies.
#[serde(skip_serializing_if = "Option::is_none")]
pub stream: Option<bool>,
}
impl PullRequest {
/// Returns a [`PullRequestBuilder`] for the given model name.
pub fn builder<M: Into<String>>(model: M) -> PullRequestBuilder {
PullRequestBuilder {
pull_request: PullRequest {
@@ -21,28 +62,37 @@ impl PullRequest {
}
}
/// A builder for constructing a [`PullRequest`].
///
/// Obtain a builder via [`PullRequest::builder()`]. Every setter takes the
/// builder by value and returns it, so calls can be chained; finish with
/// [`build()`](PullRequestBuilder::build) to get the request.
pub struct PullRequestBuilder {
// The request under construction: setters write into it and `build()`
// returns it.
pull_request: PullRequest,
}
impl PullRequestBuilder {
/// Sets whether to stream status updates.
pub fn stream(mut self, stream: bool) -> Self {
self.pull_request.stream = Some(stream);
self
}
/// Allows insecure (HTTP) connections to the model registry.
pub fn insecure(mut self, insecure: bool) -> Self {
self.pull_request.insecure = Some(insecure);
self
}
/// Consumes the builder and returns the configured [`PullRequest`].
pub fn build(self) -> PullRequest {
self.pull_request
}
}
/// A streaming status update from the model pull operation.
///
/// The pull response is streamed as a sequence of these updates.
// NOTE(review): only `status` is modeled here. Serde ignores unknown fields by
// default, so any additional fields in a server payload are silently dropped
// -- confirm whether further progress information should be exposed.
#[derive(Debug, Serialize, Deserialize)]
pub struct PullResponse {
/// A human-readable status message (e.g., `"pulling manifest"`,
/// `"downloading sha256:..."`).
pub status: String,
}

View File

@@ -1,19 +1,35 @@
//! Types for the model listing endpoint (`GET /api/tags`).
//!
//! The response from [`OllamaClient::tags()`](crate::OllamaClient::tags) is
//! deserialized into a [`TagsResponse`].
use serde::{Deserialize, Serialize};
use crate::types::common::ModelDetails;
/// Response from the `GET /api/tags` endpoint.
///
/// Contains a list of all models available on the Ollama server, each
/// described by a [`Model`].
#[derive(Debug, Serialize, Deserialize)]
pub struct TagsResponse {
/// The available models, one [`Model`] per entry.
pub models: Vec<Model>,
}
/// An available model on the Ollama server.
///
/// Appears as an element of [`TagsResponse::models`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Model {
/// The model name including tag (e.g., `"llama3:latest"`).
pub name: String,
/// The model identifier.
pub model: String,
/// ISO 8601 timestamp of when the model was last modified.
pub modified_at: String,
/// Total model size in bytes.
pub size: u64,
/// The SHA-256 digest of the model.
pub digest: String,
/// Detailed model metadata (see [`ModelDetails`]).
pub details: ModelDetails,
}

View File

@@ -1,7 +1,14 @@
//! Types for the server version endpoint (`GET /api/version`).
//!
//! The response from [`OllamaClient::version()`](crate::OllamaClient::version)
//! is deserialized into a [`VersionResponse`].
use serde::{Deserialize, Serialize};
/// Response from the `GET /api/version` endpoint.
///
/// Carries a single field, the server's version string.
#[derive(Debug, Serialize, Deserialize)]
pub struct VersionResponse {
/// The Ollama server version string (e.g., `"0.6.2"`).
pub version: String,
}