Add comprehensive RustDocs for all public API items

- Add crate-level docs with usage examples for lib.rs
- Document OllamaClient, OllamaClientBuilder, and all public methods
- Document error module with OllamaError variants and OllamaResult
- Document types module with endpoint mapping table
- Document all types in common, chat, generate, ps, pull, tags, version
- Add doc examples for builders, constructors, and key types
- All 26 doc-tests pass
2026-01-30 20:16:15 +00:00
parent c6450b774a
commit f15a2c53d6
10 changed files with 767 additions and 41 deletions
--- a/src/error.rs
+++ b/src/error.rs
@@ -1,13 +1,34 @@
+//! Error types for the Ollama client.
+//!
+//! This module defines [`OllamaError`], the unified error type returned by all
+//! client operations, and the [`OllamaResult<T>`] type alias for convenience.
+
 use std::{error::Error, fmt::Display};

 use tokio_util::codec::LinesCodecError;

+/// A type alias for `Result<T, OllamaError>`.
+///
+/// Used throughout the crate as the standard return type for fallible operations.
 pub type OllamaResult<T> = Result<T, OllamaError>;

+/// Errors that can occur when communicating with the Ollama server.
+///
+/// This enum covers three failure categories:
+///
+/// - **Network** -- connection failures, timeouts, or HTTP error status codes.
+/// - **Parsing** -- JSON could not be serialized or deserialized (e.g., a malformed server response).
+/// - **Streaming** -- an error occurred while reading a streaming response line-by-line.
+///
+/// Every variant wraps its underlying error. The enum implements [`std::error::Error`]
+/// and [`Display`], plus the relevant [`From`] conversions, so it works seamlessly with `?`.
 #[derive(Debug)]
 pub enum OllamaError {
+    /// An HTTP or connection-level error from [`reqwest`].
    NetworkError(reqwest::Error),
+    /// A JSON serialization or deserialization error from [`serde_json`].
    ResponseParseError(serde_json::Error),
+    /// An error from the line-delimited streaming codec.
    LinesCodecError(LinesCodecError),
 }

--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,83 @@
+//! # ollama-rs
+//!
+//! An async Rust client library for the [Ollama](https://ollama.com/) API.
+//!
+//! This crate provides a streaming-first interface for interacting with Ollama,
+//! supporting text generation, multi-turn chat conversations, model management,
+//! structured JSON output, and tool calling.
+//!
+//! ## Quick Start
+//!
+//! ```no_run
+//! use ollama_rs::OllamaClient;
+//! use ollama_rs::types::generate::GenerateRequest;
+//! use futures_util::StreamExt;
+//!
+//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
+//! let client = OllamaClient::default();
+//!
+//! let request = GenerateRequest::builder("llama3")
+//!     .prompt("Why is the sky blue?")
+//!     .build();
+//!
+//! let mut stream = client.generate(request);
+//! while let Some(chunk) = stream.next().await {
+//!     let response = chunk?;
+//!     print!("{}", response.response);
+//! }
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## Chat Conversations
+//!
+//! ```no_run
+//! use ollama_rs::OllamaClient;
+//! use ollama_rs::types::chat::{ChatRequest, Message};
+//! use futures_util::StreamExt;
+//!
+//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
+//! let client = OllamaClient::default();
+//!
+//! let request = ChatRequest::builder("llama3")
+//!     .messages(vec![
+//!         Message::system("You are a helpful assistant."),
+//!         Message::user("Hello!"),
+//!     ])
+//!     .build();
+//!
+//! let mut stream = client.chat(request);
+//! while let Some(chunk) = stream.next().await {
+//!     let response = chunk?;
+//!     print!("{}", response.message.content);
+//! }
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ## Custom Client Configuration
+//!
+//! ```
+//! use ollama_rs::OllamaClient;
+//! use std::time::Duration;
+//!
+//! // Connect to a remote server with a custom timeout
+//! let client = OllamaClient::builder("http://my-server:11434")
+//!     .connection_timeout(Duration::from_secs(60))
+//!     .build();
+//! ```
+//!
+//! ## Features
+//!
+//! - **Streaming responses** -- all generation endpoints return [`futures_util::Stream`]s,
+//!   allowing token-by-token processing.
+//! - **Builder pattern** -- all request types use builders for ergonomic construction.
+//! - **Multi-turn chat** -- maintain conversation context with [`types::chat::ChatRequest`].
+//! - **Structured output** -- request JSON responses conforming to a schema via the `format` field.
+//! - **Tool calling** -- let the model invoke functions with [`types::chat::Tool`] definitions.
+//! - **Thinking mode** -- enable extended reasoning with [`types::common::Think`].
+//! - **Model management** -- list, pull, and inspect models.
+
 use std::time::Duration;

 use async_stream::stream;
@@ -24,8 +104,33 @@ use crate::{
 pub mod error;
 pub mod types;

+/// Default connection timeout applied when no custom timeout is specified (30 seconds).
 const DEFAULT_CONNECTION_TIMEOUT: Duration = Duration::from_secs(30);

+/// An async client for communicating with an Ollama server.
+///
+/// `OllamaClient` is the primary entry point for this crate. It wraps an HTTP client
+/// and provides a method for each Ollama API endpoint this crate supports.
+///
+/// # Construction
+///
+/// There are three ways to create a client:
+///
+/// - [`OllamaClient::default()`] -- connects to `http://localhost:11434` with a 30-second timeout.
+/// - [`OllamaClient::new()`] -- connects to a custom address with a 30-second timeout.
+/// - [`OllamaClient::builder()`] -- full control over address and connection timeout.
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::OllamaClient;
+///
+/// // Default local connection
+/// let client = OllamaClient::default();
+///
+/// // Custom server address
+/// let client = OllamaClient::new("http://my-server:11434");
+/// ```
 #[derive(Clone)]
 pub struct OllamaClient {
    server_address: String,
@@ -39,6 +144,18 @@ impl Default for OllamaClient {
 }

 impl OllamaClient {
+    /// Creates a new client connected to the given server address.
+    ///
+    /// Uses the default connection timeout of 30 seconds. For custom timeouts,
+    /// use [`OllamaClient::builder()`] instead.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ollama_rs::OllamaClient;
+    ///
+    /// let client = OllamaClient::new("http://localhost:11434");
+    /// ```
    pub fn new<S: AsRef<str>>(server_address: S) -> Self {
        Self {
            server_address: server_address.as_ref().to_string(),
@@ -49,6 +166,18 @@ impl OllamaClient {
        }
    }

+    /// Returns an [`OllamaClientBuilder`] for constructing a client with custom settings.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ollama_rs::OllamaClient;
+    /// use std::time::Duration;
+    ///
+    /// let client = OllamaClient::builder("http://localhost:11434")
+    ///     .connection_timeout(Duration::from_secs(60))
+    ///     .build();
+    /// ```
    pub fn builder<S: AsRef<str>>(server_address: S) -> OllamaClientBuilder {
        OllamaClientBuilder {
            server_address: server_address.as_ref().to_string(),
@@ -56,7 +185,14 @@ impl OllamaClient {
        }
    }

-    /// Retrieve the version of the Ollama
+    /// Retrieves the version of the connected Ollama server.
+    ///
+    /// Calls `GET /api/version`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
+    /// a non-success status code.
    pub async fn version(&self) -> OllamaResult<VersionResponse> {
        let request_address = format!("{}/api/version", self.server_address);
        Ok(self
@@ -69,7 +205,14 @@ impl OllamaClient {
            .await?)
    }

-    /// Fetch a list of models and their details
+    /// Lists all models available on the Ollama server along with their details.
+    ///
+    /// Calls `GET /api/tags`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
+    /// a non-success status code.
    pub async fn tags(&self) -> OllamaResult<TagsResponse> {
        let request_address = format!("{}/api/tags", self.server_address);
        info!("List models: {}", request_address);
@@ -83,7 +226,14 @@ impl OllamaClient {
            .await?)
    }

-    /// Retrieve a list of models that are currently running
+    /// Lists models that are currently loaded and running on the Ollama server.
+    ///
+    /// Calls `GET /api/ps`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`OllamaError::NetworkError`] if the server is unreachable or returns
+    /// a non-success status code.
    pub async fn ps(&self) -> OllamaResult<PsResponse> {
        let request_address = format!("{}/api/ps", self.server_address);
        info!("List models: {}", request_address);
@@ -135,7 +285,33 @@ impl OllamaClient {
        })
    }

-    /// Generates a response for the provided prompt
+    /// Generates a text completion for the given prompt.
+    ///
+    /// Returns a stream of [`GenerateResponse`] chunks. Each chunk contains a
+    /// fragment of the generated text. The final chunk has `done` set to `true`
+    /// and includes timing and token count statistics.
+    ///
+    /// Calls `POST /api/generate`.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use ollama_rs::OllamaClient;
+    /// # use ollama_rs::types::generate::GenerateRequest;
+    /// # use futures_util::StreamExt;
+    /// # async fn run() -> ollama_rs::error::OllamaResult<()> {
+    /// let client = OllamaClient::default();
+    /// let request = GenerateRequest::builder("llama3")
+    ///     .prompt("Explain quantum computing in one sentence.")
+    ///     .build();
+    ///
+    /// let mut stream = client.generate(request);
+    /// while let Some(chunk) = stream.next().await {
+    ///     print!("{}", chunk?.response);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
    pub fn generate(
        &self,
        request: GenerateRequest,
@@ -144,30 +320,96 @@ impl OllamaClient {
        self.stream_response(request_address, request)
    }

-    /// Generate the next chat message in a conversation between a user and an assistant.
+    /// Generates the next message in a multi-turn chat conversation.
+    ///
+    /// Returns a stream of [`ChatResponse`] chunks. Each chunk contains a partial
+    /// [`Message`](types::chat::Message) from the assistant. The final chunk has
+    /// `done` set to `true`.
+    ///
+    /// Calls `POST /api/chat`.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use ollama_rs::OllamaClient;
+    /// # use ollama_rs::types::chat::{ChatRequest, Message};
+    /// # use futures_util::StreamExt;
+    /// # async fn run() -> ollama_rs::error::OllamaResult<()> {
+    /// let client = OllamaClient::default();
+    /// let request = ChatRequest::builder("llama3")
+    ///     .messages(vec![Message::user("What is 2 + 2?")])
+    ///     .build();
+    ///
+    /// let mut stream = client.chat(request);
+    /// while let Some(chunk) = stream.next().await {
+    ///     print!("{}", chunk?.message.content);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
    pub fn chat(&self, request: ChatRequest) -> impl Stream<Item = OllamaResult<ChatResponse>> {
        let request_address = format!("{}/api/chat", self.server_address);
        self.stream_response(request_address, request)
    }

-    /// Pull a model
+    /// Downloads (pulls) a model from the Ollama registry.
+    ///
+    /// Returns a stream of [`PullResponse`] chunks reporting the download status.
+    ///
+    /// Calls `POST /api/pull`.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use ollama_rs::OllamaClient;
+    /// # use ollama_rs::types::pull::PullRequest;
+    /// # use futures_util::StreamExt;
+    /// # async fn run() -> ollama_rs::error::OllamaResult<()> {
+    /// let client = OllamaClient::default();
+    /// let request = PullRequest::builder("llama3").build();
+    ///
+    /// let mut stream = client.pull(request);
+    /// while let Some(chunk) = stream.next().await {
+    ///     println!("{}", chunk?.status);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
    pub fn pull(&self, request: PullRequest) -> impl Stream<Item = OllamaResult<PullResponse>> {
        let request_address = format!("{}/api/pull", self.server_address);
        self.stream_response(request_address, request)
    }
 }

+/// A builder for constructing an [`OllamaClient`] with custom configuration.
+///
+/// Obtain a builder via [`OllamaClient::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::OllamaClient;
+/// use std::time::Duration;
+///
+/// let client = OllamaClient::builder("http://localhost:11434")
+///     .connection_timeout(Duration::from_secs(10))
+///     .build();
+/// ```
 pub struct OllamaClientBuilder {
    server_address: String,
    connection_timeout: Duration,
 }

 impl OllamaClientBuilder {
+    /// Sets the TCP connection timeout for the underlying HTTP client.
+    ///
+    /// Defaults to 30 seconds if not specified.
    pub fn connection_timeout(mut self, timeout: Duration) -> Self {
        self.connection_timeout = timeout;
        self
    }

+    /// Consumes the builder and returns a configured [`OllamaClient`].
    pub fn build(self) -> OllamaClient {
        OllamaClient {
            server_address: self.server_address,
--- a/src/types/chat.rs
+++ b/src/types/chat.rs
@@ -1,28 +1,84 @@
+//! Types for the chat conversation endpoint (`POST /api/chat`).
+//!
+//! Use [`ChatRequest::builder()`] to construct a request and pass it to
+//! [`OllamaClient::chat()`](crate::OllamaClient::chat). The response is
+//! streamed as a sequence of [`ChatResponse`] chunks.
+//!
+//! # Examples
+//!
+//! ```no_run
+//! # use ollama_rs::OllamaClient;
+//! # use ollama_rs::types::chat::{ChatRequest, Message};
+//! # use futures_util::StreamExt;
+//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
+//! let client = OllamaClient::default();
+//!
+//! let request = ChatRequest::builder("llama3")
+//!     .messages(vec![
+//!         Message::system("You are a helpful assistant."),
+//!         Message::user("What is Rust?"),
+//!     ])
+//!     .build();
+//!
+//! let mut stream = client.chat(request);
+//! while let Some(chunk) = stream.next().await {
+//!     print!("{}", chunk?.message.content);
+//! }
+//! # Ok(())
+//! # }
+//! ```
+
 use serde::{Deserialize, Serialize};
 use serde_json::Value;

 use crate::error::OllamaResult;
 use crate::types::common::{Options, Think};

+/// The role of a participant in a chat conversation.
+///
+/// Serialized as a lowercase string: `"user"`, `"system"`, `"assistant"`, `"tool"`.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum Role {
+    /// A human user.
    User,
+    /// A system prompt that sets the assistant's behavior.
    System,
+    /// The AI assistant.
    Assistant,
+    /// A tool response providing data back to the model.
    Tool,
 }

+/// A single message in a chat conversation.
+///
+/// Use the convenience constructors [`Message::system()`], [`Message::user()`],
+/// and [`Message::tool_response()`] to create messages for the common roles.
+/// Assistant messages are typically received from the model via [`ChatResponse`].
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct Message {
+    /// The text content of the message.
    pub content: String,
+    /// The role of the message sender.
    pub role: Role,
+    /// Tool calls requested by the assistant, if any.
+    ///
+    /// Empty for non-assistant messages. Omitted from serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    #[serde(default)]
    pub tool_calls: Vec<ToolCall>,
 }

 impl Message {
+    /// Creates a system message that sets the assistant's behavior.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ollama_rs::types::chat::Message;
+    ///
+    /// let msg = Message::system("You are a helpful assistant.");
+    /// ```
    pub fn system<T: Into<String>>(content: T) -> Self {
        Self {
            content: content.into(),
@@ -31,6 +87,15 @@ impl Message {
        }
    }

+    /// Creates a user message.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ollama_rs::types::chat::Message;
+    ///
+    /// let msg = Message::user("Hello, how are you?");
+    /// ```
    pub fn user<T: Into<String>>(content: T) -> Self {
        Self {
            content: content.into(),
@@ -39,6 +104,23 @@ impl Message {
        }
    }

+    /// Creates a tool response message from a JSON value.
+    ///
+    /// The value is serialized to a JSON string and placed in the message content.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`OllamaError::ResponseParseError`](crate::error::OllamaError::ResponseParseError)
+    /// if the value cannot be serialized.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ollama_rs::types::chat::Message;
+    /// use serde_json::json;
+    ///
+    /// let msg = Message::tool_response(&json!({"temperature": 22.0})).unwrap();
+    /// ```
    pub fn tool_response(content: &Value) -> OllamaResult<Self> {
        Ok(Message {
            content: serde_json::to_string(content)?,
@@ -48,45 +130,89 @@ impl Message {
    }
 }

+/// A request to the chat endpoint (`POST /api/chat`).
+///
+/// Construct via [`ChatRequest::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::chat::{ChatRequest, Message};
+///
+/// let request = ChatRequest::builder("llama3")
+///     .messages(vec![Message::user("Hello!")])
+///     .stream(true)
+///     .build();
+/// ```
 #[derive(Debug, Serialize, Deserialize)]
 pub struct ChatRequest {
+    /// The model name to use for generation (e.g., `"llama3"`).
    pub model: String,

+    /// The conversation history as a list of messages.
    pub messages: Vec<Message>,

+    /// Whether to stream the response. When `None`, the server default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

-    /// Runtime options that control text generation
+    /// Runtime options that control text generation behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<Options>,

+    /// Tool definitions available for the model to call.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,

+    /// A JSON schema to constrain the response format for structured output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<Value>,

-    /// When set, returns separate thinking output in addition to content. Can be a boolean
-    /// (true/false) or a string ("high", "medium", "low") for supported models.
+    /// Controls extended-thinking (reasoning) mode. Can be a boolean
+    /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
+    /// supported models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub think: Option<Think>,
 }

 impl ChatRequest {
+    /// Returns a [`ChatRequestBuilder`] for the given model.
    pub fn builder<M: Into<String>>(model: M) -> ChatRequestBuilder {
        ChatRequestBuilder::new(model)
    }
 }

+/// A single chunk of a streaming chat response.
+///
+/// When streaming, each chunk contains a partial [`Message`]. The final chunk
+/// has [`done`](ChatResponse::done) set to `true`.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct ChatResponse {
+    /// The model that generated this response.
    pub model: String,
+    /// ISO 8601 timestamp of when this chunk was created.
    pub created_at: String,
+    /// The (partial) assistant message for this chunk.
    pub message: Message,
+    /// `true` when this is the final chunk of the response.
    pub done: bool,
 }

+/// A builder for constructing a [`ChatRequest`].
+///
+/// Obtain a builder via [`ChatRequest::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::chat::{ChatRequest, Message};
+/// use ollama_rs::types::common::Options;
+///
+/// let request = ChatRequest::builder("llama3")
+///     .messages(vec![Message::user("Hi")])
+///     .options(Options::builder().temperature(0.5).build())
+///     .build();
+/// ```
 pub struct ChatRequestBuilder {
    chat_request: ChatRequest,
 }
@@ -106,70 +232,114 @@ impl ChatRequestBuilder {
        }
    }

+    /// Sets the conversation history.
    pub fn messages(mut self, messages: Vec<Message>) -> Self {
        self.chat_request.messages = messages;
        self
    }

+    /// Sets runtime generation options.
    pub fn options(mut self, options: Options) -> Self {
        self.chat_request.options = Some(options);
        self
    }

+    /// Sets the tools available for the model to call.
    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
        self.chat_request.tools = tools;
        self
    }

+    /// Sets whether to stream the response.
    pub fn stream(mut self, stream: bool) -> Self {
        self.chat_request.stream = Some(stream);
        self
    }

+    /// Sets a JSON schema to constrain the response format.
    pub fn format(mut self, json_schema: Value) -> Self {
        self.chat_request.format = Some(json_schema);
        self
    }

+    /// Enables or configures extended-thinking mode.
    pub fn think(mut self, think: Think) -> Self {
        self.chat_request.think = Some(think);
        self
    }

+    /// Consumes the builder and returns the configured [`ChatRequest`].
    pub fn build(self) -> ChatRequest {
        self.chat_request
    }
 }

+/// A tool definition that the model can invoke during a chat.
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::chat::{Tool, ToolType, Function};
+/// use serde_json::json;
+///
+/// let tool = Tool {
+///     tool_type: ToolType::Function,
+///     function: Function {
+///         name: "get_weather".to_string(),
+///         description: "Get current weather for a city".to_string(),
+///         parameters: json!({
+///             "type": "object",
+///             "properties": {
+///                 "city": { "type": "string" }
+///             },
+///             "required": ["city"]
+///         }),
+///     },
+/// };
+/// ```
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct Tool {
+    /// The type of tool (currently only `Function`).
    #[serde(rename = "type")]
    pub tool_type: ToolType,
+    /// The function definition.
    pub function: Function,
 }

+/// The kind of tool. Currently only [`Function`](ToolType::Function) is supported.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum ToolType {
+    /// A callable function.
    Function,
 }

+/// A function definition for tool calling.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct Function {
+    /// The function name the model will use to invoke this tool.
    pub name: String,
+    /// A JSON Schema describing the function's parameters.
    pub parameters: Value,
+    /// A description of what the function does, to guide the model.
    pub description: String,
 }

+/// A tool call requested by the model in an assistant message.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ToolCall {
+    /// The function the model wants to invoke.
    pub function: ToolCallFunction,
 }

+/// Details of a specific function call requested by the model.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct ToolCallFunction {
+    /// The name of the function to call.
    pub name: String,
+    /// The arguments to pass, as a JSON value.
    pub arguments: Value,
+    /// The index of this tool call within the message (for parallel calls).
    pub index: usize,
 }

--- a/src/types/common.rs
+++ b/src/types/common.rs
@@ -1,65 +1,132 @@
+//! Types shared across multiple Ollama API endpoints.
+//!
+//! This module provides:
+//!
+//! - [`Options`] / [`OptionsBuilder`] -- sampling and generation parameters.
+//! - [`Think`] / [`ThinkLevel`] -- controls for extended-thinking (reasoning) mode.
+//! - [`Stop`] -- stop-sequence configuration.
+//! - [`ModelDetails`] -- metadata returned when listing models.
+
 use serde::{Deserialize, Serialize};

+/// Detailed metadata about a model, returned by the tags and ps endpoints.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct ModelDetails {
+    /// The model file format (e.g., `"gguf"`).
    pub format: String,
+    /// The primary model family (e.g., `"llama"`).
    pub family: String,
+    /// Additional model families, if any (e.g., `["llama", "clip"]`).
    pub families: Option<Vec<String>>,
+    /// Human-readable parameter count (e.g., `"8B"`).
    pub parameter_size: String,
+    /// Quantization level (e.g., `"Q4_0"`).
    pub quantization_level: String,
 }

+/// Controls extended-thinking (reasoning) mode for supported models.
+///
+/// Can be a simple boolean toggle or a named level. Serialized as an untagged
+/// enum so `true`, `false`, `"high"`, `"medium"`, and `"low"` are all valid JSON
+/// representations.
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::common::Think;
+///
+/// // Enable thinking
+/// let think = Think::Bool(true);
+///
+/// // Use a specific thinking level
+/// use ollama_rs::types::common::ThinkLevel;
+/// let think = Think::Level(ThinkLevel::High);
+/// ```
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Think {
+    /// Enable (`true`) or disable (`false`) thinking mode.
    Bool(bool),
+    /// Use a named thinking intensity level.
    Level(ThinkLevel),
 }

+/// Named intensity levels for extended-thinking mode.
+///
+/// Serialized as lowercase strings: `"high"`, `"medium"`, `"low"`.
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum ThinkLevel {
+    /// Maximum reasoning depth.
    High,
+    /// Balanced reasoning depth.
    Medium,
+    /// Minimal reasoning depth.
    Low,
 }

+/// Runtime options that control text generation behavior.
+///
+/// All fields are optional. Only fields set to `Some` are included in the
+/// serialized JSON request, letting the server apply its own defaults for
+/// omitted parameters.
+///
+/// Use [`Options::builder()`] for ergonomic construction.
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::common::{Options, Stop};
+///
+/// let options = Options::builder()
+///     .temperature(0.7)
+///     .top_k(40)
+///     .stop(Stop::Single("END".to_string()))
+///     .build();
+/// ```
 #[derive(Debug, Default, Serialize, Deserialize)]
 pub struct Options {
-    /// Random seed used for reproducible outputs
+    /// Random seed for reproducible outputs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<u64>,

-    /// Controls randomness in generation (higher = more random)
+    /// Controls randomness in generation. Higher values (e.g., `1.5`) produce
+    /// more creative output; lower values (e.g., `0.2`) produce more
+    /// deterministic output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,

-    /// Limits next token selection to the K most likely
+    /// Limits the next-token selection to the *K* most likely tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,

-    /// Cumulative probability threshold for nucleus sampling
+    /// Cumulative probability threshold for nucleus sampling.
+    /// A value of `0.9` means only the smallest set of tokens whose cumulative
+    /// probability exceeds 90% is considered.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,

-    /// Minimum probability threshold for token selection
+    /// Minimum probability threshold for token selection.
+    /// Tokens with probability below this value are discarded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_p: Option<f32>,

-    /// Stop sequences that will halt generation
+    /// One or more stop sequences that will halt generation when produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Stop>,

-    /// Context length size (number of tokens)
+    /// Context window size in tokens. Determines how many tokens the model
+    /// can attend to at once.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_ctx: Option<u32>,

-    /// Maximum number of tokens to generate
+    /// Maximum number of tokens to generate in the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_predict: Option<u32>,
 }

 impl Options {
+    /// Returns an [`OptionsBuilder`] for constructing an `Options` value.
    pub fn builder() -> OptionsBuilder {
        OptionsBuilder {
            options: Options::default(),
@@ -67,60 +134,98 @@ impl Options {
    }
 }

+/// A builder for constructing [`Options`] with only the desired parameters set.
+///
+/// Obtain a builder via [`Options::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::common::Options;
+///
+/// let options = Options::builder()
+///     .seed(42)
+///     .temperature(0.8)
+///     .num_predict(256)
+///     .build();
+/// ```
 pub struct OptionsBuilder {
    options: Options,
 }

 impl OptionsBuilder {
+    /// Sets the random seed for reproducible outputs.
    pub fn seed(mut self, seed: u64) -> Self {
        self.options.seed = Some(seed);
        self
    }

+    /// Sets the temperature for generation randomness.
    pub fn temperature(mut self, temperature: f32) -> Self {
        self.options.temperature = Some(temperature);
        self
    }

+    /// Sets the top-K sampling parameter.
    pub fn top_k(mut self, top_k: u32) -> Self {
        self.options.top_k = Some(top_k);
        self
    }

+    /// Sets the nucleus sampling probability threshold.
    pub fn top_p(mut self, top_p: f32) -> Self {
        self.options.top_p = Some(top_p);
        self
    }

+    /// Sets the minimum probability threshold for token selection.
    pub fn min_p(mut self, min_p: f32) -> Self {
        self.options.min_p = Some(min_p);
        self
    }

+    /// Sets one or more stop sequences.
    pub fn stop(mut self, stop: Stop) -> Self {
        self.options.stop = Some(stop);
        self
    }

+    /// Sets the context window size in tokens.
    pub fn num_ctx(mut self, num_ctx: u32) -> Self {
        self.options.num_ctx = Some(num_ctx);
        self
    }

+    /// Sets the maximum number of tokens to generate.
    pub fn num_predict(mut self, num_predict: u32) -> Self {
        self.options.num_predict = Some(num_predict);
        self
    }

+    /// Consumes the builder and returns the configured [`Options`].
    pub fn build(self) -> Options {
        self.options
    }
 }

+/// Stop sequences that halt text generation when produced by the model.
+///
+/// Serialized as an untagged enum: a single string or an array of strings.
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::common::Stop;
+///
+/// let single = Stop::Single("END".to_string());
+/// let multiple = Stop::Multiple(vec!["END".to_string(), "STOP".to_string()]);
+/// ```
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Stop {
+    /// A single stop sequence.
    Single(String),
+    /// Multiple stop sequences.
    Multiple(Vec<String>),
 }

--- a/src/types/generate.rs
+++ b/src/types/generate.rs
@@ -1,55 +1,113 @@
+//! Types for the text generation endpoint (`POST /api/generate`).
+//!
+//! Use [`GenerateRequest::builder()`] to construct a request and pass it to
+//! [`OllamaClient::generate()`](crate::OllamaClient::generate). The response
+//! is streamed as a sequence of [`GenerateResponse`] chunks.
+//!
+//! # Examples
+//!
+//! ```no_run
+//! # use ollama_rs::OllamaClient;
+//! # use ollama_rs::types::generate::GenerateRequest;
+//! # use futures_util::StreamExt;
+//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
+//! let client = OllamaClient::default();
+//!
+//! let request = GenerateRequest::builder("llama3")
+//!     .prompt("Why is the sky blue?")
+//!     .system_prompt("Answer in one sentence.")
+//!     .build();
+//!
+//! let mut stream = client.generate(request);
+//! while let Some(chunk) = stream.next().await {
+//!     print!("{}", chunk?.response);
+//! }
+//! # Ok(())
+//! # }
+//! ```
+
 use serde::{Deserialize, Serialize};
 use serde_json::Value;

 use crate::types::common::{Options, Think};

+/// A request to the text generation endpoint (`POST /api/generate`).
+///
+/// Construct via [`GenerateRequest::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::generate::GenerateRequest;
+///
+/// let request = GenerateRequest::builder("llama3")
+///     .prompt("Hello, world!")
+///     .build();
+/// ```
 #[derive(Debug, Serialize, Deserialize)]
 pub struct GenerateRequest {
-    /// Model name
+    /// The model name to use for generation (e.g., `"llama3"`).
    pub model: String,

-    /// Text for the model to generate a response from
+    /// The prompt text for the model to generate a response from.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt: Option<String>,

-    /// Used for fill-in-the-middle models, text that appears after the user prompt and before the
-    /// model response
+    /// Text that appears after the user prompt and before the model response.
+    /// Used for fill-in-the-middle models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,

-    /// System prompt for the model to generate a response from
+    /// A system prompt that sets the model's behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system: Option<String>,

-    /// When true, returns a stream of partial responses
+    /// Whether to stream the response. When `None`, the server default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,

-    /// Base64-encoded images for models that support image input
+    /// Base64-encoded images for multimodal models that support image input.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub images: Vec<String>,

-    /// Structured output format for the model to generate a response from.
-    /// Supports either the string "json" or a JSON schema object
+    /// A structured output format constraint. Accepts the string `"json"` for
+    /// free-form JSON or a JSON Schema object for strict validation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<Value>,

-    /// When true, returns separate thinking output in addition to content. Can be a boolean
-    /// (true/false) or a string ("high", "medium", "low") for supported models.
+    /// Controls extended-thinking (reasoning) mode. Can be a boolean
+    /// (`true`/`false`) or a level (`"high"`, `"medium"`, `"low"`) for
+    /// supported models.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub think: Option<Think>,

-    /// Runtime options that control text generation
+    /// Runtime options that control text generation behavior.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<Options>,
 }

 impl GenerateRequest {
+    /// Returns a [`GenerateRequestBuilder`] for the given model.
    pub fn builder<M: Into<String>>(model: M) -> GenerateRequestBuilder {
        GenerateRequestBuilder::new(model)
    }
 }

+/// A builder for constructing a [`GenerateRequest`].
+///
+/// Obtain a builder via [`GenerateRequest::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::generate::GenerateRequest;
+/// use ollama_rs::types::common::Options;
+///
+/// let request = GenerateRequest::builder("llama3")
+///     .prompt("Tell me a joke")
+///     .options(Options::builder().temperature(1.0).build())
+///     .build();
+/// ```
 pub struct GenerateRequestBuilder {
    generate_request: GenerateRequest,
 }
@@ -71,87 +129,109 @@ impl GenerateRequestBuilder {
        }
    }

+    /// Sets the system prompt that guides the model's behavior.
    pub fn system_prompt<P: Into<String>>(mut self, system_prompt: P) -> Self {
        self.generate_request.system = Some(system_prompt.into());
        self
    }

+    /// Sets the prompt text to generate a response from.
    pub fn prompt<P: Into<String>>(mut self, prompt: P) -> Self {
        self.generate_request.prompt = Some(prompt.into());
        self
    }

+    /// Sets whether to stream the response.
    pub fn stream(mut self, stream: bool) -> Self {
        self.generate_request.stream = Some(stream);
        self
    }

+    /// Sets the suffix for fill-in-the-middle generation.
    pub fn suffix(mut self, suffix: String) -> Self {
        self.generate_request.suffix = Some(suffix);
        self
    }

+    /// Sets the base64-encoded images for multimodal generation, replacing any set earlier.
    pub fn images(mut self, images: Vec<String>) -> Self {
        self.generate_request.images = images;
        self
    }

+    /// Sets a structured output format constraint (JSON or a JSON Schema).
    pub fn format<T: Into<Value>>(mut self, value: T) -> Self {
        self.generate_request.format = Some(value.into());
        self
    }

+    /// Enables or configures extended-thinking mode.
    pub fn think(mut self, think: Think) -> Self {
        self.generate_request.think = Some(think);
        self
    }

+    /// Sets runtime generation options.
    pub fn options(mut self, options: Options) -> Self {
        self.generate_request.options = Some(options);
        self
    }

+    /// Consumes the builder and returns the configured [`GenerateRequest`].
    pub fn build(self) -> GenerateRequest {
        self.generate_request
    }
 }

+/// A single chunk of a streaming text generation response.
+///
+/// When streaming, each chunk contains a fragment of the generated text in the
+/// [`response`](GenerateResponse::response) field. The final chunk has
+/// [`done`](GenerateResponse::done) set to `true` and includes performance
+/// statistics.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct GenerateResponse {
-    /// Model name
+    /// The model that generated this response.
    pub model: String,

-    /// ISO 8601 timestamp of response creation
+    /// ISO 8601 timestamp of when this chunk was created.
    pub created_at: String,

-    /// The model's generated text response
+    /// The generated text fragment for this chunk.
    pub response: String,

-    /// The model's generated thinking output
+    /// The model's thinking/reasoning output, if thinking mode was enabled.
    pub thinking: Option<String>,

-    /// Indicates whether generation has finished
+    /// `true` when this is the final chunk of the response.
    pub done: bool,

-    /// Reason the generation stopped
+    /// The reason generation stopped (e.g., `"stop"`). Only present in the
+    /// final chunk.
    pub done_reason: Option<String>,

-    /// Time spent generating the response in nanoseconds
+    /// Total time spent generating the response, in nanoseconds. Only present
+    /// in the final chunk.
    pub total_duration: Option<u64>,

-    /// Time spent loading the model in nanoseconds
+    /// Time spent loading the model, in nanoseconds. Only present in the final
+    /// chunk.
    pub load_duration: Option<u64>,

-    /// Number of input tokens in the prompt
+    /// Number of tokens in the evaluated prompt. Only present in the final
+    /// chunk.
    pub prompt_eval_count: Option<u64>,

-    /// Time spent evaluating the prompt in nanoseconds
+    /// Time spent evaluating the prompt, in nanoseconds. Only present in the
+    /// final chunk.
    pub prompt_eval_duration: Option<u64>,

-    /// Number of output tokens generated in the response
+    /// Number of tokens generated in the response. Only present in the final
+    /// chunk.
    pub eval_count: Option<u64>,

-    /// Time spent generating tokens in nanoseconds
+    /// Time spent generating output tokens, in nanoseconds. Only present in
+    /// the final chunk.
    pub eval_duration: Option<u64>,
 }

--- a/src/types/mod.rs
+++ b/src/types/mod.rs
@@ -1,3 +1,20 @@
+//! Request and response types for the Ollama API.
+//!
+//! Each submodule corresponds to an API endpoint:
+//!
+//! | Module       | Endpoint             | Description                           |
+//! |--------------|----------------------|---------------------------------------|
+//! | [`chat`]     | `POST /api/chat`     | Multi-turn chat conversations         |
+//! | [`generate`] | `POST /api/generate` | Single-prompt text generation         |
+//! | [`pull`]     | `POST /api/pull`     | Download models from the registry     |
+//! | [`tags`]     | `GET /api/tags`      | List available models                 |
+//! | [`ps`]       | `GET /api/ps`        | List currently loaded/running models  |
+//! | [`version`]  | `GET /api/version`   | Query the server version              |
+//!
+//! The [`common`] module contains types shared across multiple endpoints, such as
+//! [`Options`](common::Options) for generation parameters, [`Think`](common::Think)
+//! for reasoning mode, and [`ModelDetails`](common::ModelDetails).
+
 pub mod chat;
 pub mod common;
 pub mod generate;
--- a/src/types/ps.rs
+++ b/src/types/ps.rs
@@ -1,21 +1,39 @@
+//! Types for the running-models endpoint (`GET /api/ps`).
+//!
+//! The response from [`OllamaClient::ps()`](crate::OllamaClient::ps) is
+//! deserialized into a [`PsResponse`].
+
 use serde::{Deserialize, Serialize};

 use crate::types::common::ModelDetails;

+/// Response from the `GET /api/ps` endpoint.
+///
+/// Contains a list of models currently loaded in memory on the Ollama server.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct PsResponse {
+    /// The currently loaded models.
    pub models: Vec<RunningModel>,
 }

+/// A model that is currently loaded and running on the server.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct RunningModel {
+    /// The model name including tag (e.g., `"llama3:latest"`).
    pub name: String,
+    /// The model identifier.
    pub model: String,
+    /// Total model size in bytes.
    pub size: u64,
+    /// The SHA-256 digest of the model.
    pub digest: String,
+    /// Detailed model metadata.
    pub details: ModelDetails,
+    /// ISO 8601 timestamp of when the model will be unloaded from memory.
    pub expires_at: String,
+    /// Amount of VRAM the model occupies, in bytes.
    pub size_vram: u64,
+    /// The context length configured for this model instance.
    pub context_length: u32,
 }

--- a/src/types/pull.rs
+++ b/src/types/pull.rs
@@ -1,15 +1,56 @@
+//! Types for the model pull (download) endpoint (`POST /api/pull`).
+//!
+//! Use [`PullRequest::builder()`] to construct a request and pass it to
+//! [`OllamaClient::pull()`](crate::OllamaClient::pull). The response is
+//! streamed as a sequence of [`PullResponse`] status updates.
+//!
+//! # Examples
+//!
+//! ```no_run
+//! # use ollama_rs::OllamaClient;
+//! # use ollama_rs::types::pull::PullRequest;
+//! # use futures_util::StreamExt;
+//! # async fn run() -> ollama_rs::error::OllamaResult<()> {
+//! let client = OllamaClient::default();
+//! let request = PullRequest::builder("llama3").build();
+//!
+//! let mut stream = client.pull(request);
+//! while let Some(chunk) = stream.next().await {
+//!     println!("{}", chunk?.status);
+//! }
+//! # Ok(())
+//! # }
+//! ```
+
 use serde::{Deserialize, Serialize};

+/// A request to download a model from the Ollama registry (`POST /api/pull`).
+///
+/// Construct via [`PullRequest::builder()`].
+///
+/// # Examples
+///
+/// ```
+/// use ollama_rs::types::pull::PullRequest;
+///
+/// let request = PullRequest::builder("llama3")
+///     .stream(true)
+///     .build();
+/// ```
 #[derive(Debug, Serialize, Deserialize)]
 pub struct PullRequest {
+    /// The model name to pull (e.g., `"llama3"`, `"llama3:latest"`).
    pub model: String,
+    /// Whether to allow insecure (HTTP) connections to the registry. Omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub insecure: Option<bool>,
+    /// Whether to stream status updates. When `None`, the server default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
 }

 impl PullRequest {
+    /// Returns a [`PullRequestBuilder`] for the given model name.
    pub fn builder<M: Into<String>>(model: M) -> PullRequestBuilder {
        PullRequestBuilder {
            pull_request: PullRequest {
@@ -21,28 +62,37 @@ impl PullRequest {
    }
 }

+/// A builder for constructing a [`PullRequest`].
+///
+/// Obtain a builder via [`PullRequest::builder()`].
 pub struct PullRequestBuilder {
    pull_request: PullRequest,
 }

 impl PullRequestBuilder {
+    /// Sets whether to stream status updates.
    pub fn stream(mut self, stream: bool) -> Self {
        self.pull_request.stream = Some(stream);
        self
    }

+    /// Sets whether insecure (HTTP) connections to the model registry are allowed.
    pub fn insecure(mut self, insecure: bool) -> Self {
        self.pull_request.insecure = Some(insecure);
        self
    }

+    /// Consumes the builder and returns the configured [`PullRequest`].
    pub fn build(self) -> PullRequest {
        self.pull_request
    }
 }

+/// A streaming status update from the model pull operation.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct PullResponse {
+    /// A human-readable status message (e.g., `"pulling manifest"`,
+    /// `"downloading sha256:..."`).
    pub status: String,
 }

--- a/src/types/tags.rs
+++ b/src/types/tags.rs
@@ -1,19 +1,35 @@
+//! Types for the model listing endpoint (`GET /api/tags`).
+//!
+//! The response from [`OllamaClient::tags()`](crate::OllamaClient::tags) is
+//! deserialized into a [`TagsResponse`].
+
 use serde::{Deserialize, Serialize};

 use crate::types::common::ModelDetails;

+/// Response from the `GET /api/tags` endpoint.
+///
+/// Contains a list of all models available on the Ollama server.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct TagsResponse {
+    /// The available models.
    pub models: Vec<Model>,
 }

+/// An available model on the Ollama server.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct Model {
+    /// The model name including tag (e.g., `"llama3:latest"`).
    pub name: String,
+    /// The model identifier.
    pub model: String,
+    /// ISO 8601 timestamp of when the model was last modified.
    pub modified_at: String,
+    /// Total model size in bytes.
    pub size: u64,
+    /// The SHA-256 digest of the model.
    pub digest: String,
+    /// Detailed model metadata.
    pub details: ModelDetails,
 }

--- a/src/types/version.rs
+++ b/src/types/version.rs
@@ -1,7 +1,14 @@
+//! Types for the server version endpoint (`GET /api/version`).
+//!
+//! The response from [`OllamaClient::version()`](crate::OllamaClient::version)
+//! is deserialized into a [`VersionResponse`].
+
 use serde::{Deserialize, Serialize};

+/// Response from the `GET /api/version` endpoint.
 #[derive(Debug, Serialize, Deserialize)]
 pub struct VersionResponse {
+    /// The Ollama server version string (e.g., `"0.6.2"`).
    pub version: String,
 }