use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] pub struct GenerateRequest { /// Model name pub model: String, #[serde(skip_serializing_if = "Option::is_none")] /// Text for the model to generate a response from pub prompt: Option, #[serde(skip_serializing_if = "Option::is_none")] /// Used for fill-in-the-middle models, text that appears after the user prompt and before the /// model response pub suffix: Option, /// System prompt for the model to generate a response from #[serde(skip_serializing_if = "Option::is_none")] pub system: Option, } impl GenerateRequest { pub fn builder>(model: M) -> GenerateRequestBuilder { GenerateRequestBuilder::new(model) } } pub struct GenerateRequestBuilder { generate_request: GenerateRequest, } impl GenerateRequestBuilder { fn new>(model: M) -> Self { Self { generate_request: GenerateRequest { model: model.into(), prompt: None, suffix: None, system: None, }, } } pub fn prompt>(mut self, prompt: P) -> Self { self.generate_request.prompt = Some(prompt.into()); self } pub fn build(self) -> GenerateRequest { self.generate_request } } #[derive(Debug, Serialize, Deserialize)] pub struct GenerateResponse { /// Model name pub model: String, /// ISO 8601 timestamp of response creation pub created_at: String, /// The model's generated text response pub response: String, /// The model's generated thinking output pub thinking: Option, /// Indicates whether generation has finished pub done: bool, /// Reason the generation stopped pub done_reason: Option, /// Time spent generating the response in nanoseconds pub total_duration: Option, /// Time spent loading the model in nanoseconds pub load_duration: Option, /// Number of input tokens in the prompt pub prompt_eval_count: Option, /// Time spent evaluating the prompt in nanoseconds pub prompt_eval_duration: Option, /// Number of output tokens generated in the response pub eval_count: Option, /// Time spent generating tokens in nanoseconds pub eval_duration: Option, }