mirror of
https://gitea.hainer-ernst.de/rasmus/burn-stablediffusion-vibecode.git
synced 2026-06-11 02:09:21 +00:00
Replace helper functions with native burn functions
This commit is contained in:
@@ -1,30 +1,20 @@
|
||||
pub mod load;
|
||||
|
||||
use burn::{
|
||||
config::Config,
|
||||
config::Config,
|
||||
module::{Module, Param},
|
||||
tensor::{
|
||||
backend::Backend,
|
||||
Tensor,
|
||||
Int,
|
||||
Float,
|
||||
BasicOps,
|
||||
Data,
|
||||
Distribution,
|
||||
},
|
||||
tensor::{backend::Backend, BasicOps, Data, Distribution, Float, Int, Tensor},
|
||||
};
|
||||
|
||||
use num_traits::ToPrimitive;
|
||||
|
||||
use super::autoencoder::{Autoencoder, AutoencoderConfig};
|
||||
use super::clip::{CLIPConfig, CLIP};
|
||||
use super::unet::{UNet, UNetConfig};
|
||||
use super::clip::{CLIP, CLIPConfig};
|
||||
use crate::tokenizer::SimpleTokenizer;
|
||||
|
||||
#[derive(Config)]
|
||||
pub struct StableDiffusionConfig {
|
||||
|
||||
}
|
||||
pub struct StableDiffusionConfig {}
|
||||
|
||||
impl StableDiffusionConfig {
|
||||
pub fn init<B: Backend>(&self) -> StableDiffusion<B> {
|
||||
@@ -36,29 +26,40 @@ impl StableDiffusionConfig {
|
||||
let clip = CLIPConfig::new(49408, 768, 12, 77, 12).init();
|
||||
|
||||
StableDiffusion {
|
||||
n_steps,
|
||||
alpha_cumulative_products,
|
||||
autoencoder,
|
||||
diffusion,
|
||||
clip,
|
||||
n_steps,
|
||||
alpha_cumulative_products,
|
||||
autoencoder,
|
||||
diffusion,
|
||||
clip,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct StableDiffusion<B: Backend> {
|
||||
n_steps: usize,
|
||||
alpha_cumulative_products: Param<Tensor<B, 1>>,
|
||||
autoencoder: Autoencoder<B>,
|
||||
diffusion: UNet<B>,
|
||||
clip: CLIP<B>,
|
||||
n_steps: usize,
|
||||
alpha_cumulative_products: Param<Tensor<B, 1>>,
|
||||
autoencoder: Autoencoder<B>,
|
||||
diffusion: UNet<B>,
|
||||
clip: CLIP<B>,
|
||||
}
|
||||
|
||||
impl<B: Backend> StableDiffusion<B> {
|
||||
pub fn sample_image(&self, context: Tensor<B, 3>, unconditional_context: Tensor<B, 2>, unconditional_guidance_scale: f64, n_steps: usize) -> Vec<Vec<u8>> {
|
||||
pub fn sample_image(
|
||||
&self,
|
||||
context: Tensor<B, 3>,
|
||||
unconditional_context: Tensor<B, 2>,
|
||||
unconditional_guidance_scale: f64,
|
||||
n_steps: usize,
|
||||
) -> Vec<Vec<u8>> {
|
||||
let [n_batch, _, _] = context.dims();
|
||||
|
||||
let latent = self.sample_latent(context, unconditional_context, unconditional_guidance_scale, n_steps);
|
||||
let latent = self.sample_latent(
|
||||
context,
|
||||
unconditional_context,
|
||||
unconditional_guidance_scale,
|
||||
n_steps,
|
||||
);
|
||||
self.latent_to_image(latent)
|
||||
}
|
||||
|
||||
@@ -71,7 +72,7 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
let width = 512;
|
||||
let num_elements_per_image = n_channel * height * width;
|
||||
|
||||
// correct size and scale and reorder to
|
||||
// correct size and scale and reorder to
|
||||
let image = (image + 1.0) / 2.0;
|
||||
let image = image
|
||||
.reshape([n_batch, n_channel, height, width])
|
||||
@@ -79,19 +80,29 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
.swap_dims(2, 3)
|
||||
.mul_scalar(255.0);
|
||||
|
||||
let flattened: Vec<_> = image.
|
||||
into_data().
|
||||
value;
|
||||
let flattened: Vec<_> = image.into_data().value;
|
||||
|
||||
(0..n_batch).into_iter().map(|b| {
|
||||
let start = b * num_elements_per_image;
|
||||
let end = start + num_elements_per_image;
|
||||
(0..n_batch)
|
||||
.into_iter()
|
||||
.map(|b| {
|
||||
let start = b * num_elements_per_image;
|
||||
let end = start + num_elements_per_image;
|
||||
|
||||
flattened[start..end].into_iter().map(|v| v.to_f64().unwrap().min(255.0).max(0.0).to_u8().unwrap()).collect()
|
||||
}).collect()
|
||||
flattened[start..end]
|
||||
.into_iter()
|
||||
.map(|v| v.to_f64().unwrap().min(255.0).max(0.0).to_u8().unwrap())
|
||||
.collect()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn sample_latent(&self, context: Tensor<B, 3>, unconditional_context: Tensor<B, 2>, unconditional_guidance_scale: f64, n_steps: usize) -> Tensor<B, 4> {
|
||||
pub fn sample_latent(
|
||||
&self,
|
||||
context: Tensor<B, 3>,
|
||||
unconditional_context: Tensor<B, 2>,
|
||||
unconditional_guidance_scale: f64,
|
||||
n_steps: usize,
|
||||
) -> Tensor<B, 4> {
|
||||
let device = context.device();
|
||||
|
||||
let step_size = self.n_steps / n_steps;
|
||||
@@ -99,7 +110,8 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
let [n_batches, _, _] = context.dims();
|
||||
|
||||
let gen_noise = || {
|
||||
Tensor::random([n_batches, 4, 64, 64], Distribution::Normal(0.0, 1.0)).to_device(&device)
|
||||
Tensor::random([n_batches, 4, 64, 64], Distribution::Normal(0.0, 1.0))
|
||||
.to_device(&device)
|
||||
};
|
||||
|
||||
let sigma = 0.0; // Use deterministic diffusion
|
||||
@@ -107,10 +119,21 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
let mut latent = gen_noise();
|
||||
|
||||
for t in (0..self.n_steps).rev().step_by(step_size) {
|
||||
let current_alpha: f64 = self.alpha_cumulative_products.val().slice([t..t + 1]).into_scalar().to_f64().unwrap();
|
||||
let current_alpha: f64 = self
|
||||
.alpha_cumulative_products
|
||||
.val()
|
||||
.slice([t..t + 1])
|
||||
.into_scalar()
|
||||
.to_f64()
|
||||
.unwrap();
|
||||
let prev_alpha: f64 = if t >= step_size {
|
||||
let i = t - step_size;
|
||||
self.alpha_cumulative_products.val().slice([i..i + 1]).into_scalar().to_f64().unwrap()
|
||||
self.alpha_cumulative_products
|
||||
.val()
|
||||
.slice([i..i + 1])
|
||||
.into_scalar()
|
||||
.to_f64()
|
||||
.unwrap()
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
@@ -118,7 +141,13 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
let sqrt_noise = (1.0 - current_alpha).sqrt();
|
||||
|
||||
let timestep = Tensor::from_ints([t as i32]).to_device(&device);
|
||||
let pred_noise = self.forward_diffuser(latent.clone(), timestep, context.clone(), unconditional_context.clone(), unconditional_guidance_scale);
|
||||
let pred_noise = self.forward_diffuser(
|
||||
latent.clone(),
|
||||
timestep,
|
||||
context.clone(),
|
||||
unconditional_context.clone(),
|
||||
unconditional_guidance_scale,
|
||||
);
|
||||
let predx0 = (latent - pred_noise.clone() * sqrt_noise) / current_alpha.sqrt();
|
||||
let dir_latent = pred_noise * (1.0 - prev_alpha - sigma * sigma).sqrt();
|
||||
|
||||
@@ -129,32 +158,36 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
latent
|
||||
}
|
||||
|
||||
fn forward_diffuser(&self, latent: Tensor<B, 4>, timestep: Tensor<B, 1, Int>, context: Tensor<B, 3>, unconditional_context: Tensor<B, 2>, unconditional_guidance_scale: f64) -> Tensor<B, 4> {
|
||||
fn forward_diffuser(
|
||||
&self,
|
||||
latent: Tensor<B, 4>,
|
||||
timestep: Tensor<B, 1, Int>,
|
||||
context: Tensor<B, 3>,
|
||||
unconditional_context: Tensor<B, 2>,
|
||||
unconditional_guidance_scale: f64,
|
||||
) -> Tensor<B, 4> {
|
||||
let [n_batch, _, _, _] = latent.dims();
|
||||
//let latent = latent.repeat(0, 2);
|
||||
|
||||
let unconditional_latent = self.diffusion.forward(
|
||||
latent.clone(),
|
||||
timestep.clone(),
|
||||
unconditional_context.unsqueeze().repeat(0, n_batch)
|
||||
latent.clone(),
|
||||
timestep.clone(),
|
||||
unconditional_context.unsqueeze().repeat(0, n_batch),
|
||||
);
|
||||
|
||||
let conditional_latent = self.diffusion.forward(
|
||||
latent,
|
||||
timestep,
|
||||
context
|
||||
);
|
||||
let conditional_latent = self.diffusion.forward(latent, timestep, context);
|
||||
|
||||
/*let latent = self.diffusion.forward(
|
||||
latent.repeat(0, 2),
|
||||
timestep.repeat(0, 2),
|
||||
latent.repeat(0, 2),
|
||||
timestep.repeat(0, 2),
|
||||
Tensor::cat(vec![unconditional_context.unsqueeze::<3>(), context], 0)
|
||||
);
|
||||
|
||||
let unconditional_latent = latent.clone().slice([0..n_batch]);
|
||||
let conditional_latent = latent.slice([n_batch..2 * n_batch]);*/
|
||||
|
||||
unconditional_latent.clone() + (conditional_latent - unconditional_latent) * unconditional_guidance_scale
|
||||
unconditional_latent.clone()
|
||||
+ (conditional_latent - unconditional_latent) * unconditional_guidance_scale
|
||||
}
|
||||
|
||||
pub fn unconditional_context(&self, tokenizer: &SimpleTokenizer) -> Tensor<B, 2> {
|
||||
@@ -164,17 +197,25 @@ impl<B: Backend> StableDiffusion<B> {
|
||||
pub fn context(&self, tokenizer: &SimpleTokenizer, text: &str) -> Tensor<B, 3> {
|
||||
let device = &self.clip.devices()[0];
|
||||
let text = format!("<|startoftext|>{}<|endoftext|>", text);
|
||||
let tokenized: Vec<_> = tokenizer.encode(&text).into_iter().map(|v| v as i32).collect();
|
||||
let tokenized: Vec<_> = tokenizer
|
||||
.encode(&text)
|
||||
.into_iter()
|
||||
.map(|v| v as i32)
|
||||
.collect();
|
||||
|
||||
self.clip.forward(Tensor::from_ints(&tokenized[..]).to_device(device).unsqueeze())
|
||||
self.clip.forward(
|
||||
Tensor::from_ints(&tokenized[..])
|
||||
.to_device(device)
|
||||
.unsqueeze(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
use crate::helper::to_float;
|
||||
use std::f64::consts::PI;
|
||||
|
||||
fn cosine_schedule<B: Backend>(n_steps: usize) -> Tensor<B, 1> {
|
||||
to_float(Tensor::arange(1..n_steps + 1))
|
||||
Tensor::arange(1..n_steps + 1)
|
||||
.float()
|
||||
.mul_scalar(PI * 0.5 / n_steps as f64)
|
||||
.cos()
|
||||
}
|
||||
@@ -185,12 +226,12 @@ fn offset_cosine_schedule<B: Backend>(n_steps: usize) -> Tensor<B, 1> {
|
||||
let start_angle = max_signal_rate.acos();
|
||||
let end_angle = min_signal_rate.acos();
|
||||
|
||||
let times = Tensor::arange(1..n_steps + 1);
|
||||
let times = Tensor::arange(1..n_steps + 1).float();
|
||||
|
||||
let diffusion_angles = to_float(times) * ( (end_angle - start_angle) / n_steps as f64) + start_angle;
|
||||
let diffusion_angles = times * ((end_angle - start_angle) / n_steps as f64) + start_angle;
|
||||
diffusion_angles.cos()
|
||||
}
|
||||
|
||||
fn offset_cosine_schedule_cumprod<B: Backend>(n_steps: usize) -> Tensor<B, 1> {
|
||||
offset_cosine_schedule::<B>(n_steps).powf(2.0)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user