Add first successful sampling implementation

This commit is contained in:
Gadersd
2023-08-04 17:01:44 -04:00
committed by Ben_Kosytorz
parent b794e9a9ec
commit 8e7a8d9be4
9 changed files with 42 additions and 34 deletions

View File

@@ -1,5 +1,3 @@
#![feature(generic_const_exprs)]
pub mod model;
pub mod tokenizer;
pub mod helper;

View File

@@ -65,16 +65,23 @@ fn main() {
let output = unet.forward(input, timesteps, context);*/
//print_tensor(output);
println!("Loading tokenizer...");
let tokenizer = SimpleTokenizer::new().unwrap();
println!("Loading Stable Diffusion...");
let sd: StableDiffusion<Backend> = load_stable_diffusion("params", &device).unwrap();
let sd = sd.to_device(&device);
let unconditional_guidance_scale = 7.5;
let unconditional_context = sd.unconditional_context(&tokenizer);
let context = sd.context(&tokenizer, "A rainbow pony is flying.").unsqueeze();
let context = sd.context(&tokenizer, "A wine glass filled with pink flower petals.").unsqueeze();
let n_steps = 5;
let n_steps = 100;
println!("Sampling images...");
let images = sd.sample_image(context, unconditional_context, unconditional_guidance_scale, n_steps);
println!("Saving images...");
save_images(&images, "image_samples/", 512, 512).unwrap();
}

View File

@@ -35,7 +35,7 @@ pub fn qkv_attention<B: Backend>(q: Tensor<B, 3>, k: Tensor<B, 3>, v: Tensor<B,
return o;
}
pub fn attn_decoder_mask<B: Backend>(seq_length: usize) -> Tensor<B, 2> {
pub fn attn_decoder_mask<B: Backend>(seq_length: usize, device: &B::Device) -> Tensor<B, 2> {
let mut mask = Tensor::<B, 2>::zeros([seq_length, seq_length]);
for i in 0..(seq_length - 1) {
@@ -43,5 +43,5 @@ pub fn attn_decoder_mask<B: Backend>(seq_length: usize) -> Tensor<B, 2> {
mask = mask.slice_assign([i..i + 1, i + 1..seq_length], values);
}
return mask;
return mask.to_device(device);
}

View File

@@ -43,14 +43,6 @@ impl AutoencoderConfig {
}
/// Debug helper that prints a preview of a rank-4 tensor to stdout.
///
/// Only index 0 of the first dimension is shown, and the second dimension
/// (destructured as `channels` in the original code) is capped at 10 entries;
/// the remaining two dimensions are printed in full. The selected slice is
/// converted with `into_data()` and written using its `Debug` formatting.
fn print_tensor<B: Backend>(x: Tensor<B, 4>) {
    let [_, n_channels, h, w] = x.dims();

    // Cap the number of channels so the dump stays readable.
    let shown_channels = n_channels.min(10);

    let ranges = [0..1, 0..shown_channels, 0..h, 0..w];
    let preview = x.slice(ranges).into_data();
    println!("{:?}", preview);
}
#[derive(Module, Debug)]
pub struct Autoencoder<B: Backend> {
encoder: Encoder<B>,

View File

@@ -59,7 +59,7 @@ impl<B: Backend> CLIP<B> {
pub fn forward(&self, x: Tensor<B, 2, Int>) -> Tensor<B, 3> {
let [n_batch, seq_len] = x.dims();
let mask = attn_decoder_mask(seq_len);
let mask = attn_decoder_mask(seq_len, &x.device());
let embedded = self.token_embedding.forward(x)
+ self.position_embedding.val().slice([0..seq_len]).unsqueeze();

View File

@@ -85,19 +85,21 @@ impl<B: Backend> StableDiffusion<B> {
let start = b * num_elements_per_image;
let end = start + num_elements_per_image;
flattened[start..end].into_iter().map(|v| v.to_u8().unwrap()).collect()
flattened[start..end].into_iter().map(|v| v.to_f64().unwrap().min(255.0).max(0.0).to_u8().unwrap()).collect()
}).collect()
}
pub fn sample_latent(&self, context: Tensor<B, 3>, unconditional_context: Tensor<B, 2>, unconditional_guidance_scale: f64, n_steps: usize) -> Tensor<B, 4> {
assert!(self.n_steps % n_steps == 0);
let device = context.device();
let step_size = self.n_steps / n_steps;
let [n_batches, _, _] = context.dims();
let gen_noise = || {
Tensor::random([n_batches, 4, 64, 64], Distribution::Normal(0.0, 1.0) )
Tensor::random([n_batches, 4, 64, 64], Distribution::Normal(0.0, 1.0)).to_device(&device)
};
let sigma = 0.0; // Use deterministic diffusion
@@ -114,7 +116,7 @@ impl<B: Backend> StableDiffusion<B> {
let sqrt_noise = (1.0 - current_alpha).sqrt();
let timestep = Tensor::from_ints([t as i32]);
let timestep = Tensor::from_ints([t as i32]).to_device(&device);
let pred_noise = self.forward_diffuser(latent.clone(), timestep, context.clone(), unconditional_context.clone(), unconditional_guidance_scale);
let predx0 = (latent - pred_noise.clone() * sqrt_noise) / current_alpha.sqrt();
@@ -128,17 +130,29 @@ impl<B: Backend> StableDiffusion<B> {
}
fn forward_diffuser(&self, latent: Tensor<B, 4>, timestep: Tensor<B, 1, Int>, context: Tensor<B, 3>, unconditional_context: Tensor<B, 2>, unconditional_guidance_scale: f64) -> Tensor<B, 4> {
let [n_batch, n_channel, height, width] = latent.dims();
let latent = latent.repeat(0, 2);
//let [n_batch, n_channel, height, width] = latent.dims();
//let latent = latent.repeat(0, 2);
let latent = self.diffusion.forward(
let unconditional_latent = self.diffusion.forward(
latent.clone(),
timestep.clone(),
unconditional_context.unsqueeze()
);
let conditional_latent = self.diffusion.forward(
latent,
timestep,
context
);
/*let latent = self.diffusion.forward(
latent.repeat(0, 2),
timestep.repeat(0, 2),
Tensor::cat(vec![unconditional_context.unsqueeze::<3>(), context], 0)
);
let unconditional_latent = latent.clone().slice([0..n_batch]);
let conditional_latent = latent.slice([n_batch..2 * n_batch]);
let conditional_latent = latent.slice([n_batch..2 * n_batch]);*/
unconditional_latent.clone() + (conditional_latent - unconditional_latent) * unconditional_guidance_scale
}
@@ -148,10 +162,11 @@ impl<B: Backend> StableDiffusion<B> {
}
pub fn context(&self, tokenizer: &SimpleTokenizer, text: &str) -> Tensor<B, 3> {
let device = &self.devices()[0];
let text = format!("<|startoftext|>{}<|endoftext|>", text);
let tokenized: Vec<_> = tokenizer.encode(&text).into_iter().map(|v| v as i32).collect();
self.clip.forward(Tensor::from_ints(&tokenized[..]).unsqueeze())
self.clip.forward(Tensor::from_ints(&tokenized[..]).to_device(device).unsqueeze())
}
}

View File

@@ -113,7 +113,6 @@ impl<B: Backend> UNet<B> {
// input blocks
for block in self.input_blocks.as_array() {
println!("{:?}", x.clone().flatten::<1>(0, 3).slice([0..100]).into_data());
x = block.forward(x, emb.clone(), context.clone());
saved_inputs.push(x.clone())
}