diff --git a/src/bin/sample/main.rs b/src/bin/sample/main.rs index 4d0179e..6754a3e 100644 --- a/src/bin/sample/main.rs +++ b/src/bin/sample/main.rs @@ -61,7 +61,6 @@ fn main() { process::exit(1); }) }; - let sd = sd.to_device(&device); diff --git a/src/model/autoencoder/mod.rs b/src/model/autoencoder/mod.rs index 8daec5b..d3dbea7 100644 --- a/src/model/autoencoder/mod.rs +++ b/src/model/autoencoder/mod.rs @@ -348,6 +348,7 @@ pub struct PaddedConv2d { impl PaddedConv2d { fn forward(&self, x: Tensor) -> Tensor { + println!("{} {} {:?} {:?}", self.kernel_size, self.stride, self.padding, self.padding_actual); let [n_batch, n_channel, height, width] = x.dims(); let desired_height = (self.padding.pad_top + self.padding.pad_bottom + height - self.kernel_size) / self.stride + 1; diff --git a/src/model/stablediffusion/mod.rs b/src/model/stablediffusion/mod.rs index c3958bc..5390590 100644 --- a/src/model/stablediffusion/mod.rs +++ b/src/model/stablediffusion/mod.rs @@ -114,7 +114,6 @@ impl StableDiffusion { let timestep = Tensor::from_ints([t as i32]).to_device(&device); let pred_noise = self.forward_diffuser(latent.clone(), timestep, context.clone(), unconditional_context.clone(), unconditional_guidance_scale); - let predx0 = (latent - pred_noise.clone() * sqrt_noise) / current_alpha.sqrt(); let dir_latent = pred_noise * (1.0 - prev_alpha - sigma * sigma).sqrt(); diff --git a/src/model/unet/mod.rs b/src/model/unet/mod.rs index 033cc9b..d22c170 100644 --- a/src/model/unet/mod.rs +++ b/src/model/unet/mod.rs @@ -369,7 +369,6 @@ pub struct UpsampleConfig { impl UpsampleConfig { fn init(&self) -> Upsample { let conv = Conv2dConfig::new([self.n_channels, self.n_channels], [3, 3]) - .with_stride([2, 2]) .with_padding(PaddingConfig2d::Explicit(1, 1)) .init(); @@ -493,7 +492,7 @@ pub struct TransformerBlockConfig { impl TransformerBlockConfig { fn init(&self) -> TransformerBlock { let norm1 = nn::LayerNormConfig::new(self.n_state).init(); - let attn1 = MultiHeadAttentionConfig::new(self.n_state, self.n_context_state, self.n_head).init(); + let attn1 = MultiHeadAttentionConfig::new(self.n_state, self.n_state, self.n_head).init(); let norm2 = nn::LayerNormConfig::new(self.n_state).init(); let attn2 = MultiHeadAttentionConfig::new(self.n_state, self.n_context_state, self.n_head).init(); let norm3 = nn::LayerNormConfig::new(self.n_state).init();