Use wgpu by default and ndarray for convert

This commit is contained in:
Gadersd
2023-08-08 15:32:21 -04:00
committed by Ben_Kosytorz
parent 0101e8f930
commit d4afd71fda
5 changed files with 20 additions and 26 deletions

View File

@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features] [features]
default = ["torch-backend"] default = ["wgpu-backend"]
torch-backend = ["burn-tch"] torch-backend = ["burn-tch"]
wgpu-backend = ["burn-wgpu"] wgpu-backend = ["burn-wgpu"]
@@ -22,6 +22,7 @@ optional = true
[dependencies] [dependencies]
burn = { git = "https://github.com/burn-rs/burn.git" } burn = { git = "https://github.com/burn-rs/burn.git" }
burn-ndarray = { package = "burn-ndarray", git = "https://github.com/burn-rs/burn.git" }
serde = {version = "1.0.171", features = ["std", "derive"]} serde = {version = "1.0.171", features = ["std", "derive"]}
npy = "0.4.0" npy = "0.4.0"
num-traits = "0.2.15" num-traits = "0.2.15"

View File

@@ -20,18 +20,19 @@ Start by downloading the SDv1-4.bin model provided on HuggingFace.
wget https://huggingface.co/Gadersd/Stable-Diffusion-Burn/resolve/main/V1/SDv1-4.bin wget https://huggingface.co/Gadersd/Stable-Diffusion-Burn/resolve/main/V1/SDv1-4.bin
``` ```
Next, set the appropriate CUDA version. It may be possible to run the model using wgpu without the need for torch in the future using `cargo run --features wgpu-backend...` but currently wgpu doesn't support buffer sizes large enough for Stable Diffusion.
```bash
export TORCH_CUDA_VERSION=cu113
```
### Step 2: Run the Sample Binary ### Step 2: Run the Sample Binary
Invoke the sample binary provided in the rust code, as shown below: Invoke the sample binary provided in the Rust code. By default, wgpu is used, which requires a GPU with at least 10 GB of VRAM (this requirement should drop in the future); alternatively, torch can be enabled with the `torch-backend` feature and can run on a 6 GB GPU.
```bash ```bash
# wgpu (NEEDS >= 10 GB VRAM)
# Arguments: <model_type(burn or dump)> <model> <unconditional_guidance_scale> <n_diffusion_steps> <prompt> <output_image> # Arguments: <model_type(burn or dump)> <model> <unconditional_guidance_scale> <n_diffusion_steps> <prompt> <output_image>
cargo run --release --bin sample burn SDv1-4 7.5 20 "An ancient mossy stone." img cargo run --release --bin sample burn SDv1-4 7.5 20 "An ancient mossy stone." img
# torch (at least 6 GB VRAM, possibly less)
export TORCH_CUDA_VERSION=cu113
# Arguments: <model_type(burn or dump)> <model> <unconditional_guidance_scale> <n_diffusion_steps> <prompt> <output_image>
cargo run --release --features torch-backend --bin sample burn SDv1-4 7.5 20 "An ancient mossy stone." img
``` ```
This command will generate an image according to the provided prompt, which will be saved as 'img0.png'. This command will generate an image according to the provided prompt, which will be saved as 'img0.png'.

View File

@@ -14,13 +14,7 @@ use burn::{
}, },
}; };
cfg_if::cfg_if! { use burn_ndarray::{NdArrayBackend, NdArrayDevice};
if #[cfg(feature = "torch-backend")] {
use burn_tch::{TchBackend, TchDevice};
} else if #[cfg(feature = "wgpu-backend")] {
use burn_wgpu::{WgpuBackend, WgpuDevice, AutoGraphicsApi};
}
}
use burn::record::{self, Recorder, BinFileRecorder, FullPrecisionSettings}; use burn::record::{self, Recorder, BinFileRecorder, FullPrecisionSettings};
@@ -43,15 +37,8 @@ fn save_model_file<B: Backend>(model: StableDiffusion<B>, name: &str) -> Result<
} }
fn main() { fn main() {
cfg_if::cfg_if! { type Backend = NdArrayBackend<f32>;
if #[cfg(feature = "torch-backend")] { let device = NdArrayDevice::Cpu;
type Backend = TchBackend<f32>;
let device = TchDevice::Cpu;
} else if #[cfg(feature = "wgpu-backend")] {
type Backend = WgpuBackend<AutoGraphicsApi, f32, i32>;
let device = WgpuDevice::CPU;
}
}
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if args.len() != 3 { if args.len() != 3 {

View File

@@ -74,11 +74,11 @@ fn main() {
process::exit(1); process::exit(1);
}) })
}; };
let sd = sd.to_device(&device); let sd = sd.to_device(&device);
let unconditional_context = sd.unconditional_context(&tokenizer); let unconditional_context = sd.unconditional_context(&tokenizer);
let context = sd.context(&tokenizer, prompt).unsqueeze().repeat(0, 2); // generate 2 samples let context = sd.context(&tokenizer, prompt).unsqueeze::<3>();//.repeat(0, 2); // generate 2 samples
println!("Sampling image..."); println!("Sampling image...");
let images = sd.sample_image(context, unconditional_context, unconditional_guidance_scale, n_steps); let images = sd.sample_image(context, unconditional_context, unconditional_guidance_scale, n_steps);

View File

@@ -59,6 +59,11 @@ impl<B: Backend> StableDiffusion<B> {
let [n_batch, _, _] = context.dims(); let [n_batch, _, _] = context.dims();
let latent = self.sample_latent(context, unconditional_context, unconditional_guidance_scale, n_steps); let latent = self.sample_latent(context, unconditional_context, unconditional_guidance_scale, n_steps);
self.latent_to_image(latent)
}
pub fn latent_to_image(&self, latent: Tensor<B, 4>) -> Vec<Vec<u8>> {
let [n_batch, _, _, _] = latent.dims();
let image = self.autoencoder.decode_latent(latent * (1.0 / 0.18215)); let image = self.autoencoder.decode_latent(latent * (1.0 / 0.18215));
let n_channel = 3; let n_channel = 3;
@@ -157,7 +162,7 @@ impl<B: Backend> StableDiffusion<B> {
} }
pub fn context(&self, tokenizer: &SimpleTokenizer, text: &str) -> Tensor<B, 3> { pub fn context(&self, tokenizer: &SimpleTokenizer, text: &str) -> Tensor<B, 3> {
let device = &self.devices()[0]; let device = &self.clip.devices()[0];
let text = format!("<|startoftext|>{}<|endoftext|>", text); let text = format!("<|startoftext|>{}<|endoftext|>", text);
let tokenized: Vec<_> = tokenizer.encode(&text).into_iter().map(|v| v as i32).collect(); let tokenized: Vec<_> = tokenizer.encode(&text).into_iter().map(|v| v as i32).collect();