Skip to content

Commit 6727d1c

Browse files
committed
add --clip-on-cpu
1 parent df28af9 commit 6727d1c

File tree

5 files changed

+85
-26
lines changed

5 files changed

+85
-26
lines changed

clip.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ struct CLIPEncoder : public GGMLBlock {
567567
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x, int clip_skip = -1, bool mask = true) {
568568
// x: [N, n_token, d_model]
569569
int layer_idx = n_layer - 1;
570-
LOG_DEBUG("clip_skip %d", clip_skip);
570+
// LOG_DEBUG("clip_skip %d", clip_skip);
571571
if (clip_skip > 0) {
572572
layer_idx = n_layer - clip_skip;
573573
}

examples/cli/main.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ struct SDParams {
100100
bool vae_tiling = false;
101101
bool control_net_cpu = false;
102102
bool normalize_input = false;
103+
bool clip_on_cpu = false;
103104
bool vae_on_cpu = false;
104105
bool canny_preprocess = false;
105106
int upscale_repeats = 1;
@@ -123,6 +124,7 @@ void print_params(SDParams params) {
123124
printf(" output_path: %s\n", params.output_path.c_str());
124125
printf(" init_img: %s\n", params.input_path.c_str());
125126
printf(" control_image: %s\n", params.control_image_path.c_str());
127+
printf(" clip on cpu: %s\n", params.clip_on_cpu ? "true" : "false");
126128
printf(" controlnet cpu: %s\n", params.control_net_cpu ? "true" : "false");
127129
printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
128130
printf(" strength(control): %.2f\n", params.control_strength);
@@ -396,6 +398,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
396398
params.control_net_cpu = true;
397399
} else if (arg == "--normalize-input") {
398400
params.normalize_input = true;
401+
} else if (arg == "--clip-on-cpu") {
402+
params.clip_on_cpu = true; // will slow down get_learned_condition but necessary for low MEM GPUs
399403
} else if (arg == "--vae-on-cpu") {
400404
params.vae_on_cpu = true; // will slow down latent decoding but necessary for low MEM GPUs
401405
} else if (arg == "--canny") {
@@ -658,6 +662,7 @@ int main(int argc, const char* argv[]) {
658662
params.wtype,
659663
params.rng_type,
660664
params.schedule,
665+
params.clip_on_cpu,
661666
params.control_net_cpu,
662667
params.vae_on_cpu);
663668

ggml_extend.hpp

+9-3
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,10 @@ struct GGMLModule {
829829

830830
// compute the required memory
831831
size_t compute_buffer_size = ggml_gallocr_get_buffer_size(compute_allocr, 0);
832-
LOG_DEBUG("%s compute buffer size: %.2f MB", get_desc().c_str(), compute_buffer_size / 1024.0 / 1024.0);
832+
LOG_DEBUG("%s compute buffer size: %.2f MB(%s)",
833+
get_desc().c_str(),
834+
compute_buffer_size / 1024.0 / 1024.0,
835+
ggml_backend_is_cpu(backend) ? "RAM" : "VRAM");
833836
return true;
834837
}
835838

@@ -874,8 +877,11 @@ struct GGMLModule {
874877
return false;
875878
}
876879
size_t params_buffer_size = ggml_backend_buffer_get_size(params_buffer);
877-
LOG_DEBUG("%s params backend buffer size = % 6.2f MB (%i tensors)",
878-
get_desc().c_str(), params_buffer_size / (1024.0 * 1024.0), num_tensors);
880+
LOG_DEBUG("%s params backend buffer size = % 6.2f MB(%s) (%i tensors)",
881+
get_desc().c_str(),
882+
params_buffer_size / (1024.0 * 1024.0),
883+
ggml_backend_is_cpu(backend) ? "RAM" : "VRAM",
884+
num_tensors);
879885
return true;
880886
}
881887

stable-diffusion.cpp

+69-22
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,11 @@ void calculate_alphas_cumprod(float* alphas_cumprod,
6565

6666
class StableDiffusionGGML {
6767
public:
68-
ggml_backend_t backend = NULL; // general backend
69-
ggml_type model_data_type = GGML_TYPE_COUNT;
68+
ggml_backend_t backend = NULL; // general backend
69+
ggml_backend_t clip_backend = NULL;
70+
ggml_backend_t control_net_backend = NULL;
71+
ggml_backend_t vae_backend = NULL;
72+
ggml_type model_data_type = GGML_TYPE_COUNT;
7073

7174
SDVersion version;
7275
bool vae_decode_only = false;
@@ -120,6 +123,9 @@ class StableDiffusionGGML {
120123

121124
// Releases every backend owned by this instance.
//
// clip_backend, control_net_backend and vae_backend may simply alias the
// general `backend` (a component only gets its own handle when a dedicated
// CPU backend is created for it). Freeing an aliased handle and then the
// general backend would release the same object twice, so a component
// backend is freed only when it is set and distinct from `backend`.
~StableDiffusionGGML() {
    if (clip_backend != NULL && clip_backend != backend) {
        ggml_backend_free(clip_backend);
    }
    if (control_net_backend != NULL && control_net_backend != backend) {
        ggml_backend_free(control_net_backend);
    }
    if (vae_backend != NULL && vae_backend != backend) {
        ggml_backend_free(vae_backend);
    }
    // Free the shared general backend last, exactly once.
    ggml_backend_free(backend);
}
124130

125131
bool load_from_file(const std::string& model_path,
@@ -131,6 +137,7 @@ class StableDiffusionGGML {
131137
bool vae_tiling_,
132138
ggml_type wtype,
133139
schedule_t schedule,
140+
bool clip_on_cpu,
134141
bool control_net_cpu,
135142
bool vae_on_cpu) {
136143
use_tiny_autoencoder = taesd_path.size() > 0;
@@ -212,7 +219,12 @@ class StableDiffusionGGML {
212219
first_stage_model->alloc_params_buffer();
213220
first_stage_model->get_param_tensors(tensors, "first_stage_model");
214221
} else {
215-
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend, model_data_type, version);
222+
clip_backend = backend;
223+
if (clip_on_cpu && !ggml_backend_is_cpu(backend)) {
224+
LOG_INFO("CLIP: Using CPU backend");
225+
clip_backend = ggml_backend_cpu_init();
226+
}
227+
cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_data_type, version);
216228
cond_stage_model->alloc_params_buffer();
217229
cond_stage_model->get_param_tensors(tensors, "cond_stage_model.");
218230

@@ -228,7 +240,6 @@ class StableDiffusionGGML {
228240
}
229241

230242
if (!use_tiny_autoencoder) {
231-
ggml_backend_t vae_backend = NULL;
232243
if (vae_on_cpu && !ggml_backend_is_cpu(backend)) {
233244
LOG_INFO("VAE Autoencoder: Using CPU backend");
234245
vae_backend = ggml_backend_cpu_init();
@@ -244,19 +255,19 @@ class StableDiffusionGGML {
244255
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
245256

246257
if (control_net_path.size() > 0) {
247-
ggml_backend_t cn_backend = NULL;
258+
ggml_backend_t controlnet_backend = NULL;
248259
if (control_net_cpu && !ggml_backend_is_cpu(backend)) {
249260
LOG_DEBUG("ControlNet: Using CPU backend");
250-
cn_backend = ggml_backend_cpu_init();
261+
controlnet_backend = ggml_backend_cpu_init();
251262
} else {
252-
cn_backend = backend;
263+
controlnet_backend = backend;
253264
}
254-
control_net = std::make_shared<ControlNet>(cn_backend, model_data_type, version);
265+
control_net = std::make_shared<ControlNet>(controlnet_backend, model_data_type, version);
255266
}
256267

257-
pmid_model = std::make_shared<PhotoMakerIDEncoder>(backend, GGML_TYPE_F32, version);
268+
pmid_model = std::make_shared<PhotoMakerIDEncoder>(clip_backend, model_data_type, version);
258269
if (id_embeddings_path.size() > 0) {
259-
pmid_lora = std::make_shared<LoraModel>(backend, GGML_TYPE_F32, id_embeddings_path, "");
270+
pmid_lora = std::make_shared<LoraModel>(backend, model_data_type, id_embeddings_path, "");
260271
if (!pmid_lora->load_from_file(true)) {
261272
LOG_WARN("load photomaker lora tensors from %s failed", id_embeddings_path.c_str());
262273
return false;
@@ -359,15 +370,49 @@ class StableDiffusionGGML {
359370
pmid_params_mem_size = pmid_model->get_params_mem_size();
360371
}
361372

362-
size_t total_params_size = clip_params_mem_size + clip_params_mem_size +
363-
clip_params_mem_size + control_net_params_mem_size + pmid_params_mem_size;
364-
LOG_INFO("total params memory size = %.2fMB (clip %.2fMB, unet %.2fMB, vae %.2fMB, controlnet %.2fMB, pmid %.2fMB)",
365-
total_params_size / 1024.0 / 1024.0,
366-
clip_params_mem_size / 1024.0 / 1024.0,
367-
unet_params_mem_size / 1024.0 / 1024.0,
368-
vae_params_mem_size / 1024.0 / 1024.0,
369-
control_net_params_mem_size / 1024.0 / 1024.0,
370-
pmid_params_mem_size / 1024.0 / 1024.0);
373+
size_t total_params_ram_size = 0;
374+
size_t total_params_vram_size = 0;
375+
if (ggml_backend_is_cpu(clip_backend)) {
376+
total_params_ram_size += clip_params_mem_size + pmid_params_mem_size;
377+
} else {
378+
total_params_vram_size += clip_params_mem_size + pmid_params_mem_size;
379+
}
380+
381+
if (ggml_backend_is_cpu(backend)) {
382+
total_params_ram_size += unet_params_mem_size;
383+
} else {
384+
total_params_vram_size += unet_params_mem_size;
385+
}
386+
387+
if (ggml_backend_is_cpu(vae_backend)) {
388+
total_params_ram_size += vae_params_mem_size;
389+
} else {
390+
total_params_vram_size += vae_params_mem_size;
391+
}
392+
393+
if (ggml_backend_is_cpu(control_net_backend)) {
394+
total_params_ram_size += control_net_params_mem_size;
395+
} else {
396+
total_params_vram_size += control_net_params_mem_size;
397+
}
398+
399+
size_t total_params_size = total_params_ram_size + total_params_vram_size;
400+
LOG_INFO(
401+
"total params memory size = %.2fMB (VRAM %.2fMB, RAM %.2fMB): "
402+
"clip %.2fMB(%s), unet %.2fMB(%s), vae %.2fMB(%s), controlnet %.2fMB(%s), pmid %.2fMB(%s)",
403+
total_params_size / 1024.0 / 1024.0,
404+
total_params_vram_size / 1024.0 / 1024.0,
405+
total_params_ram_size / 1024.0 / 1024.0,
406+
clip_params_mem_size / 1024.0 / 1024.0,
407+
ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM",
408+
unet_params_mem_size / 1024.0 / 1024.0,
409+
ggml_backend_is_cpu(backend) ? "RAM" : "VRAM",
410+
vae_params_mem_size / 1024.0 / 1024.0,
411+
ggml_backend_is_cpu(vae_backend) ? "RAM" : "VRAM",
412+
control_net_params_mem_size / 1024.0 / 1024.0,
413+
ggml_backend_is_cpu(control_net_backend) ? "RAM" : "VRAM",
414+
pmid_params_mem_size / 1024.0 / 1024.0,
415+
ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
371416
}
372417

373418
int64_t t1 = ggml_time_ms();
@@ -1435,6 +1480,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
14351480
enum sd_type_t wtype,
14361481
enum rng_type_t rng_type,
14371482
enum schedule_t s,
1483+
bool keep_clip_on_cpu,
14381484
bool keep_control_net_cpu,
14391485
bool keep_vae_on_cpu) {
14401486
sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
@@ -1467,6 +1513,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
14671513
vae_tiling,
14681514
(ggml_type)wtype,
14691515
s,
1516+
keep_clip_on_cpu,
14701517
keep_control_net_cpu,
14711518
keep_vae_on_cpu)) {
14721519
delete sd_ctx->sd;
@@ -1601,11 +1648,11 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
16011648
int32_t w = input_id_images[0]->width;
16021649
int32_t h = input_id_images[0]->height;
16031650
int32_t channels = input_id_images[0]->channel;
1604-
int32_t num_input_images = input_id_images.size();
1651+
int32_t num_input_images = (int32_t)input_id_images.size();
16051652
init_img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, w, h, channels, num_input_images);
16061653
// TODO: move these to somewhere else and be user settable
1607-
float mean[] = {0.48145466, 0.4578275, 0.40821073};
1608-
float std[] = {0.26862954, 0.26130258, 0.27577711};
1654+
float mean[] = {0.48145466f, 0.4578275f, 0.40821073f};
1655+
float std[] = {0.26862954f, 0.26130258f, 0.27577711f};
16091656
for (int i = 0; i < num_input_images; i++) {
16101657
sd_image_t* init_image = input_id_images[i];
16111658
if (normalize_input)

stable-diffusion.h

+1
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
125125
enum sd_type_t wtype,
126126
enum rng_type_t rng_type,
127127
enum schedule_t s,
128+
bool keep_clip_on_cpu,
128129
bool keep_control_net_cpu,
129130
bool keep_vae_on_cpu);
130131

0 commit comments

Comments
 (0)