@@ -65,8 +65,11 @@ void calculate_alphas_cumprod(float* alphas_cumprod,
65
65
66
66
class StableDiffusionGGML {
67
67
public:
68
- ggml_backend_t backend = NULL ; // general backend
69
- ggml_type model_data_type = GGML_TYPE_COUNT;
68
+ ggml_backend_t backend = NULL ; // general backend
69
+ ggml_backend_t clip_backend = NULL ;
70
+ ggml_backend_t control_net_backend = NULL ;
71
+ ggml_backend_t vae_backend = NULL ;
72
+ ggml_type model_data_type = GGML_TYPE_COUNT;
70
73
71
74
SDVersion version;
72
75
bool vae_decode_only = false ;
@@ -120,6 +123,9 @@ class StableDiffusionGGML {
120
123
121
124
// Releases the backends held by this object.
//
// The per-component handles (clip_backend, control_net_backend, vae_backend)
// are not always separately owned: load_from_file assigns
// `clip_backend = backend` when CLIP is not moved to the CPU, so a component
// handle can alias the general backend. Freeing both would release the same
// backend twice, so a component is freed only when it is set and distinct
// from `backend`; the general backend is then freed exactly once.
~StableDiffusionGGML() {
    ggml_backend_t* const component_backends[] = {&clip_backend, &control_net_backend, &vae_backend};
    for (ggml_backend_t* component : component_backends) {
        if (*component != NULL && *component != backend) {
            ggml_backend_free(*component);
        }
        // Clear the handle so no stale (possibly aliased) pointer is left behind.
        *component = NULL;
    }

    ggml_backend_free(backend);
    backend = NULL;
}
124
130
125
131
bool load_from_file (const std::string& model_path,
@@ -131,6 +137,7 @@ class StableDiffusionGGML {
131
137
bool vae_tiling_,
132
138
ggml_type wtype,
133
139
schedule_t schedule,
140
+ bool clip_on_cpu,
134
141
bool control_net_cpu,
135
142
bool vae_on_cpu) {
136
143
use_tiny_autoencoder = taesd_path.size () > 0 ;
@@ -212,7 +219,12 @@ class StableDiffusionGGML {
212
219
first_stage_model->alloc_params_buffer ();
213
220
first_stage_model->get_param_tensors (tensors, " first_stage_model" );
214
221
} else {
215
- cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(backend, model_data_type, version);
222
+ clip_backend = backend;
223
+ if (clip_on_cpu && !ggml_backend_is_cpu (backend)) {
224
+ LOG_INFO (" CLIP: Using CPU backend" );
225
+ clip_backend = ggml_backend_cpu_init ();
226
+ }
227
+ cond_stage_model = std::make_shared<FrozenCLIPEmbedderWithCustomWords>(clip_backend, model_data_type, version);
216
228
cond_stage_model->alloc_params_buffer ();
217
229
cond_stage_model->get_param_tensors (tensors, " cond_stage_model." );
218
230
@@ -228,7 +240,6 @@ class StableDiffusionGGML {
228
240
}
229
241
230
242
if (!use_tiny_autoencoder) {
231
- ggml_backend_t vae_backend = NULL ;
232
243
if (vae_on_cpu && !ggml_backend_is_cpu (backend)) {
233
244
LOG_INFO (" VAE Autoencoder: Using CPU backend" );
234
245
vae_backend = ggml_backend_cpu_init ();
@@ -244,19 +255,19 @@ class StableDiffusionGGML {
244
255
// first_stage_model->get_param_tensors(tensors, "first_stage_model.");
245
256
246
257
if (control_net_path.size () > 0 ) {
247
- ggml_backend_t cn_backend = NULL ;
258
+ ggml_backend_t controlnet_backend = NULL ;
248
259
if (control_net_cpu && !ggml_backend_is_cpu (backend)) {
249
260
LOG_DEBUG (" ControlNet: Using CPU backend" );
250
- cn_backend = ggml_backend_cpu_init ();
261
+ controlnet_backend = ggml_backend_cpu_init ();
251
262
} else {
252
- cn_backend = backend;
263
+ controlnet_backend = backend;
253
264
}
254
- control_net = std::make_shared<ControlNet>(cn_backend , model_data_type, version);
265
+ control_net = std::make_shared<ControlNet>(controlnet_backend , model_data_type, version);
255
266
}
256
267
257
- pmid_model = std::make_shared<PhotoMakerIDEncoder>(backend, GGML_TYPE_F32 , version);
268
+ pmid_model = std::make_shared<PhotoMakerIDEncoder>(clip_backend, model_data_type , version);
258
269
if (id_embeddings_path.size () > 0 ) {
259
- pmid_lora = std::make_shared<LoraModel>(backend, GGML_TYPE_F32 , id_embeddings_path, " " );
270
+ pmid_lora = std::make_shared<LoraModel>(backend, model_data_type , id_embeddings_path, " " );
260
271
if (!pmid_lora->load_from_file (true )) {
261
272
LOG_WARN (" load photomaker lora tensors from %s failed" , id_embeddings_path.c_str ());
262
273
return false ;
@@ -359,15 +370,49 @@ class StableDiffusionGGML {
359
370
pmid_params_mem_size = pmid_model->get_params_mem_size ();
360
371
}
361
372
362
- size_t total_params_size = clip_params_mem_size + clip_params_mem_size +
363
- clip_params_mem_size + control_net_params_mem_size + pmid_params_mem_size;
364
- LOG_INFO (" total params memory size = %.2fMB (clip %.2fMB, unet %.2fMB, vae %.2fMB, controlnet %.2fMB, pmid %.2fMB)" ,
365
- total_params_size / 1024.0 / 1024.0 ,
366
- clip_params_mem_size / 1024.0 / 1024.0 ,
367
- unet_params_mem_size / 1024.0 / 1024.0 ,
368
- vae_params_mem_size / 1024.0 / 1024.0 ,
369
- control_net_params_mem_size / 1024.0 / 1024.0 ,
370
- pmid_params_mem_size / 1024.0 / 1024.0 );
373
+ size_t total_params_ram_size = 0 ;
374
+ size_t total_params_vram_size = 0 ;
375
+ if (ggml_backend_is_cpu (clip_backend)) {
376
+ total_params_ram_size += clip_params_mem_size + pmid_params_mem_size;
377
+ } else {
378
+ total_params_vram_size += clip_params_mem_size + pmid_params_mem_size;
379
+ }
380
+
381
+ if (ggml_backend_is_cpu (backend)) {
382
+ total_params_ram_size += unet_params_mem_size;
383
+ } else {
384
+ total_params_vram_size += unet_params_mem_size;
385
+ }
386
+
387
+ if (ggml_backend_is_cpu (vae_backend)) {
388
+ total_params_ram_size += vae_params_mem_size;
389
+ } else {
390
+ total_params_vram_size += vae_params_mem_size;
391
+ }
392
+
393
+ if (ggml_backend_is_cpu (control_net_backend)) {
394
+ total_params_ram_size += control_net_params_mem_size;
395
+ } else {
396
+ total_params_vram_size += control_net_params_mem_size;
397
+ }
398
+
399
+ size_t total_params_size = total_params_ram_size + total_params_vram_size;
400
+ LOG_INFO (
401
+ " total params memory size = %.2fMB (VRAM %.2fMB, RAM %.2fMB): "
402
+ " clip %.2fMB(%s), unet %.2fMB(%s), vae %.2fMB(%s), controlnet %.2fMB(%s), pmid %.2fMB(%s)" ,
403
+ total_params_size / 1024.0 / 1024.0 ,
404
+ total_params_vram_size / 1024.0 / 1024.0 ,
405
+ total_params_ram_size / 1024.0 / 1024.0 ,
406
+ clip_params_mem_size / 1024.0 / 1024.0 ,
407
+ ggml_backend_is_cpu (clip_backend) ? " RAM" : " VRAM" ,
408
+ unet_params_mem_size / 1024.0 / 1024.0 ,
409
+ ggml_backend_is_cpu (backend) ? " RAM" : " VRAM" ,
410
+ vae_params_mem_size / 1024.0 / 1024.0 ,
411
+ ggml_backend_is_cpu (vae_backend) ? " RAM" : " VRAM" ,
412
+ control_net_params_mem_size / 1024.0 / 1024.0 ,
413
+ ggml_backend_is_cpu (control_net_backend) ? " RAM" : " VRAM" ,
414
+ pmid_params_mem_size / 1024.0 / 1024.0 ,
415
+ ggml_backend_is_cpu (clip_backend) ? " RAM" : " VRAM" );
371
416
}
372
417
373
418
int64_t t1 = ggml_time_ms ();
@@ -1435,6 +1480,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1435
1480
enum sd_type_t wtype,
1436
1481
enum rng_type_t rng_type,
1437
1482
enum schedule_t s,
1483
+ bool keep_clip_on_cpu,
1438
1484
bool keep_control_net_cpu,
1439
1485
bool keep_vae_on_cpu) {
1440
1486
sd_ctx_t * sd_ctx = (sd_ctx_t *)malloc (sizeof (sd_ctx_t ));
@@ -1467,6 +1513,7 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
1467
1513
vae_tiling,
1468
1514
(ggml_type)wtype,
1469
1515
s,
1516
+ keep_clip_on_cpu,
1470
1517
keep_control_net_cpu,
1471
1518
keep_vae_on_cpu)) {
1472
1519
delete sd_ctx->sd ;
@@ -1601,11 +1648,11 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx,
1601
1648
int32_t w = input_id_images[0 ]->width ;
1602
1649
int32_t h = input_id_images[0 ]->height ;
1603
1650
int32_t channels = input_id_images[0 ]->channel ;
1604
- int32_t num_input_images = input_id_images.size ();
1651
+ int32_t num_input_images = ( int32_t ) input_id_images.size ();
1605
1652
init_img = ggml_new_tensor_4d (work_ctx, GGML_TYPE_F32, w, h, channels, num_input_images);
1606
1653
// TODO: move these to somewhere else and be user settable
1607
- float mean[] = {0.48145466 , 0.4578275 , 0.40821073 };
1608
- float std[] = {0.26862954 , 0.26130258 , 0.27577711 };
1654
+ float mean[] = {0 .48145466f , 0 .4578275f , 0 .40821073f };
1655
+ float std[] = {0 .26862954f , 0 .26130258f , 0 .27577711f };
1609
1656
for (int i = 0 ; i < num_input_images; i++) {
1610
1657
sd_image_t * init_image = input_id_images[i];
1611
1658
if (normalize_input)
0 commit comments