Skip to content

Commit b636886

Browse files
authored
feat: introduce GGMLBlock and implement SVD(Broken) (#159)
* introduce GGMLBlock and implement SVD(Broken)
* add sdxl vae warning
1 parent 349439f commit b636886

20 files changed

+3960 additions, -3641 deletions (lines changed)

CMakeLists.txt

+2-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ endif()
6060
set(SD_LIB stable-diffusion)
6161

6262
add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp model.h model.cpp util.h util.cpp upscaler.cpp
63-
ggml_extend.hpp clip.hpp common.hpp unet.hpp tae.hpp esrgan.hpp lora.hpp denoiser.hpp rng.hpp rng_philox.hpp)
63+
ggml_extend.hpp clip.hpp common.hpp unet.hpp tae.hpp esrgan.hpp lora.hpp denoiser.hpp rng.hpp rng_philox.hpp
64+
control.hpp preprocessing.hpp)
6465

6566
if(BUILD_SHARED_LIBS)
6667
message("Build shared library")

README.md

+1
Original file line number | Diff line number | Diff line change
@@ -329,6 +329,7 @@ Thank you to all the people who have already contributed to stable-diffusion.cpp
329329
- [stable-diffusion](https://github.com/CompVis/stable-diffusion)
330330
- [stable-diffusion-stability-ai](https://github.com/Stability-AI/stablediffusion)
331331
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
332+
- [ComfyUI](https://github.com/comfyanonymous/ComfyUI)
332333
- [k-diffusion](https://github.com/crowsonkb/k-diffusion)
333334
- [latent-consistency-model](https://github.com/luosiallen/latent-consistency-model)
334335
- [generative-models](https://github.com/Stability-AI/generative-models/)

clip.hpp

+506-418
Large diffs are not rendered by default.

common.hpp

+420-434
Large diffs are not rendered by default.

control.hpp

+330-561
Large diffs are not rendered by default.

esrgan.hpp

+127-339
Large diffs are not rendered by default.

examples/cli/main.cpp

+70-17
Original file line number | Diff line number | Diff line change
@@ -43,12 +43,14 @@ const char* schedule_str[] = {
4343
const char* modes_str[] = {
4444
"txt2img",
4545
"img2img",
46+
"img2vid",
4647
"convert",
4748
};
4849

4950
enum SDMode {
5051
TXT2IMG,
5152
IMG2IMG,
53+
IMG2VID,
5254
CONVERT,
5355
MODE_COUNT
5456
};
@@ -71,12 +73,18 @@ struct SDParams {
7173

7274
std::string prompt;
7375
std::string negative_prompt;
76+
float min_cfg = 1.0f;
7477
float cfg_scale = 7.0f;
7578
int clip_skip = -1; // <= 0 represents unspecified
7679
int width = 512;
7780
int height = 512;
7881
int batch_count = 1;
7982

83+
int video_frames = 6;
84+
int motion_bucket_id = 127;
85+
int fps = 6;
86+
float augmentation_level = 0.f;
87+
8088
sample_method_t sample_method = EULER_A;
8189
schedule_t schedule = DEFAULT;
8290
int sample_steps = 20;
@@ -108,6 +116,7 @@ void print_params(SDParams params) {
108116
printf(" strength(control): %.2f\n", params.control_strength);
109117
printf(" prompt: %s\n", params.prompt.c_str());
110118
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
119+
printf(" min_cfg: %.2f\n", params.min_cfg);
111120
printf(" cfg_scale: %.2f\n", params.cfg_scale);
112121
printf(" clip_skip: %d\n", params.clip_skip);
113122
printf(" width: %d\n", params.width);
@@ -190,7 +199,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
190199
}
191200
}
192201
if (mode_found == -1) {
193-
fprintf(stderr, "error: invalid mode %s, must be one of [txt2img, img2img]\n",
202+
fprintf(stderr,
203+
"error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n",
194204
mode_selected);
195205
exit(1);
196206
}
@@ -420,7 +430,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
420430
params.n_threads = get_num_physical_cores();
421431
}
422432

423-
if (params.mode != CONVERT && params.prompt.length() == 0) {
433+
if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt.length() == 0) {
424434
fprintf(stderr, "error: the following arguments are required: prompt\n");
425435
print_usage(argc, argv);
426436
exit(1);
@@ -432,7 +442,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
432442
exit(1);
433443
}
434444

435-
if (params.mode == IMG2IMG && params.input_path.length() == 0) {
445+
if ((params.mode == IMG2IMG || params.mode == IMG2VID) && params.input_path.length() == 0) {
436446
fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
437447
print_usage(argc, argv);
438448
exit(1);
@@ -539,9 +549,14 @@ int main(int argc, const char* argv[]) {
539549
}
540550
}
541551

552+
if (params.mode == IMG2VID) {
553+
fprintf(stderr, "SVD support is broken, do not use it!!!\n");
554+
return 1;
555+
}
556+
542557
bool vae_decode_only = true;
543558
uint8_t* input_image_buffer = NULL;
544-
if (params.mode == IMG2IMG) {
559+
if (params.mode == IMG2IMG || params.mode == IMG2VID) {
545560
vae_decode_only = false;
546561

547562
int c = 0;
@@ -625,19 +640,57 @@ int main(int argc, const char* argv[]) {
625640
3,
626641
input_image_buffer};
627642

628-
results = img2img(sd_ctx,
629-
input_image,
630-
params.prompt.c_str(),
631-
params.negative_prompt.c_str(),
632-
params.clip_skip,
633-
params.cfg_scale,
634-
params.width,
635-
params.height,
636-
params.sample_method,
637-
params.sample_steps,
638-
params.strength,
639-
params.seed,
640-
params.batch_count);
643+
if (params.mode == IMG2VID) {
644+
results = img2vid(sd_ctx,
645+
input_image,
646+
params.width,
647+
params.height,
648+
params.video_frames,
649+
params.motion_bucket_id,
650+
params.fps,
651+
params.augmentation_level,
652+
params.min_cfg,
653+
params.cfg_scale,
654+
params.sample_method,
655+
params.sample_steps,
656+
params.strength,
657+
params.seed);
658+
if (results == NULL) {
659+
printf("generate failed\n");
660+
free_sd_ctx(sd_ctx);
661+
return 1;
662+
}
663+
size_t last = params.output_path.find_last_of(".");
664+
std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
665+
for (int i = 0; i < params.video_frames; i++) {
666+
if (results[i].data == NULL) {
667+
continue;
668+
}
669+
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
670+
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
671+
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
672+
printf("save result image to '%s'\n", final_image_path.c_str());
673+
free(results[i].data);
674+
results[i].data = NULL;
675+
}
676+
free(results);
677+
free_sd_ctx(sd_ctx);
678+
return 0;
679+
} else {
680+
results = img2img(sd_ctx,
681+
input_image,
682+
params.prompt.c_str(),
683+
params.negative_prompt.c_str(),
684+
params.clip_skip,
685+
params.cfg_scale,
686+
params.width,
687+
params.height,
688+
params.sample_method,
689+
params.sample_steps,
690+
params.strength,
691+
params.seed,
692+
params.batch_count);
693+
}
641694
}
642695

643696
if (results == NULL) {

ggml

Submodule ggml updated from 2f3b12f to 9a5ce30

0 commit comments

Comments
 (0)