Skip to content

Commit 5b7d179

Browse files
committed
Merge branch 'master' into upscale_repeats
2 parents 5a194f8 + b636886 commit 5b7d179

20 files changed

+3960
-3641
lines changed

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ endif()
6060
set(SD_LIB stable-diffusion)
6161

6262
add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp model.h model.cpp util.h util.cpp upscaler.cpp
63-
ggml_extend.hpp clip.hpp common.hpp unet.hpp tae.hpp esrgan.hpp lora.hpp denoiser.hpp rng.hpp rng_philox.hpp)
63+
ggml_extend.hpp clip.hpp common.hpp unet.hpp tae.hpp esrgan.hpp lora.hpp denoiser.hpp rng.hpp rng_philox.hpp
64+
control.hpp preprocessing.hpp)
6465

6566
if(BUILD_SHARED_LIBS)
6667
message("Build shared library")

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ Thank you to all the people who have already contributed to stable-diffusion.cpp
333333
- [stable-diffusion](https://github.com/CompVis/stable-diffusion)
334334
- [stable-diffusion-stability-ai](https://github.com/Stability-AI/stablediffusion)
335335
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
336+
- [ComfyUI](https://github.com/comfyanonymous/ComfyUI)
336337
- [k-diffusion](https://github.com/crowsonkb/k-diffusion)
337338
- [latent-consistency-model](https://github.com/luosiallen/latent-consistency-model)
338339
- [generative-models](https://github.com/Stability-AI/generative-models/)

clip.hpp

+506-418
Large diffs are not rendered by default.

common.hpp

+420-434
Large diffs are not rendered by default.

control.hpp

+330-561
Large diffs are not rendered by default.

esrgan.hpp

+127-339
Large diffs are not rendered by default.

examples/cli/main.cpp

+70-17
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,14 @@ const char* schedule_str[] = {
4343
const char* modes_str[] = {
4444
"txt2img",
4545
"img2img",
46+
"img2vid",
4647
"convert",
4748
};
4849

4950
enum SDMode {
5051
TXT2IMG,
5152
IMG2IMG,
53+
IMG2VID,
5254
CONVERT,
5355
MODE_COUNT
5456
};
@@ -71,12 +73,18 @@ struct SDParams {
7173

7274
std::string prompt;
7375
std::string negative_prompt;
76+
float min_cfg = 1.0f;
7477
float cfg_scale = 7.0f;
7578
int clip_skip = -1; // <= 0 represents unspecified
7679
int width = 512;
7780
int height = 512;
7881
int batch_count = 1;
7982

83+
int video_frames = 6;
84+
int motion_bucket_id = 127;
85+
int fps = 6;
86+
float augmentation_level = 0.f;
87+
8088
sample_method_t sample_method = EULER_A;
8189
schedule_t schedule = DEFAULT;
8290
int sample_steps = 20;
@@ -109,6 +117,7 @@ void print_params(SDParams params) {
109117
printf(" strength(control): %.2f\n", params.control_strength);
110118
printf(" prompt: %s\n", params.prompt.c_str());
111119
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
120+
printf(" min_cfg: %.2f\n", params.min_cfg);
112121
printf(" cfg_scale: %.2f\n", params.cfg_scale);
113122
printf(" clip_skip: %d\n", params.clip_skip);
114123
printf(" width: %d\n", params.width);
@@ -193,7 +202,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
193202
}
194203
}
195204
if (mode_found == -1) {
196-
fprintf(stderr, "error: invalid mode %s, must be one of [txt2img, img2img]\n",
205+
fprintf(stderr,
206+
"error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n",
197207
mode_selected);
198208
exit(1);
199209
}
@@ -433,7 +443,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
433443
params.n_threads = get_num_physical_cores();
434444
}
435445

436-
if (params.mode != CONVERT && params.prompt.length() == 0) {
446+
if (params.mode != CONVERT && params.mode != IMG2VID && params.prompt.length() == 0) {
437447
fprintf(stderr, "error: the following arguments are required: prompt\n");
438448
print_usage(argc, argv);
439449
exit(1);
@@ -445,7 +455,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
445455
exit(1);
446456
}
447457

448-
if (params.mode == IMG2IMG && params.input_path.length() == 0) {
458+
if ((params.mode == IMG2IMG || params.mode == IMG2VID) && params.input_path.length() == 0) {
449459
fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
450460
print_usage(argc, argv);
451461
exit(1);
@@ -552,9 +562,14 @@ int main(int argc, const char* argv[]) {
552562
}
553563
}
554564

565+
if (params.mode == IMG2VID) {
566+
fprintf(stderr, "SVD support is broken, do not use it!!!\n");
567+
return 1;
568+
}
569+
555570
bool vae_decode_only = true;
556571
uint8_t* input_image_buffer = NULL;
557-
if (params.mode == IMG2IMG) {
572+
if (params.mode == IMG2IMG || params.mode == IMG2VID) {
558573
vae_decode_only = false;
559574

560575
int c = 0;
@@ -638,19 +653,57 @@ int main(int argc, const char* argv[]) {
638653
3,
639654
input_image_buffer};
640655

641-
results = img2img(sd_ctx,
642-
input_image,
643-
params.prompt.c_str(),
644-
params.negative_prompt.c_str(),
645-
params.clip_skip,
646-
params.cfg_scale,
647-
params.width,
648-
params.height,
649-
params.sample_method,
650-
params.sample_steps,
651-
params.strength,
652-
params.seed,
653-
params.batch_count);
656+
if (params.mode == IMG2VID) {
657+
results = img2vid(sd_ctx,
658+
input_image,
659+
params.width,
660+
params.height,
661+
params.video_frames,
662+
params.motion_bucket_id,
663+
params.fps,
664+
params.augmentation_level,
665+
params.min_cfg,
666+
params.cfg_scale,
667+
params.sample_method,
668+
params.sample_steps,
669+
params.strength,
670+
params.seed);
671+
if (results == NULL) {
672+
printf("generate failed\n");
673+
free_sd_ctx(sd_ctx);
674+
return 1;
675+
}
676+
size_t last = params.output_path.find_last_of(".");
677+
std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path;
678+
for (int i = 0; i < params.video_frames; i++) {
679+
if (results[i].data == NULL) {
680+
continue;
681+
}
682+
std::string final_image_path = i > 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png";
683+
stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel,
684+
results[i].data, 0, get_image_params(params, params.seed + i).c_str());
685+
printf("save result image to '%s'\n", final_image_path.c_str());
686+
free(results[i].data);
687+
results[i].data = NULL;
688+
}
689+
free(results);
690+
free_sd_ctx(sd_ctx);
691+
return 0;
692+
} else {
693+
results = img2img(sd_ctx,
694+
input_image,
695+
params.prompt.c_str(),
696+
params.negative_prompt.c_str(),
697+
params.clip_skip,
698+
params.cfg_scale,
699+
params.width,
700+
params.height,
701+
params.sample_method,
702+
params.sample_steps,
703+
params.strength,
704+
params.seed,
705+
params.batch_count);
706+
}
654707
}
655708

656709
if (results == NULL) {

ggml

Submodule ggml updated from 2f3b12f to 9a5ce30

0 commit comments

Comments
 (0)