@@ -43,12 +43,14 @@ const char* schedule_str[] = {
43
43
// Command-line names of the run modes, indexed by SDMode.
// The order here must match the enumerator order in SDMode below; the
// static_assert after the enum catches a table/enum length mismatch.
const char* modes_str[] = {
    "txt2img",
    "img2img",
    "img2vid",
    "convert",
};

// Run mode selected on the command line.
// MODE_COUNT is not a mode; it is the number of real modes and must stay last.
enum SDMode {
    TXT2IMG,
    IMG2IMG,
    IMG2VID,
    CONVERT,
    MODE_COUNT
};

// Adding a mode requires touching both the table and the enum; fail the build
// if only one of them was updated.
static_assert(sizeof(modes_str) / sizeof(modes_str[0]) == static_cast<unsigned>(MODE_COUNT),
              "modes_str must have exactly one entry per SDMode value");
@@ -71,12 +73,18 @@ struct SDParams {
71
73
72
74
std::string prompt;
73
75
std::string negative_prompt;
76
+ float min_cfg = 1 .0f ;
74
77
float cfg_scale = 7 .0f ;
75
78
int clip_skip = -1 ; // <= 0 represents unspecified
76
79
int width = 512 ;
77
80
int height = 512 ;
78
81
int batch_count = 1 ;
79
82
83
+ int video_frames = 6 ;
84
+ int motion_bucket_id = 127 ;
85
+ int fps = 6 ;
86
+ float augmentation_level = 0 .f;
87
+
80
88
sample_method_t sample_method = EULER_A;
81
89
schedule_t schedule = DEFAULT;
82
90
int sample_steps = 20 ;
@@ -108,6 +116,7 @@ void print_params(SDParams params) {
108
116
printf (" strength(control): %.2f\n " , params.control_strength );
109
117
printf (" prompt: %s\n " , params.prompt .c_str ());
110
118
printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
119
+ printf (" min_cfg: %.2f\n " , params.min_cfg );
111
120
printf (" cfg_scale: %.2f\n " , params.cfg_scale );
112
121
printf (" clip_skip: %d\n " , params.clip_skip );
113
122
printf (" width: %d\n " , params.width );
@@ -190,7 +199,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
190
199
}
191
200
}
192
201
if (mode_found == -1 ) {
193
- fprintf (stderr, " error: invalid mode %s, must be one of [txt2img, img2img]\n " ,
202
+ fprintf (stderr,
203
+ " error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n " ,
194
204
mode_selected);
195
205
exit (1 );
196
206
}
@@ -420,7 +430,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
420
430
params.n_threads = get_num_physical_cores ();
421
431
}
422
432
423
- if (params.mode != CONVERT && params.prompt .length () == 0 ) {
433
+ if (params.mode != CONVERT && params.mode != IMG2VID && params. prompt .length () == 0 ) {
424
434
fprintf (stderr, " error: the following arguments are required: prompt\n " );
425
435
print_usage (argc, argv);
426
436
exit (1 );
@@ -432,7 +442,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
432
442
exit (1 );
433
443
}
434
444
435
- if (params.mode == IMG2IMG && params.input_path .length () == 0 ) {
445
+ if (( params.mode == IMG2IMG || params. mode == IMG2VID) && params.input_path .length () == 0 ) {
436
446
fprintf (stderr, " error: when using the img2img mode, the following arguments are required: init-img\n " );
437
447
print_usage (argc, argv);
438
448
exit (1 );
@@ -539,9 +549,14 @@ int main(int argc, const char* argv[]) {
539
549
}
540
550
}
541
551
552
+ if (params.mode == IMG2VID) {
553
+ fprintf (stderr, " SVD support is broken, do not use it!!!\n " );
554
+ return 1 ;
555
+ }
556
+
542
557
bool vae_decode_only = true ;
543
558
uint8_t * input_image_buffer = NULL ;
544
- if (params.mode == IMG2IMG) {
559
+ if (params.mode == IMG2IMG || params. mode == IMG2VID ) {
545
560
vae_decode_only = false ;
546
561
547
562
int c = 0 ;
@@ -625,19 +640,57 @@ int main(int argc, const char* argv[]) {
625
640
3 ,
626
641
input_image_buffer};
627
642
628
- results = img2img (sd_ctx,
629
- input_image,
630
- params.prompt .c_str (),
631
- params.negative_prompt .c_str (),
632
- params.clip_skip ,
633
- params.cfg_scale ,
634
- params.width ,
635
- params.height ,
636
- params.sample_method ,
637
- params.sample_steps ,
638
- params.strength ,
639
- params.seed ,
640
- params.batch_count );
643
+ if (params.mode == IMG2VID) {
644
+ results = img2vid (sd_ctx,
645
+ input_image,
646
+ params.width ,
647
+ params.height ,
648
+ params.video_frames ,
649
+ params.motion_bucket_id ,
650
+ params.fps ,
651
+ params.augmentation_level ,
652
+ params.min_cfg ,
653
+ params.cfg_scale ,
654
+ params.sample_method ,
655
+ params.sample_steps ,
656
+ params.strength ,
657
+ params.seed );
658
+ if (results == NULL ) {
659
+ printf (" generate failed\n " );
660
+ free_sd_ctx (sd_ctx);
661
+ return 1 ;
662
+ }
663
+ size_t last = params.output_path .find_last_of (" ." );
664
+ std::string dummy_name = last != std::string::npos ? params.output_path .substr (0 , last) : params.output_path ;
665
+ for (int i = 0 ; i < params.video_frames ; i++) {
666
+ if (results[i].data == NULL ) {
667
+ continue ;
668
+ }
669
+ std::string final_image_path = i > 0 ? dummy_name + " _" + std::to_string (i + 1 ) + " .png" : dummy_name + " .png" ;
670
+ stbi_write_png (final_image_path.c_str (), results[i].width , results[i].height , results[i].channel ,
671
+ results[i].data , 0 , get_image_params (params, params.seed + i).c_str ());
672
+ printf (" save result image to '%s'\n " , final_image_path.c_str ());
673
+ free (results[i].data );
674
+ results[i].data = NULL ;
675
+ }
676
+ free (results);
677
+ free_sd_ctx (sd_ctx);
678
+ return 0 ;
679
+ } else {
680
+ results = img2img (sd_ctx,
681
+ input_image,
682
+ params.prompt .c_str (),
683
+ params.negative_prompt .c_str (),
684
+ params.clip_skip ,
685
+ params.cfg_scale ,
686
+ params.width ,
687
+ params.height ,
688
+ params.sample_method ,
689
+ params.sample_steps ,
690
+ params.strength ,
691
+ params.seed ,
692
+ params.batch_count );
693
+ }
641
694
}
642
695
643
696
if (results == NULL ) {
0 commit comments