@@ -43,12 +43,14 @@ const char* schedule_str[] = {
43
43
// Names of the run modes as the user spells them. Entry i is the spelling of
// the SDMode enumerator whose value is i, so this table and the enum below
// must always be edited together and kept in the same order.
const char* modes_str[] = {
    "txt2img",
    "img2img",
    "img2vid",
    "convert",
};

// Run modes of the program. Each enumerator's value doubles as the index of
// its name in modes_str. MODE_COUNT is not a mode: it is the number of real
// modes and must remain the last enumerator.
enum SDMode {
    TXT2IMG,
    IMG2IMG,
    IMG2VID,
    CONVERT,
    MODE_COUNT
};

// Adding a mode to only one of the two lists would make code that indexes
// modes_str by SDMode value read the wrong name or run past the array.
// Fail the build instead of letting the lists drift apart.
static_assert(sizeof(modes_str) / sizeof(modes_str[0]) == MODE_COUNT,
              "modes_str must have exactly one entry per SDMode value");
@@ -71,12 +73,18 @@ struct SDParams {
71
73
72
74
std::string prompt;
73
75
std::string negative_prompt;
76
+ float min_cfg = 1 .0f ;
74
77
float cfg_scale = 7 .0f ;
75
78
int clip_skip = -1 ; // <= 0 represents unspecified
76
79
int width = 512 ;
77
80
int height = 512 ;
78
81
int batch_count = 1 ;
79
82
83
+ int video_frames = 6 ;
84
+ int motion_bucket_id = 127 ;
85
+ int fps = 6 ;
86
+ float augmentation_level = 0 .f;
87
+
80
88
sample_method_t sample_method = EULER_A;
81
89
schedule_t schedule = DEFAULT;
82
90
int sample_steps = 20 ;
@@ -109,6 +117,7 @@ void print_params(SDParams params) {
109
117
printf (" strength(control): %.2f\n " , params.control_strength );
110
118
printf (" prompt: %s\n " , params.prompt .c_str ());
111
119
printf (" negative_prompt: %s\n " , params.negative_prompt .c_str ());
120
+ printf (" min_cfg: %.2f\n " , params.min_cfg );
112
121
printf (" cfg_scale: %.2f\n " , params.cfg_scale );
113
122
printf (" clip_skip: %d\n " , params.clip_skip );
114
123
printf (" width: %d\n " , params.width );
@@ -193,7 +202,8 @@ void parse_args(int argc, const char** argv, SDParams& params) {
193
202
}
194
203
}
195
204
if (mode_found == -1 ) {
196
- fprintf (stderr, " error: invalid mode %s, must be one of [txt2img, img2img]\n " ,
205
+ fprintf (stderr,
206
+ " error: invalid mode %s, must be one of [txt2img, img2img, img2vid, convert]\n " ,
197
207
mode_selected);
198
208
exit (1 );
199
209
}
@@ -433,7 +443,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
433
443
params.n_threads = get_num_physical_cores ();
434
444
}
435
445
436
- if (params.mode != CONVERT && params.prompt .length () == 0 ) {
446
+ if (params.mode != CONVERT && params.mode != IMG2VID && params. prompt .length () == 0 ) {
437
447
fprintf (stderr, " error: the following arguments are required: prompt\n " );
438
448
print_usage (argc, argv);
439
449
exit (1 );
@@ -445,7 +455,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
445
455
exit (1 );
446
456
}
447
457
448
- if (params.mode == IMG2IMG && params.input_path .length () == 0 ) {
458
+ if (( params.mode == IMG2IMG || params. mode == IMG2VID) && params.input_path .length () == 0 ) {
449
459
fprintf (stderr, " error: when using the img2img mode, the following arguments are required: init-img\n " );
450
460
print_usage (argc, argv);
451
461
exit (1 );
@@ -552,9 +562,14 @@ int main(int argc, const char* argv[]) {
552
562
}
553
563
}
554
564
565
+ if (params.mode == IMG2VID) {
566
+ fprintf (stderr, " SVD support is broken, do not use it!!!\n " );
567
+ return 1 ;
568
+ }
569
+
555
570
bool vae_decode_only = true ;
556
571
uint8_t * input_image_buffer = NULL ;
557
- if (params.mode == IMG2IMG) {
572
+ if (params.mode == IMG2IMG || params. mode == IMG2VID ) {
558
573
vae_decode_only = false ;
559
574
560
575
int c = 0 ;
@@ -638,19 +653,57 @@ int main(int argc, const char* argv[]) {
638
653
3 ,
639
654
input_image_buffer};
640
655
641
- results = img2img (sd_ctx,
642
- input_image,
643
- params.prompt .c_str (),
644
- params.negative_prompt .c_str (),
645
- params.clip_skip ,
646
- params.cfg_scale ,
647
- params.width ,
648
- params.height ,
649
- params.sample_method ,
650
- params.sample_steps ,
651
- params.strength ,
652
- params.seed ,
653
- params.batch_count );
656
+ if (params.mode == IMG2VID) {
657
+ results = img2vid (sd_ctx,
658
+ input_image,
659
+ params.width ,
660
+ params.height ,
661
+ params.video_frames ,
662
+ params.motion_bucket_id ,
663
+ params.fps ,
664
+ params.augmentation_level ,
665
+ params.min_cfg ,
666
+ params.cfg_scale ,
667
+ params.sample_method ,
668
+ params.sample_steps ,
669
+ params.strength ,
670
+ params.seed );
671
+ if (results == NULL ) {
672
+ printf (" generate failed\n " );
673
+ free_sd_ctx (sd_ctx);
674
+ return 1 ;
675
+ }
676
+ size_t last = params.output_path .find_last_of (" ." );
677
+ std::string dummy_name = last != std::string::npos ? params.output_path .substr (0 , last) : params.output_path ;
678
+ for (int i = 0 ; i < params.video_frames ; i++) {
679
+ if (results[i].data == NULL ) {
680
+ continue ;
681
+ }
682
+ std::string final_image_path = i > 0 ? dummy_name + " _" + std::to_string (i + 1 ) + " .png" : dummy_name + " .png" ;
683
+ stbi_write_png (final_image_path.c_str (), results[i].width , results[i].height , results[i].channel ,
684
+ results[i].data , 0 , get_image_params (params, params.seed + i).c_str ());
685
+ printf (" save result image to '%s'\n " , final_image_path.c_str ());
686
+ free (results[i].data );
687
+ results[i].data = NULL ;
688
+ }
689
+ free (results);
690
+ free_sd_ctx (sd_ctx);
691
+ return 0 ;
692
+ } else {
693
+ results = img2img (sd_ctx,
694
+ input_image,
695
+ params.prompt .c_str (),
696
+ params.negative_prompt .c_str (),
697
+ params.clip_skip ,
698
+ params.cfg_scale ,
699
+ params.width ,
700
+ params.height ,
701
+ params.sample_method ,
702
+ params.sample_steps ,
703
+ params.strength ,
704
+ params.seed ,
705
+ params.batch_count );
706
+ }
654
707
}
655
708
656
709
if (results == NULL ) {
0 commit comments