Skip to content

Commit e913170

Browse files
committed
server: update
1 parent 5785ec4 commit e913170

File tree

1 file changed

+185
-22
lines changed

1 file changed

+185
-22
lines changed

examples/server/main.cpp

+185-22
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,21 @@ struct SDParams {
6666
// models
6767
std::string model_path;
6868
std::string clip_l_path;
69+
std::string clip_g_path;
6970
std::string t5xxl_path;
7071
std::string diffusion_model_path;
7172
std::string vae_path;
72-
// std::string taesd_path;
73+
std::string taesd_path;
74+
std::string esrgan_path;
75+
std::string controlnet_path;
7376
std::string embeddings_path;
7477
std::string stacked_id_embeddings_path;
75-
std::string lora_model_dir;
76-
78+
std::string input_id_images_path;
7779
sd_type_t wtype = SD_TYPE_COUNT;
80+
std::string lora_model_dir;
7881
std::string output_path = "output.png";
7982
std::string input_path;
83+
std::string control_image_path;
8084

8185
std::string prompt;
8286
std::string negative_prompt;
@@ -93,17 +97,22 @@ struct SDParams {
9397
schedule_t schedule = DEFAULT;
9498
int sample_steps = 20;
9599
float strength = 0.75f;
100+
float control_strength = 0.9f;
96101
rng_type_t rng_type = CUDA_RNG;
97102
int64_t seed = 42;
98103
bool verbose = false;
99104
bool vae_tiling = false;
105+
bool control_net_cpu = false;
100106
bool normalize_input = false;
101107
bool clip_on_cpu = false;
102108
bool vae_on_cpu = false;
109+
bool diffusion_flash_attn = false;
103110
bool color = false;
104111

105-
// Photomaker params
106-
std::string input_id_images_path;
112+
std::vector<int> skip_layers = {7, 8, 9};
113+
float slg_scale = 0.;
114+
float skip_layer_start = 0.01;
115+
float skip_layer_end = 0.2;
107116

108117
// server things
109118
int port = 8080;
@@ -113,24 +122,34 @@ struct SDParams {
113122
void print_params(SDParams params) {
114123
printf("Option: \n");
115124
printf(" n_threads: %d\n", params.n_threads);
125+
printf(" mode: server\n");
116126
printf(" model_path: %s\n", params.model_path.c_str());
117127
printf(" wtype: %s\n", params.wtype < SD_TYPE_COUNT ? sd_type_name(params.wtype) : "unspecified");
118128
printf(" clip_l_path: %s\n", params.clip_l_path.c_str());
129+
printf(" clip_g_path: %s\n", params.clip_g_path.c_str());
119130
printf(" t5xxl_path: %s\n", params.t5xxl_path.c_str());
120131
printf(" diffusion_model_path: %s\n", params.diffusion_model_path.c_str());
121132
printf(" vae_path: %s\n", params.vae_path.c_str());
122-
// printf(" taesd_path: %s\n", params.taesd_path.c_str());
133+
printf(" taesd_path: %s\n", params.taesd_path.c_str());
134+
printf(" controlnet_path: %s\n", params.controlnet_path.c_str());
123135
printf(" embeddings_path: %s\n", params.embeddings_path.c_str());
124136
printf(" stacked_id_embeddings_path: %s\n", params.stacked_id_embeddings_path.c_str());
137+
printf(" input_id_images_path: %s\n", params.input_id_images_path.c_str());
125138
printf(" style ratio: %.2f\n", params.style_ratio);
126-
printf(" normzalize input image : %s\n", params.normalize_input ? "true" : "false");
139+
printf(" normalize input image : %s\n", params.normalize_input ? "true" : "false");
127140
printf(" output_path: %s\n", params.output_path.c_str());
141+
printf(" init_img: %s\n", params.input_path.c_str());
142+
printf(" control_image: %s\n", params.control_image_path.c_str());
128143
printf(" clip on cpu: %s\n", params.clip_on_cpu ? "true" : "false");
144+
printf(" controlnet cpu: %s\n", params.control_net_cpu ? "true" : "false");
129145
printf(" vae decoder on cpu:%s\n", params.vae_on_cpu ? "true" : "false");
146+
printf(" diffusion flash attention:%s\n", params.diffusion_flash_attn ? "true" : "false");
147+
printf(" strength(control): %.2f\n", params.control_strength);
130148
printf(" prompt: %s\n", params.prompt.c_str());
131149
printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
132150
printf(" min_cfg: %.2f\n", params.min_cfg);
133151
printf(" cfg_scale: %.2f\n", params.cfg_scale);
152+
printf(" slg_scale: %.2f\n", params.slg_scale);
134153
printf(" guidance: %.2f\n", params.guidance);
135154
printf(" clip_skip: %d\n", params.clip_skip);
136155
printf(" width: %d\n", params.width);
@@ -150,40 +169,59 @@ void print_usage(int argc, const char* argv[]) {
150169
printf("\n");
151170
printf("arguments:\n");
152171
printf(" -h, --help show this help message and exit\n");
153-
printf(" -M, --mode [MODEL] run mode (txt2img or img2img or convert, default: txt2img)\n");
154-
printf(" -t, --threads N number of threads to use during computation (default: -1).\n");
172+
printf(" -t, --threads N number of threads to use during computation (default: -1)\n");
155173
printf(" If threads <= 0, then threads will be set to the number of CPU physical cores\n");
156174
printf(" -m, --model [MODEL] path to full model\n");
157175
printf(" --diffusion-model path to the standalone diffusion model\n");
158176
printf(" --clip_l path to the clip-l text encoder\n");
159-
printf(" --t5xxl path to the the t5xxl text encoder.\n");
177+
printf(" --clip_g path to the clip-g text encoder\n");
178+
printf(" --t5xxl path to the t5xxl text encoder\n");
160179
printf(" --vae [VAE] path to vae\n");
161-
printf(" --embd-dir [EMBEDDING_PATH] path to embeddings.\n");
180+
printf(" --taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)\n");
181+
printf(" --control-net [CONTROL_PATH] path to control net model\n");
182+
printf(" --embd-dir [EMBEDDING_PATH] path to embeddings\n");
183+
printf(" --stacked-id-embd-dir [DIR] path to PHOTOMAKER stacked id embeddings\n");
184+
printf(" --input-id-images-dir [DIR] path to PHOTOMAKER input id images dir\n");
185+
printf(" --normalize-input normalize PHOTOMAKER input id images\n");
186+
// printf(" --upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now\n");
187+
// printf(" --upscale-repeats Run the ESRGAN upscaler this many times (default 1)\n");
162188
printf(" --type [TYPE] weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k)\n");
163-
printf(" If not specified, the default is the type of the weight file.\n");
189+
printf(" If not specified, the default is the type of the weight file\n");
164190
printf(" --lora-model-dir [DIR] lora model directory\n");
191+
printf(" --control-image [IMAGE] path to image condition, control net\n");
165192
printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
166193
printf(" -p, --prompt [PROMPT] the prompt to render\n");
167194
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
168195
printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
196+
printf(" --slg-scale SCALE skip layer guidance (SLG) scale, only for DiT models: (default: 0)\n");
197+
printf(" 0 means disabled, a value of 2.5 is nice for sd3.5 medium\n");
198+
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
199+
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
200+
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
201+
printf(" SLG will be enabled at step int([STEPS]*[START]) and disabled at int([STEPS]*[END])\n");
169202
printf(" --strength STRENGTH strength for noising/unnoising (default: 0.75)\n");
170203
printf(" --style-ratio STYLE-RATIO strength for keeping input identity (default: 20%%)\n");
171204
printf(" --control-strength STRENGTH strength to apply Control Net (default: 0.9)\n");
172205
printf(" 1.0 corresponds to full destruction of information in init image\n");
173206
printf(" -H, --height H image height, in pixel space (default: 512)\n");
174207
printf(" -W, --width W image width, in pixel space (default: 512)\n");
175-
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, lcm}\n");
208+
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm}\n");
176209
printf(" sampling method (default: \"euler_a\")\n");
177210
printf(" --steps STEPS number of sample steps (default: 20)\n");
178211
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
179212
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
180-
printf(" -b, --batch-count COUNT number of images to generate.\n");
181-
printf(" --schedule {discrete, karras, ays} Denoiser sigma schedule (default: discrete)\n");
213+
printf(" -b, --batch-count COUNT number of images to generate\n");
214+
printf(" --schedule {discrete, karras, exponential, ays, gits} Denoiser sigma schedule (default: discrete)\n");
182215
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
183216
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
184217
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
185218
printf(" --vae-on-cpu keep vae in cpu (for low vram)\n");
186-
printf(" --clip-on-cpu keep clip in cpu (for low vram).\n");
219+
printf(" --clip-on-cpu keep clip in cpu (for low vram)\n");
220+
printf(" --diffusion-fa use flash attention in the diffusion model (for low vram)\n");
221+
printf(" Might lower quality, since it implies converting k and v to f16.\n");
222+
printf(" This might crash if it is not supported by the backend.\n");
223+
printf(" --control-net-cpu keep controlnet in cpu (for low vram)\n");
224+
printf(" --canny apply canny preprocessor (edge detection)\n");
187225
printf(" --color Colors the logging tags according to level\n");
188226
printf(" -v, --verbose print extra info\n");
189227
printf(" --port port used for server (default: 8080)\n");
@@ -214,6 +252,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
214252
break;
215253
}
216254
params.clip_l_path = argv[i];
255+
} else if (arg == "--clip_g") {
256+
if (++i >= argc) {
257+
invalid_arg = true;
258+
break;
259+
}
260+
params.clip_g_path = argv[i];
217261
} else if (arg == "--t5xxl") {
218262
if (++i >= argc) {
219263
invalid_arg = true;
@@ -232,7 +276,42 @@ void parse_args(int argc, const char** argv, SDParams& params) {
232276
break;
233277
}
234278
params.vae_path = argv[i];
235-
// TODO Tiny AE
279+
} else if (arg == "--taesd") {
280+
if (++i >= argc) {
281+
invalid_arg = true;
282+
break;
283+
}
284+
params.taesd_path = argv[i];
285+
} else if (arg == "--control-net") {
286+
if (++i >= argc) {
287+
invalid_arg = true;
288+
break;
289+
}
290+
params.controlnet_path = argv[i];
291+
} else if (arg == "--upscale-model") {
292+
if (++i >= argc) {
293+
invalid_arg = true;
294+
break;
295+
}
296+
params.esrgan_path = argv[i];
297+
} else if (arg == "--embd-dir") {
298+
if (++i >= argc) {
299+
invalid_arg = true;
300+
break;
301+
}
302+
params.embeddings_path = argv[i];
303+
} else if (arg == "--stacked-id-embd-dir") {
304+
if (++i >= argc) {
305+
invalid_arg = true;
306+
break;
307+
}
308+
params.stacked_id_embeddings_path = argv[i];
309+
} else if (arg == "--input-id-images-dir") {
310+
if (++i >= argc) {
311+
invalid_arg = true;
312+
break;
313+
}
314+
params.input_id_images_path = argv[i];
236315
} else if (arg == "--type") {
237316
if (++i >= argc) {
238317
invalid_arg = true;
@@ -270,6 +349,18 @@ void parse_args(int argc, const char** argv, SDParams& params) {
270349
break;
271350
}
272351
params.lora_model_dir = argv[i];
352+
} else if (arg == "-i" || arg == "--init-img") {
353+
if (++i >= argc) {
354+
invalid_arg = true;
355+
break;
356+
}
357+
params.input_path = argv[i];
358+
} else if (arg == "--control-image") {
359+
if (++i >= argc) {
360+
invalid_arg = true;
361+
break;
362+
}
363+
params.control_image_path = argv[i];
273364
} else if (arg == "-o" || arg == "--output") {
274365
if (++i >= argc) {
275366
invalid_arg = true;
@@ -312,6 +403,12 @@ void parse_args(int argc, const char** argv, SDParams& params) {
312403
break;
313404
}
314405
params.style_ratio = std::stof(argv[i]);
406+
} else if (arg == "--control-strength") {
407+
if (++i >= argc) {
408+
invalid_arg = true;
409+
break;
410+
}
411+
params.control_strength = std::stof(argv[i]);
315412
} else if (arg == "-H" || arg == "--height") {
316413
if (++i >= argc) {
317414
invalid_arg = true;
@@ -338,12 +435,16 @@ void parse_args(int argc, const char** argv, SDParams& params) {
338435
params.clip_skip = std::stoi(argv[i]);
339436
} else if (arg == "--vae-tiling") {
340437
params.vae_tiling = true;
438+
} else if (arg == "--control-net-cpu") {
439+
params.control_net_cpu = true;
341440
} else if (arg == "--normalize-input") {
342441
params.normalize_input = true;
343442
} else if (arg == "--clip-on-cpu") {
344443
params.clip_on_cpu = true; // will slow down get_learned_condition but necessary for low MEM GPUs
345444
} else if (arg == "--vae-on-cpu") {
346445
params.vae_on_cpu = true; // will slow down latent decoding but necessary for low MEM GPUs
446+
} else if (arg == "--diffusion-fa") {
447+
params.diffusion_flash_attn = true; // can reduce MEM significantly
347448
} else if (arg == "-b" || arg == "--batch-count") {
348449
if (++i >= argc) {
349450
invalid_arg = true;
@@ -411,6 +512,61 @@ void parse_args(int argc, const char** argv, SDParams& params) {
411512
params.verbose = true;
412513
} else if (arg == "--color") {
413514
params.color = true;
515+
} else if (arg == "--slg-scale") {
516+
if (++i >= argc) {
517+
invalid_arg = true;
518+
break;
519+
}
520+
params.slg_scale = std::stof(argv[i]);
521+
} else if (arg == "--skip-layers") {
522+
if (++i >= argc) {
523+
invalid_arg = true;
524+
break;
525+
}
526+
if (argv[i][0] != '[') {
527+
invalid_arg = true;
528+
break;
529+
}
530+
std::string layers_str = argv[i];
531+
while (layers_str.back() != ']') {
532+
if (++i >= argc) {
533+
invalid_arg = true;
534+
break;
535+
}
536+
layers_str += " " + std::string(argv[i]);
537+
}
538+
layers_str = layers_str.substr(1, layers_str.size() - 2);
539+
540+
std::regex regex("[, ]+");
541+
std::sregex_token_iterator iter(layers_str.begin(), layers_str.end(), regex, -1);
542+
std::sregex_token_iterator end;
543+
std::vector<std::string> tokens(iter, end);
544+
std::vector<int> layers;
545+
for (const auto& token : tokens) {
546+
try {
547+
layers.push_back(std::stoi(token));
548+
} catch (const std::invalid_argument& e) {
549+
invalid_arg = true;
550+
break;
551+
}
552+
}
553+
params.skip_layers = layers;
554+
555+
if (invalid_arg) {
556+
break;
557+
}
558+
} else if (arg == "--skip-layer-start") {
559+
if (++i >= argc) {
560+
invalid_arg = true;
561+
break;
562+
}
563+
params.skip_layer_start = std::stof(argv[i]);
564+
} else if (arg == "--skip-layer-end") {
565+
if (++i >= argc) {
566+
invalid_arg = true;
567+
break;
568+
}
569+
params.skip_layer_end = std::stof(argv[i]);
414570
} else if (arg == "--port") {
415571
if (++i >= argc) {
416572
invalid_arg = true;
@@ -716,11 +872,12 @@ int main(int argc, const char* argv[]) {
716872

717873
sd_ctx_t* sd_ctx = new_sd_ctx(params.model_path.c_str(),
718874
params.clip_l_path.c_str(),
875+
params.clip_g_path.c_str(),
719876
params.t5xxl_path.c_str(),
720877
params.diffusion_model_path.c_str(),
721878
params.vae_path.c_str(),
722-
"",
723-
"",
879+
params.taesd_path.c_str(),
880+
params.controlnet_path.c_str(),
724881
params.lora_model_dir.c_str(),
725882
params.embeddings_path.c_str(),
726883
params.stacked_id_embeddings_path.c_str(),
@@ -732,8 +889,9 @@ int main(int argc, const char* argv[]) {
732889
params.rng_type,
733890
params.schedule,
734891
params.clip_on_cpu,
735-
true,
736-
params.vae_on_cpu);
892+
params.control_net_cpu,
893+
params.vae_on_cpu,
894+
params.diffusion_flash_attn);
737895

738896
if (sd_ctx == NULL) {
739897
printf("new_sd_ctx_t failed\n");
@@ -787,7 +945,12 @@ int main(int argc, const char* argv[]) {
787945
1,
788946
params.style_ratio,
789947
params.normalize_input,
790-
params.input_id_images_path.c_str());
948+
params.input_id_images_path.c_str(),
949+
params.skip_layers.data(),
950+
params.skip_layers.size(),
951+
params.slg_scale,
952+
params.skip_layer_start,
953+
params.skip_layer_end);
791954

792955
if (results == NULL) {
793956
printf("generate failed\n");

0 commit comments

Comments
 (0)