
Commit be6cd1a

sync: update ggml
1 parent e1384de commit be6cd1a

9 files changed: +33, -32 lines

CMakeLists.txt (+1, -1)

@@ -34,7 +34,7 @@ option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 
 if(SD_CUBLAS)
     message("Use CUBLAS as backend stable-diffusion")
-    set(GGML_CUBLAS ON)
+    set(GGML_CUDA ON)
     add_definitions(-DSD_USE_CUBLAS)
 endif()

clip.hpp (+4, -4)

@@ -679,8 +679,8 @@ class CLIPVisionEmbeddings : public GGMLBlock {
         class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding);  // [N, embed_dim]
         class_embedding = ggml_reshape_4d(ctx, class_embedding, 1, embed_dim, 1, N);  // [N, 1, embed_dim, 1]
 
-        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding);  // [N, num_positions, embed_dim, 1]
-        x = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N);  // [N, num_positions, embed_dim]
+        struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding, 2);  // [N, num_positions, embed_dim, 1]
+        x = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N);  // [N, num_positions, embed_dim]
         x = ggml_add(ctx, x, position_embed_weight);
         return x;  // [N, num_positions, embed_dim]
     }

@@ -1036,7 +1036,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
                                               hidden_states2->ne[3]);
             hidden_states2 = ggml_cont(ctx, ggml_permute(ctx, hidden_states2, 2, 0, 1, 3));
 
-            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2);  // [N, n_token, hidden_size + hidden_size2]
+            hidden_states = ggml_concat(ctx, hidden_states, hidden_states2, 2);  // [N, n_token, hidden_size + hidden_size2]
 
             hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3));
         }

@@ -1069,7 +1069,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             auto token_embed_weight = text_model.get_token_embed_weight();
             token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
             // concatenate custom embeddings
-            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
+            embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 2);
             embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
         }
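
Note: the clip.hpp hunks above, and the matching ones in esrgan.hpp, pmid.hpp and unet.hpp below, all track the same upstream change: ggml_concat now takes an explicit concat dimension instead of always joining along dim 2. A minimal sketch of the new call shape, with placeholder tensor names that are not from this commit:

    // Sketch of the updated ggml_concat signature (placeholder names, not from this repo).
    #include "ggml.h"

    static struct ggml_tensor* concat_channels(struct ggml_context* ctx,
                                               struct ggml_tensor* a,   // [W, H, C0, N]
                                               struct ggml_tensor* b) { // [W, H, C1, N]
        // Old API: ggml_concat(ctx, a, b) always concatenated along dim 2.
        // New API: the dimension is explicit; passing 2 reproduces the old behaviour.
        return ggml_concat(ctx, a, b, 2);                               // [W, H, C0 + C1, N]
    }

Every call site touched by this commit passes 2, so the graphs it builds are unchanged.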

esrgan.hpp (+4, -4)

@@ -42,13 +42,13 @@ class ResidualDenseBlock : public GGMLBlock {
         auto conv5 = std::dynamic_pointer_cast<Conv2d>(blocks["conv5"]);
 
         auto x1 = lrelu(ctx, conv1->forward(ctx, x));
-        auto x_cat = ggml_concat(ctx, x, x1);
+        auto x_cat = ggml_concat(ctx, x, x1, 2);
         auto x2 = lrelu(ctx, conv2->forward(ctx, x_cat));
-        x_cat = ggml_concat(ctx, x_cat, x2);
+        x_cat = ggml_concat(ctx, x_cat, x2, 2);
         auto x3 = lrelu(ctx, conv3->forward(ctx, x_cat));
-        x_cat = ggml_concat(ctx, x_cat, x3);
+        x_cat = ggml_concat(ctx, x_cat, x3, 2);
         auto x4 = lrelu(ctx, conv4->forward(ctx, x_cat));
-        x_cat = ggml_concat(ctx, x_cat, x4);
+        x_cat = ggml_concat(ctx, x_cat, x4, 2);
         auto x5 = conv5->forward(ctx, x_cat);
 
         x5 = ggml_add(ctx, ggml_scale(ctx, x5, 0.2f), x);

ggml

Submodule ggml updated from 57869ad to 2aae01f

ggml_extend.hpp (+1, -1)

@@ -917,7 +917,7 @@ struct GGMLModule {
             return NULL;
         }
         // it's performing a compute, check if backend isn't cpu
-        if (!ggml_backend_is_cpu(backend) && tensor->backend == GGML_BACKEND_TYPE_CPU) {
+        if (!ggml_backend_is_cpu(backend) && (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer))) {
            // pass input tensors to gpu memory
            auto backend_tensor = ggml_dup_tensor(compute_ctx, tensor);
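
Note: the change above stops relying on the old per-tensor backend enum and instead asks whether the tensor's buffer is still host-visible before copying it to the compute backend. A hedged sketch of the same test as a standalone helper (the helper name is made up for illustration):

    // Hypothetical helper: does this input tensor still live in host memory and
    // therefore need copying over to the non-CPU compute backend?
    #include "ggml-backend.h"

    static bool needs_upload(ggml_backend_t backend, const struct ggml_tensor* tensor) {
        // No buffer yet, or a host (CPU-visible) buffer, means the data has not
        // been placed on the GPU/accelerator backend.
        return !ggml_backend_is_cpu(backend) &&
               (tensor->buffer == NULL || ggml_backend_buffer_is_host(tensor->buffer));
    }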

model.cpp (+2, -4)

@@ -571,10 +571,9 @@ void convert_tensor(void* src,
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n);
         } else {
-            int64_t hist[16];
             std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
             const float* im = imatrix.data();
-            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, hist, im);
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
         }
     } else if (dst_type == GGML_TYPE_F32) {
         if (src_type == GGML_TYPE_F16) {

@@ -602,10 +601,9 @@ void convert_tensor(void* src,
             if (dst_type == GGML_TYPE_F16) {
                 ggml_fp32_to_fp16_row((float*)src_data_f32, (ggml_fp16_t*)dst, n);
             } else {
-                int64_t hist[16];
                 std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
                 const float* im = imatrix.data();
-                ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, hist, im);
+                ggml_quantize_chunk(dst_type, (float*)src_data_f32, dst, 0, nrows, n_per_row, im);
             }
         }
     }
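
Note: ggml_quantize_chunk lost its int64_t hist[16] output parameter in the updated ggml; the final argument is now the importance matrix alone. A hedged sketch of a call with the new signature (helper and parameter names are placeholders):

    // Placeholder helper: quantize nrows rows of F32 data into dst with the new
    // ggml_quantize_chunk signature (no histogram argument).
    #include <cstdint>
    #include <vector>
    #include "ggml.h"

    static size_t quantize_rows(enum ggml_type dst_type, const float* src, void* dst,
                                int64_t nrows, int64_t n_per_row) {
        std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix, as in model.cpp
        // start at row 0; the return value is the size of the quantized data in bytes
        return ggml_quantize_chunk(dst_type, src, dst, 0, nrows, n_per_row, imatrix.data());
    }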

pmid.hpp (+6, -6)

@@ -64,7 +64,7 @@ struct FuseModule : public GGMLBlock {
         auto prompt_embeds0 = ggml_cont(ctx, ggml_permute(ctx, prompt_embeds, 2, 0, 1, 3));
         auto id_embeds0 = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         // concat is along dim 2
-        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0);
+        auto stacked_id_embeds = ggml_concat(ctx, prompt_embeds0, id_embeds0, 2);
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 1, 2, 0, 3));
 
         // stacked_id_embeds = mlp1.forward(ctx, stacked_id_embeds);

@@ -102,12 +102,12 @@ struct FuseModule : public GGMLBlock {
 
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         if (left && right) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         } else if (left) {
-            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds);
+            stacked_id_embeds = ggml_concat(ctx, left, stacked_id_embeds, 2);
         } else if (right) {
-            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right);
+            stacked_id_embeds = ggml_concat(ctx, stacked_id_embeds, right, 2);
         }
         stacked_id_embeds = ggml_cont(ctx, ggml_permute(ctx, stacked_id_embeds, 0, 2, 1, 3));
         class_tokens_mask = ggml_cont(ctx, ggml_transpose(ctx, class_tokens_mask));

@@ -146,7 +146,7 @@ struct PhotoMakerIDEncoderBlock : public CLIPVisionModelProjection {
         id_embeds = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 2, 0, 1, 3));
         id_embeds_2 = ggml_cont(ctx, ggml_permute(ctx, id_embeds_2, 2, 0, 1, 3));
 
-        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
+        id_embeds = ggml_concat(ctx, id_embeds, id_embeds_2, 2);  // [batch_size, seq_length, 1, 2048] check whether concat at dim 2 is right
         id_embeds = ggml_cont(ctx, ggml_permute(ctx, id_embeds, 1, 2, 0, 3));
 
         struct ggml_tensor* updated_prompt_embeds = fuse_module->forward(ctx,

stable-diffusion.h (+12, -9)

@@ -60,12 +60,11 @@ enum sd_type_t {
     SD_TYPE_Q4_0 = 2,
     SD_TYPE_Q4_1 = 3,
     // SD_TYPE_Q4_2 = 4, support has been removed
-    // SD_TYPE_Q4_3 (5) support has been removed
-    SD_TYPE_Q5_0 = 6,
-    SD_TYPE_Q5_1 = 7,
-    SD_TYPE_Q8_0 = 8,
-    SD_TYPE_Q8_1 = 9,
-    // k-quantizations
+    // SD_TYPE_Q4_3 = 5, support has been removed
+    SD_TYPE_Q5_0 = 6,
+    SD_TYPE_Q5_1 = 7,
+    SD_TYPE_Q8_0 = 8,
+    SD_TYPE_Q8_1 = 9,
     SD_TYPE_Q2_K = 10,
     SD_TYPE_Q3_K = 11,
     SD_TYPE_Q4_K = 12,

@@ -80,9 +79,13 @@ enum sd_type_t {
     SD_TYPE_IQ3_S = 21,
     SD_TYPE_IQ2_S = 22,
     SD_TYPE_IQ4_XS = 23,
-    SD_TYPE_I8,
-    SD_TYPE_I16,
-    SD_TYPE_I32,
+    SD_TYPE_I8 = 24,
+    SD_TYPE_I16 = 25,
+    SD_TYPE_I32 = 26,
+    SD_TYPE_I64 = 27,
+    SD_TYPE_F64 = 28,
+    SD_TYPE_IQ1_M = 29,
+    SD_TYPE_BF16 = 30,
     SD_TYPE_COUNT,
 };
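
Note: sd_type_t mirrors ggml's ggml_type enum value for value; the explicit = 24 ... = 30 assignments and the new SD_TYPE_I64 / SD_TYPE_F64 / SD_TYPE_IQ1_M / SD_TYPE_BF16 entries keep the two enums aligned after the ggml update. A sketch of the direct cast this one-to-one mapping permits (assumption: the enums stay in lockstep; the helper name is illustrative):

    // Illustrative helper: relies on sd_type_t staying numerically identical to
    // ggml_type, which the explicit enum values above are there to guarantee.
    #include "ggml.h"
    #include "stable-diffusion.h"

    static enum ggml_type sd_type_to_ggml_type(enum sd_type_t type) {
        return (enum ggml_type)type;  // e.g. SD_TYPE_BF16 (30) -> GGML_TYPE_BF16 (30)
    }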

unet.hpp (+2, -2)

@@ -396,7 +396,7 @@ class UnetModelBlock : public GGMLBlock {
             if (c_concat->ne[3] != x->ne[3]) {
                 c_concat = ggml_repeat(ctx, c_concat, x);
             }
-            x = ggml_concat(ctx, x, c_concat);
+            x = ggml_concat(ctx, x, c_concat, 2);
         }
 
         if (y != NULL) {

@@ -491,7 +491,7 @@ class UnetModelBlock : public GGMLBlock {
                 control_offset--;
             }
 
-            h = ggml_concat(ctx, h, h_skip);
+            h = ggml_concat(ctx, h, h_skip, 2);
 
             std::string name = "output_blocks." + std::to_string(output_block_idx) + ".0";
