
Commit 254ef8a

Merge branch 'master' into progress_callback
2 parents af449d3 + d164236, commit 254ef8a

File tree: 8 files changed, +271 -115 lines


.github/workflows/build.yml (+121 -29)
@@ -30,7 +30,6 @@ jobs:
         with:
           submodules: recursive
 
-
       - name: Dependencies
         id: depends
         run: |
@@ -42,14 +41,37 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake ..
+          cmake .. -DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON
           cmake --build . --config Release
 
-      #- name: Test
-      #  id: cmake_test
-      #  run: |
-      #    cd build
-      #    ctest --verbose --timeout 900
+      - name: Get commit hash
+        id: commit
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/main' ) || github.event.inputs.create_release == 'true' }}
+        uses: pr-mpt/actions-commit-hash@v2
+
+      - name: Fetch system info
+        id: system-info
+        run: |
+          echo "CPU_ARCH=`uname -m`" >> "$GITHUB_OUTPUT"
+          echo "OS_NAME=`lsb_release -s -i`" >> "$GITHUB_OUTPUT"
+          echo "OS_VERSION=`lsb_release -s -r`" >> "$GITHUB_OUTPUT"
+          echo "OS_TYPE=`uname -s`" >> "$GITHUB_OUTPUT"
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp ggml/LICENSE ./build/bin/ggml.txt
+          cp LICENSE ./build/bin/stable-diffusion.cpp.txt
+          zip -j sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
+
 
   macOS-latest-cmake:
     runs-on: macos-latest
@@ -63,24 +85,46 @@ jobs:
 
       - name: Dependencies
         id: depends
-        continue-on-error: true
         run: |
-          brew update
+          brew install zip
 
       - name: Build
        id: cmake_build
        run: |
          sysctl -a
          mkdir build
          cd build
-          cmake ..
+          cmake .. -DGGML_AVX2=ON -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" -DSD_BUILD_SHARED_LIBS=ON
          cmake --build . --config Release
 
-      #- name: Test
-      #  id: cmake_test
-      #  run: |
-      #    cd build
-      #    ctest --verbose --timeout 900
+      - name: Get commit hash
+        id: commit
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/main' ) || github.event.inputs.create_release == 'true' }}
+        uses: pr-mpt/actions-commit-hash@v2
+
+      - name: Fetch system info
+        id: system-info
+        run: |
+          echo "CPU_ARCH=`uname -m`" >> "$GITHUB_OUTPUT"
+          echo "OS_NAME=`sw_vers -productName`" >> "$GITHUB_OUTPUT"
+          echo "OS_VERSION=`sw_vers -productVersion`" >> "$GITHUB_OUTPUT"
+          echo "OS_TYPE=`uname -s`" >> "$GITHUB_OUTPUT"
+
+      - name: Pack artifacts
+        id: pack_artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          cp ggml/LICENSE ./build/bin/ggml.txt
+          cp LICENSE ./build/bin/stable-diffusion.cpp.txt
+          zip -j sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip ./build/bin/*
+
+      - name: Upload artifacts
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-${{ steps.system-info.outputs.OS_TYPE }}-${{ steps.system-info.outputs.OS_NAME }}-${{ steps.system-info.outputs.OS_VERSION }}-${{ steps.system-info.outputs.CPU_ARCH }}.zip
+
 
   windows-latest-cmake:
     runs-on: windows-latest
@@ -89,21 +133,47 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
+            defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DSD_BUILD_SHARED_LIBS=ON'
           - build: 'avx2'
-            defines: '-DGGML_AVX2=ON'
+            defines: '-DGGML_AVX2=ON -DSD_BUILD_SHARED_LIBS=ON'
          - build: 'avx'
-            defines: '-DGGML_AVX2=OFF'
+            defines: '-DGGML_AVX2=OFF -DSD_BUILD_SHARED_LIBS=ON'
          - build: 'avx512'
-            defines: '-DGGML_AVX512=ON'
-
+            defines: '-DGGML_AVX512=ON -DSD_BUILD_SHARED_LIBS=ON'
+          - build: 'cuda12'
+            defines: '-DSD_CUBLAS=ON -DSD_BUILD_SHARED_LIBS=ON'
+          - build: 'rocm5.5'
+            defines: '-G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS="gfx1100;gfx1102;gfx1030" -DSD_BUILD_SHARED_LIBS=ON'
     steps:
       - name: Clone
         id: checkout
         uses: actions/checkout@v3
         with:
           submodules: recursive
 
+      - name: Install cuda-toolkit
+        id: cuda-toolkit
+        if: ${{ matrix.build == 'cuda12' }}
+        uses: Jimver/[email protected]
+        with:
+          cuda: '12.2.0'
+          method: 'network'
+          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
+
+      - name: Install rocm-toolkit
+        id: rocm-toolkit
+        if: ${{ matrix.build == 'rocm5.5' }}
+        uses: Cyberhan123/[email protected]
+        with:
+          rocm: '5.5.0'
+
+      - name: Install Ninja
+        id: install-ninja
+        if: ${{ matrix.build == 'rocm5.5' }}
+        uses: urkle/action-get-ninja@v1
+        with:
+          version: 1.11.1
+
       - name: Build
         id: cmake_build
         run: |
@@ -125,12 +195,6 @@ jobs:
           & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
           .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
 
-      #- name: Test
-      #  id: cmake_test
-      #  run: |
-      #    cd build
-      #    ctest -C Release --verbose --timeout 900
-
       - name: Get commit hash
         id: commit
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -140,9 +204,37 @@
         id: pack_artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
-          Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt
-          Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt
-          7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\*
+          $filePath = ".\build\bin\Release\*"
+          if (Test-Path $filePath) {
+            echo "Exists at path $filePath"
+            Copy-Item ggml/LICENSE .\build\bin\Release\ggml.txt
+            Copy-Item LICENSE .\build\bin\Release\stable-diffusion.cpp.txt
+          } elseif (Test-Path ".\build\bin\stable-diffusion.dll") {
+            $filePath = ".\build\bin\*"
+            echo "Exists at path $filePath"
+            Copy-Item ggml/LICENSE .\build\bin\ggml.txt
+            Copy-Item LICENSE .\build\bin\stable-diffusion.cpp.txt
+          } else {
+            ls .\build\bin
+            throw "Can't find stable-diffusion.dll"
+          }
+          7z a sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip $filePath
+
+      - name: Copy and pack Cuda runtime
+        id: pack_cuda_runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' && matrix.build == 'cuda12' ) || github.event.inputs.create_release == 'true' }}
+        run: |
+          echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
+          $dst='.\build\bin\cudart\'
+          robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
+          7z a cudart-sd-bin-win-cu12-x64.zip $dst\*
+
+      - name: Upload Cuda runtime
+        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' && matrix.build == 'cuda12' ) || github.event.inputs.create_release == 'true' }}
+        uses: actions/upload-artifact@v3
+        with:
+          path: |
+            cudart-sd-bin-win-cu12-x64.zip
 
       - name: Upload artifacts
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

CMakeLists.txt (+13 -5)
@@ -28,7 +28,8 @@ option(SD_CUBLAS "sd: cuda backend" OFF)
 option(SD_HIPBLAS "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
 option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
-option(BUILD_SHARED_LIBS "sd: build shared libs" OFF)
+option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
+option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
 #option(SD_BUILD_SERVER "sd: build server example" ON)
 
 if(SD_CUBLAS)
@@ -59,17 +60,24 @@ endif()
 
 set(SD_LIB stable-diffusion)
 
-add_library(${SD_LIB} stable-diffusion.h stable-diffusion.cpp model.h model.cpp util.h util.cpp upscaler.cpp
-            ggml_extend.hpp clip.hpp common.hpp unet.hpp tae.hpp esrgan.hpp lora.hpp denoiser.hpp rng.hpp rng_philox.hpp
-            control.hpp preprocessing.hpp)
+file(GLOB SD_LIB_SOURCES
+    "*.h"
+    "*.cpp"
+    "*.hpp"
+)
 
-if(BUILD_SHARED_LIBS)
+# we can get only one share lib
+if(SD_BUILD_SHARED_LIBS)
     message("Build shared library")
+    set(BUILD_SHARED_LIBS OFF)
+    message(${SD_LIB_SOURCES})
+    add_library(${SD_LIB} SHARED ${SD_LIB_SOURCES})
     add_definitions(-DSD_BUILD_SHARED_LIB)
     target_compile_definitions(${SD_LIB} PRIVATE -DSD_BUILD_DLL)
     set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 else()
     message("Build static library")
+    add_library(${SD_LIB} STATIC ${SD_LIB_SOURCES})
 endif()
 
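
Why the shared build sets two different definitions: SD_BUILD_SHARED_LIB is added with add_definitions, so other targets in the same build tree also see it, while SD_BUILD_DLL is private to the library target, so only the library itself compiles with export decoration. A minimal sketch of the conventional export-macro pattern this arrangement usually drives follows; the macro name SD_API and the declared function are illustrative assumptions, not taken from this commit (check stable-diffusion.h for the project's actual spelling).

// Hedged sketch of a typical Windows export-macro setup driven by the two
// compile definitions above. SD_API and sd_example_entry_point are
// illustrative names, not taken from this commit.
#if defined(SD_BUILD_SHARED_LIB) && defined(_WIN32)
    #ifdef SD_BUILD_DLL
        #define SD_API __declspec(dllexport)  // set only while compiling the DLL itself
    #else
        #define SD_API __declspec(dllimport)  // set for consumers linking against the DLL
    #endif
#else
    #define SD_API  // static builds and non-Windows targets need no decoration
#endif

// A public entry point would then be declared as:
// SD_API void sd_example_entry_point(void);
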
clip.hpp (+60 -22)
@@ -558,11 +558,14 @@ class CLIPEmbeddings : public GGMLBlock {
         auto token_embed_weight = params["token_embedding.weight"];
         auto position_embed_weight = params["position_embedding.weight"];
 
-        GGML_ASSERT(input_ids->ne[0] <= position_embed_weight->ne[0]);
+        GGML_ASSERT(input_ids->ne[0] == position_embed_weight->ne[1]);
+        input_ids = ggml_reshape_3d(ctx, input_ids, input_ids->ne[0], 1, input_ids->ne[1]);
+        auto token_embedding = ggml_get_rows(ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids);
+        token_embedding = ggml_reshape_3d(ctx, token_embedding, token_embedding->ne[0], token_embedding->ne[1], token_embedding->ne[3]);
 
         // token_embedding + position_embedding
         auto x = ggml_add(ctx,
-                          ggml_get_rows(ctx, custom_embed_weight != NULL ? custom_embed_weight : token_embed_weight, input_ids),
+                          token_embedding,
                           position_embed_weight); // [N, n_token, embed_dim]
         return x;
     }
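
For readers unfamiliar with ggml, the ggml_get_rows call above is an embedding lookup: each token id selects one row of the [vocab_size, embed_dim] weight matrix, and a custom embedding table is used instead when custom_embed_weight is set. A plain-C++ sketch of the same operation for the simple 1-D case, not part of this commit and with illustrative names:

// Conceptual stand-in for ggml_get_rows: copy one weight row per token id.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<float> get_rows(const std::vector<float>& weight,  // vocab_size * embed_dim, row-major
                            size_t embed_dim,
                            const std::vector<int>& input_ids) {
    std::vector<float> out(input_ids.size() * embed_dim);
    for (size_t i = 0; i < input_ids.size(); i++) {
        const float* row = &weight[(size_t)input_ids[i] * embed_dim];
        std::copy(row, row + embed_dim, &out[i * embed_dim]);
    }
    return out;  // [n_token, embed_dim]; the position embeddings are added to this afterwards
}
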
@@ -700,7 +703,7 @@ class CLIPTextModel : public GGMLBlock {
         auto final_layer_norm = std::dynamic_pointer_cast<LayerNorm>(blocks["final_layer_norm"]);
 
         auto x = embeddings->forward(ctx, input_ids, tkn_embeddings); // [N, n_token, hidden_size]
-        x = encoder->forward(ctx, x, return_pooled ? -1 : clip_skip, true);
+        x      = encoder->forward(ctx, x, return_pooled ? -1 : clip_skip, true);
         if (return_pooled || with_final_ln) {
             x = final_layer_norm->forward(ctx, x);
         }
@@ -893,7 +896,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             return true;
         }
         struct ggml_init_params params;
-        params.mem_size = 1 * 1024 * 1024; // 1MB
+        params.mem_size = 10 * 1024 * 1024; // max for custom embeddings 10 MB
         params.mem_buffer = NULL;
         params.no_alloc = false;
         struct ggml_context* embd_ctx = ggml_init(params);
@@ -928,9 +931,21 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
                                 struct ggml_tensor* embeddings,
                                 size_t max_token_idx = 0,
                                 bool return_pooled = false) {
+        size_t N = input_ids->ne[1];
+        size_t n_token = input_ids->ne[0];
+        if (input_ids != NULL && input_ids->ne[0] > text_model.n_token) {
+            GGML_ASSERT(input_ids->ne[0] % text_model.n_token == 0);
+            input_ids = ggml_reshape_2d(ctx, input_ids, text_model.n_token, input_ids->ne[0] / text_model.n_token);
+        }
+        if (input_ids2 != NULL && input_ids2->ne[0] > text_model2.n_token) {
+            GGML_ASSERT(input_ids2->ne[0] % text_model2.n_token == 0);
+            input_ids2 = ggml_reshape_2d(ctx, input_ids2, text_model2.n_token, input_ids2->ne[0] / text_model2.n_token);
+        }
+
         if (return_pooled) {
             return text_model2.forward(ctx, input_ids2, NULL, max_token_idx, return_pooled);
         }
+
         auto hidden_states = text_model.forward(ctx, input_ids, embeddings); // [N, n_token, hidden_size]
         // LOG_DEBUG("hidden_states: %d %d %d %d", hidden_states->ne[0], hidden_states->ne[1], hidden_states->ne[2], hidden_states->ne[3]);
         if (version == VERSION_XL) {
@@ -956,6 +971,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
 
             hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3));
         }
+        hidden_states = ggml_reshape_3d(ctx, hidden_states, hidden_states->ne[0], n_token, N);
         // LOG_DEBUG("hidden_states: %d %d %d %d", hidden_states->ne[0], hidden_states->ne[1], hidden_states->ne[2], hidden_states->ne[3]);
         return hidden_states;
     }
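
Taken together, the reshapes in the two hunks above split an over-long input_ids tensor into text_model.n_token-sized chunks, run the encoder over the chunks as a batch, and fold the per-chunk hidden states back into one long sequence. A plain-C++ sketch of that idea, not part of this commit; encode_chunk is a hypothetical stand-in for the CLIP text encoder:

// Encode each fixed-size chunk of token ids independently and concatenate the
// per-chunk hidden states along the token axis, mirroring what the
// ggml_reshape_2d / ggml_reshape_3d pair above achieves on tensors.
#include <cstddef>
#include <functional>
#include <vector>

std::vector<float> encode_long_prompt(
    const std::vector<int>& token_ids,  // length must be a multiple of n_token
    size_t n_token,                     // context window, 77 for CLIP
    const std::function<std::vector<float>(const int*, size_t)>& encode_chunk) {
    std::vector<float> hidden_states;
    for (size_t off = 0; off < token_ids.size(); off += n_token) {
        // one chunk acts as one batch element, like one slice of the reshaped [n_token, k] tensor
        std::vector<float> h = encode_chunk(token_ids.data() + off, n_token);  // n_token * hidden_size values
        hidden_states.insert(hidden_states.end(), h.begin(), h.end());
    }
    return hidden_states;  // per-chunk outputs concatenated back into one long sequence
}
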
@@ -1061,26 +1077,48 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
             tokens.insert(tokens.end(), curr_tokens.begin(), curr_tokens.end());
             weights.insert(weights.end(), curr_tokens.size(), curr_weight);
         }
-        tokens.insert(tokens.begin(), BOS_TOKEN_ID);
-        weights.insert(weights.begin(), 1.0);
 
-        if (max_length > 0) {
-            if (tokens.size() > max_length - 1) {
-                tokens.resize(max_length - 1);
-                weights.resize(max_length - 1);
-                tokens.push_back(EOS_TOKEN_ID);
-                weights.push_back(1.0);
-            } else {
-                tokens.push_back(EOS_TOKEN_ID);
-                weights.push_back(1.0);
-                if (padding) {
-                    int pad_token_id = PAD_TOKEN_ID;
-                    if (version == VERSION_2_x) {
-                        pad_token_id = 0;
-                    }
-                    tokens.insert(tokens.end(), max_length - tokens.size(), pad_token_id);
-                    weights.insert(weights.end(), max_length - weights.size(), 1.0);
+        if (max_length > 0 && padding) {
+            size_t n = std::ceil(tokens.size() * 1.0 / (max_length - 2));
+            if (n == 0) {
+                n = 1;
+            }
+            size_t length = max_length * n;
+            LOG_DEBUG("token length: %llu", length);
+            std::vector<int> new_tokens;
+            std::vector<float> new_weights;
+            new_tokens.push_back(BOS_TOKEN_ID);
+            new_weights.push_back(1.0);
+            int token_idx = 0;
+            for (int i = 1; i < length; i++) {
+                if (token_idx >= tokens.size()) {
+                    break;
+                }
+                if (i % max_length == 0) {
+                    new_tokens.push_back(BOS_TOKEN_ID);
+                    new_weights.push_back(1.0);
+                } else if (i % max_length == max_length - 1) {
+                    new_tokens.push_back(EOS_TOKEN_ID);
+                    new_weights.push_back(1.0);
+                } else {
+                    new_tokens.push_back(tokens[token_idx]);
+                    new_weights.push_back(weights[token_idx]);
+                    token_idx++;
+                }
+            }
+
+            new_tokens.push_back(EOS_TOKEN_ID);
+            new_weights.push_back(1.0);
+            tokens = new_tokens;
+            weights = new_weights;
+
+            if (padding) {
+                int pad_token_id = PAD_TOKEN_ID;
+                if (version == VERSION_2_x) {
+                    pad_token_id = 0;
                 }
+                tokens.insert(tokens.end(), length - tokens.size(), pad_token_id);
+                weights.insert(weights.end(), length - weights.size(), 1.0);
             }
         }
 
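
The rewritten padding logic above drops the old hard truncation at max_length and instead lays tokens out in windows of max_length (77 for CLIP), each framed by its own BOS/EOS pair, padding the result to a whole number of windows so the encoder can process them chunk by chunk as described earlier. A self-contained sketch of the scheme, not part of this commit; it tracks only token ids, while the real code carries per-token weights alongside:

// Chunked padding sketch: windows of max_length tokens, each with BOS/EOS,
// padded out to a whole number of windows. Token ids below are dummies.
#include <cmath>
#include <cstdio>
#include <vector>

const int BOS_TOKEN_ID = 49406;  // CLIP start-of-text
const int EOS_TOKEN_ID = 49407;  // CLIP end-of-text
const int PAD_TOKEN_ID = 49407;  // SD1.x pads with the EOS id; SD2.x uses 0, as in the diff

std::vector<int> pad_tokens(const std::vector<int>& tokens, size_t max_length = 77) {
    size_t n = (size_t)std::ceil(tokens.size() * 1.0 / (max_length - 2));  // windows needed
    if (n == 0) {
        n = 1;
    }
    size_t length = max_length * n;

    std::vector<int> out;
    out.push_back(BOS_TOKEN_ID);
    size_t token_idx = 0;
    for (size_t i = 1; i < length; i++) {
        if (token_idx >= tokens.size()) {
            break;
        }
        if (i % max_length == 0) {
            out.push_back(BOS_TOKEN_ID);  // start of the next window
        } else if (i % max_length == max_length - 1) {
            out.push_back(EOS_TOKEN_ID);  // end of the current window
        } else {
            out.push_back(tokens[token_idx++]);
        }
    }
    out.push_back(EOS_TOKEN_ID);
    out.insert(out.end(), length - out.size(), PAD_TOKEN_ID);  // fill the last window
    return out;
}

int main() {
    std::vector<int> prompt_tokens(100, 320);  // 100 dummy token ids, so two windows are needed
    std::vector<int> padded = pad_tokens(prompt_tokens);
    std::printf("padded length: %zu\n", padded.size());  // 154 = 2 * 77
    return 0;
}

Each 77-token window produced this way is then encoded separately and the hidden states are joined back together, which is what the forward() changes earlier in this file rely on.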