开源C++版AI画图大模型框架stable-diffusion.cpp开发使用初体验
stable-diffusion.cpp是一个C++编写的轻量级开源类AIGC大模型框架,可以支持在消费级普通设备上本地部署运行大模型进行AI画图,以及作为依赖库集成的到应用程序中提供类似于网页版stable-diffusion的功能。
(图片来源网络,侵删)
以下基于stable-diffusion.cpp的源码利用C++ api来开发实例demo演示加载本地模型文件输入提示词生成画图,这里采用显卡CUDA加速计算,如果没有显卡也可以直接使用CPU。
项目结构
stable_diffusion_cpp_starter - stable-diffusion.cpp - src |- main.cpp - CMakeLists.txt
有两个前置操作:
- 在系统安装好CUDA Toolkit
- 将stable-diffusion.cpp源码根目录的CMakeLists.txt里面SD_CUBLAS选项打开设为ON
不过,如果没有支持CUDA的显卡,默认采用CPU计算,则可以忽略以上两项
CMakeLists.txt
cmake_minimum_required(VERSION 3.15) project(stable_diffusion_cpp_starter) set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) add_subdirectory(stable-diffusion.cpp) include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/stable-diffusion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/stable-diffusion.cpp/thirdparty ) file(GLOB SRC src/*.h src/*.cpp ) add_executable(${PROJECT_NAME} ${SRC}) target_link_libraries(${PROJECT_NAME} stable-diffusion ${CMAKE_THREAD_LIBS_INIT} # means pthread on unix )
main.cpp
#include #include #include #include #include #include #include #include "stable-diffusion.h" #define STB_IMAGE_IMPLEMENTATION #define STB_IMAGE_STATIC #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #define STB_IMAGE_WRITE_STATIC #include "stb_image_write.h" #define STB_IMAGE_RESIZE_IMPLEMENTATION #define STB_IMAGE_RESIZE_STATIC #include "stb_image_resize.h" const char* rng_type_to_str[] = { "std_default", "cuda", }; // Names of the sampler method, same order as enum sample_method in stable-diffusion.h const char* sample_method_str[] = { "euler_a", "euler", "heun", "dpm2", "dpm++2s_a", "dpm++2m", "dpm++2mv2", "lcm", }; // Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h const char* schedule_str[] = { "default", "discrete", "karras", "ays", }; const char* modes_str[] = { "txt2img", "img2img", "img2vid", "convert", }; enum SDMode { TXT2IMG, IMG2IMG, IMG2VID, CONVERT, MODE_COUNT }; struct SDParams { int n_threads = -1; SDMode mode = TXT2IMG; std::string model_path; std::string vae_path; std::string taesd_path; std::string esrgan_path; std::string controlnet_path; std::string embeddings_path; std::string stacked_id_embeddings_path; std::string input_id_images_path; sd_type_t wtype = SD_TYPE_COUNT; std::string lora_model_dir; std::string output_path = "output.png"; std::string input_path; std::string control_image_path; std::string prompt; std::string negative_prompt; float min_cfg = 1.0f; float cfg_scale = 7.0f; float style_ratio = 20.f; int clip_skip = -1; // size_t pos = path.find_last_of('/'); if (pos != std::string::npos) { return path.substr(pos + 1); } pos = path.find_last_of('\\'); if (pos != std::string::npos) { return path.substr(pos + 1); } return path; } std::string get_image_params(SDParams params, int64_t seed) { std::string parameter_string = params.prompt + "\n"; if (params.negative_prompt.size() != 0) { parameter_string += "Negative prompt: " + params.negative_prompt + "\n"; } parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", "; parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", "; parameter_string += "Seed: " + std::to_string(seed) + ", "; parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", "; parameter_string += "Model: " + sd_basename(params.model_path) + ", "; parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", "; parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]); if (params.schedule == KARRAS) { parameter_string += " karras"; } parameter_string += ", "; parameter_string += "Version: stable-diffusion.cpp"; return parameter_string; } /* Enables Printing the log level tag in color using ANSI escape codes */ void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) { SDParams* params = (SDParams*)data; int tag_color; const char* level_str; FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout; if (!log || (!params-verbose && level case SD_LOG_DEBUG: tag_color = 37; level_str = "DEBUG"; break; case SD_LOG_INFO: tag_color = 34; level_str = "INFO"; break; case SD_LOG_WARN: tag_color = 35; level_str = "WARN"; break; case SD_LOG_ERROR: tag_color = 31; level_str = "ERROR"; break; default: /* Potential future-proofing */ tag_color = 33; level_str = "?????"; break; } if (params-color == true) fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str); else fprintf(out_stream, "[%-5s] ", level_str); fputs(log, out_stream); fflush(out_stream); } int main(int argc, const char* argv[]) { // set sd params const std::string model_path = "./v1-5-pruned-emaonly.ckpt"; const std::string img_output_path = "./gen_img.png"; const std::string prompt = "a cute little dog with flowers"; SDParams params; params.model_path = model_path; params.output_path = img_output_path; params.prompt = prompt; sd_set_log_callback(sd_log_cb, (void*)¶ms); if (params.mode == CONVERT) { bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype); if (!success) { fprintf(stderr, "convert '%s'/'%s' to '%s' failed\n", params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str()); return 1; } else { printf("convert '%s'/'%s' to '%s' success\n", params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str()); return 0; } } if (params.mode == IMG2VID) { fprintf(stderr, "SVD support is broken, do not use it!!!\n"); return 1; } // prepare image buffer bool vae_decode_only = true; uint8_t* input_image_buffer = NULL; uint8_t* control_image_buffer = NULL; if (params.mode == IMG2IMG || params.mode == IMG2VID) { vae_decode_only = false; int c = 0; int width = 0; int height = 0; input_image_buffer = stbi_load(params.input_path.c_str(), &width, &height, &c, 3); if (input_image_buffer == NULL) { fprintf(stderr, "load image from '%s' failed\n", params.input_path.c_str()); return 1; } if (c = 3, but got %d channels\n", c); free(input_image_buffer); return 1; } if (width fprintf(stderr, "error: the width of image must be greater than 0\n"); free(input_image_buffer); return 1; } if (height fprintf(stderr, "error: the height of image must be greater than 0\n"); free(input_image_buffer); return 1; } // Resize input image ... if (params.height != height || params.width != width) { printf("resize input image from %dx%d to %dx%d\n", width, height, params.width, params.height); int resized_height = params.height; int resized_width = params.width; uint8_t* resized_image_buffer = (uint8_t*)malloc(resized_height * resized_width * 3); if (resized_image_buffer == NULL) { fprintf(stderr, "error: allocate memory for resize input image\n"); free(input_image_buffer); return 1; } stbir_resize(input_image_buffer, width, height, 0, resized_image_buffer, resized_width, resized_height, 0, STBIR_TYPE_UINT8, 3 /*RGB channel*/, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP, STBIR_FILTER_BOX, STBIR_FILTER_BOX, STBIR_COLORSPACE_SRGB, nullptr); // Save resized result free(input_image_buffer); input_image_buffer = resized_image_buffer; } } // init sd context sd_ctx_t* sd_ctx = new_sd_ctx(params.model_path.c_str(), params.vae_path.c_str(), params.taesd_path.c_str(), params.controlnet_path.c_str(), params.lora_model_dir.c_str(), params.embeddings_path.c_str(), params.stacked_id_embeddings_path.c_str(), vae_decode_only, params.vae_tiling, true, params.n_threads, params.wtype, params.rng_type, params.schedule, params.clip_on_cpu, params.control_net_cpu, params.vae_on_cpu); if (sd_ctx == NULL) { printf("new_sd_ctx_t failed\n"); return 1; } sd_image_t* control_image = NULL; if (params.controlnet_path.size() 0 && params.control_image_path.size() 0) { int c = 0; control_image_buffer = stbi_load(params.control_image_path.c_str(), ¶ms.width, ¶ms.height, &c, 3); if (control_image_buffer == NULL) { fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str()); return 1; } control_image = new sd_image_t{(uint32_t)params.width, (uint32_t)params.height, 3, control_image_buffer}; if (params.canny_preprocess) { // apply preprocessor control_image->data = preprocess_canny(control_image->data, control_image->width, control_image->height, 0.08f, 0.08f, 0.8f, 1.0f, false); } } // generate image sd_image_t* results; if (params.mode == TXT2IMG) { results = txt2img(sd_ctx, params.prompt.c_str(), params.negative_prompt.c_str(), params.clip_skip, params.cfg_scale, params.width, params.height, params.sample_method, params.sample_steps, params.seed, params.batch_count, control_image, params.control_strength, params.style_ratio, params.normalize_input, params.input_id_images_path.c_str()); } else { sd_image_t input_image = {(uint32_t)params.width, (uint32_t)params.height, 3, input_image_buffer}; if (params.mode == IMG2VID) { results = img2vid(sd_ctx, input_image, params.width, params.height, params.video_frames, params.motion_bucket_id, params.fps, params.augmentation_level, params.min_cfg, params.cfg_scale, params.sample_method, params.sample_steps, params.strength, params.seed); if (results == NULL) { printf("generate failed\n"); free_sd_ctx(sd_ctx); return 1; } size_t last = params.output_path.find_last_of("."); std::string dummy_name = last != std::string::npos ? params.output_path.substr(0, last) : params.output_path; for (int i = 0; i 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png"; stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel, results[i].data, 0, get_image_params(params, params.seed + i).c_str()); printf("save result image to '%s'\n", final_image_path.c_str()); free(results[i].data); results[i].data = NULL; } free(results); free_sd_ctx(sd_ctx); return 0; } else { results = img2img(sd_ctx, input_image, params.prompt.c_str(), params.negative_prompt.c_str(), params.clip_skip, params.cfg_scale, params.width, params.height, params.sample_method, params.sample_steps, params.strength, params.seed, params.batch_count, control_image, params.control_strength, params.style_ratio, params.normalize_input, params.input_id_images_path.c_str()); } } if (results == NULL) { printf("generate failed\n"); free_sd_ctx(sd_ctx); return 1; } int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth if (params.esrgan_path.size() > 0 && params.upscale_repeats > 0) { upscaler_ctx_t* upscaler_ctx = new_upscaler_ctx(params.esrgan_path.c_str(), params.n_threads, params.wtype); if (upscaler_ctx == NULL) printf("new_upscaler_ctx failed\n"); else { for (int i = 0; i 0 ? dummy_name + "_" + std::to_string(i + 1) + ".png" : dummy_name + ".png"; stbi_write_png(final_image_path.c_str(), results[i].width, results[i].height, results[i].channel, results[i].data, 0, get_image_params(params, params.seed + i).c_str()); printf("save result image to '%s'\n", final_image_path.c_str()); free(results[i].data); results[i].data = NULL; } free(results); free_sd_ctx(sd_ctx); free(control_image_buffer); free(input_image_buffer); return 0; }
运行结果
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: CUDA_USE_TENSOR_CORES: yes ggml_cuda_init: found 1 CUDA devices: Device 0: NVIDIA GeForce GTX 1060 with Max-Q Design, compute capability 6.1, VMM: yes [INFO ] stable-diffusion.cpp:169 - loading model from './v1-5-pruned-emaonly.ckpt' [INFO ] model.cpp:736 - load ./v1-5-pruned-emaonly.ckpt using checkpoint format [INFO ] stable-diffusion.cpp:192 - Stable Diffusion 1.x [INFO ] stable-diffusion.cpp:198 - Stable Diffusion weight type: f32 [INFO ] stable-diffusion.cpp:419 - total params memory size = 2719.24MB (VRAM 2719.24MB, RAM 0.00MB): clip 469.44MB(VRAM), unet 2155.33MB(VRAM), vae 94.47MB(VRAM), controlnet 0.00MB(VRAM), pmid 0.00MB(VRAM) [INFO ] stable-diffusion.cpp:423 - loading model from './v1-5-pruned-emaonly.ckpt' completed, taking 18.72s [INFO ] stable-diffusion.cpp:440 - running in eps-prediction mode [INFO ] stable-diffusion.cpp:556 - Attempting to apply 0 LoRAs [INFO ] stable-diffusion.cpp:1203 - apply_loras completed, taking 0.00s ggml_gallocr_reserve_n: reallocating CUDA0 buffer from size 0.00 MiB to 1.40 MiB ggml_gallocr_reserve_n: reallocating CUDA0 buffer from size 0.00 MiB to 1.40 MiB [INFO ] stable-diffusion.cpp:1316 - get_learned_condition completed, taking 514 ms [INFO ] stable-diffusion.cpp:1334 - sampling using Euler A method [INFO ] stable-diffusion.cpp:1338 - generating image: 1/1 - seed 42 ggml_gallocr_reserve_n: reallocating CUDA0 buffer from size 0.00 MiB to 559.90 MiB |==================================================| 20/20 - 1.40s/it [INFO ] stable-diffusion.cpp:1381 - sampling completed, taking 35.05s [INFO ] stable-diffusion.cpp:1389 - generating 1 latent images completed, taking 35.07s [INFO ] stable-diffusion.cpp:1392 - decoding 1 latents ggml_gallocr_reserve_n: reallocating CUDA0 buffer from size 0.00 MiB to 1664.00 MiB [INFO ] stable-diffusion.cpp:1402 - latent 1 decoded, taking 3.03s [INFO ] stable-diffusion.cpp:1406 - decode_first_stage completed, taking 3.03s [INFO ] stable-diffusion.cpp:1490 - txt2img completed in 38.64s save result image to './gen_img.png'
注:
- stable_diffusion支持的模型文件需要自己去下载,推荐到huggingface官网下载ckpt格式文件
- 提示词要使用英文
- 支持文字生成图和以图辅助生成图,参数很多,可以多尝试
源码
stable_diffusion_cpp_starter
文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式并注明出处。