环境准备
- 操作系统:Ubuntu 24.04
- 显卡:RTX 3090 (24G) 或同等性能 NVIDIA GPU
- 基础软件:已安装 Docker 并配置 nvidia-container-toolkit
模型获取
从官方仓库下载 Tongyi-MAI/Z-Image-Turbo 模型权重文件,并解压至服务器指定目录(例如 /data/models/Z-Image-Turbo)。
构建服务代码
1. 后端服务 (zimage_server.py)
新建 Python 文件,代码如下。可根据实际情况修改端口和分辨率。
import os
import torch
import base64
import logging
import uvicorn
from io import BytesIO
from contextlib import asynccontextmanager
from fastapi import FastAPI, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from diffusers import DiffusionPipeline
from fastapi.responses import FileResponse
# --- Configuration ---
# Local directory holding the model weights; passed to from_pretrained().
MODEL_PATH = "/data/models/Z-Image-Turbo"
# Bind address and port handed to uvicorn.run().
HOST = "0.0.0.0"
PORT = 8000
# Device the loaded pipeline is moved to.
DEVICE = "cuda"
# Logging configuration: INFO level, timestamp followed by the message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
logger = logging.getLogger(__name__)
# Module-level registry for the loaded pipeline; populated and cleared by lifespan().
ml_models = {}
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the diffusion pipeline at startup and release it at shutdown.

    The pipeline is read from MODEL_PATH, moved to DEVICE and stored in
    ``ml_models["pipe"]`` for the request handlers. A load failure is logged
    and re-raised so the server does not start without a model.
    """
    logger.info(f"正在加载模型:{MODEL_PATH} ...")
    try:
        # Prefer bfloat16 when the GPU supports it; otherwise fall back to float16.
        dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
        pipe = DiffusionPipeline.from_pretrained(
            MODEL_PATH,
            torch_dtype=dtype,
            # NOTE(review): the value was lost from the listing; True assumed.
            use_safetensors=True,
        )
        pipe.to(DEVICE)
        # NOTE(review): the registry key was lost from the listing; "pipe" assumed.
        ml_models["pipe"] = pipe
        logger.info("模型加载完成")
    except Exception as e:
        logger.error(f"模型加载失败:{e}")
        raise
    try:
        yield
    finally:
        # Drop the pipeline reference and hand cached GPU memory back on shutdown.
        ml_models.clear()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()


app = FastAPI(lifespan=lifespan)
# NOTE(review): the CORS values were lost from the listing; the permissive
# wildcard setup is assumed. Restrict allow_origins before exposing publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# NOTE(review): route path and file name were lost from the listing; assumed.
@app.get("/")
async def index():
    """Serve the front-end page."""
    return FileResponse("index.html")


# NOTE(review): route path was lost from the listing; "/generate" assumed.
@app.post("/generate")
async def generate(prompt: str = Form(...)):
    """Generate one image for ``prompt`` and return it base64-encoded.

    Args:
        prompt: Text prompt submitted as a form field.

    Returns:
        A dict holding the PNG image as a base64 string.

    Raises:
        HTTPException: 503 when the pipeline is not loaded, 500 when
            generation or encoding fails.
    """
    if "pipe" not in ml_models:
        raise HTTPException(status_code=503, detail="模型未加载")
    logger.info(f"收到生成请求:{prompt}")
    try:
        pipe = ml_models["pipe"]
        # The pipeline call is synchronous, so it blocks the event loop and
        # requests are effectively served one at a time.
        # NOTE(review): the numeric values were lost from the listing; these
        # follow the Z-Image-Turbo model card. Adjust height/width as needed.
        image = pipe(
            prompt=prompt,
            height=1024,
            width=1024,
            num_inference_steps=9,
            guidance_scale=0.0,
            output_type="pil",
        ).images[0]
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        base64_img = base64.b64encode(buffer.getvalue()).decode()
        # NOTE(review): the response key was lost from the listing; "image" assumed.
        return {"image": base64_img}
    except Exception as e:
        logger.error(f"生成失败:{e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    uvicorn.run(app, host=HOST, port=PORT)


