Mac 部署 Qwen3 TTS

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# 请预先运行以下指令安装一些工具
brew install portaudio ffmpeg sox

# 安装 Conda,地址 https://repo.anaconda.com/archive/
Anaconda3-2025.12-2-MacOSX-arm64.pkg

# 创建虚拟环境
conda create -n qwen3-tts python=3.12 -y

# 激活虚拟环境
conda activate qwen3-tts

# 安装 qwen-tts Python 包(系统依赖已在上面通过 brew 安装)
pip install -U qwen-tts

# 克隆仓库并进入主目录(如尚未克隆:git clone https://github.com/QwenLM/Qwen3-TTS.git)
cd Qwen3-TTS

# 安装 Python 依赖项
pip install -e .

# 验证 MPS
(qwen3-tts) xxxMac-mini Qwen3-TTS % python
Python 3.12.12 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 20:07:49) [Clang 20.1.8 ] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(torch.backends.mps.is_available())
True
>>> print(torch.backends.mps.is_built())
True
>>> exit()

# 测试
cd examples 
python test_model_12hz_base.py

# 报错:SoX could not be found!
# 安装 SOX
brew install sox

# 无效的路径格式:ERROR:Invalid path format detected
# 删除模型路径 "/"
# 修改 MODEL_PATH = "Qwen/Qwen3-TTS-12Hz-1.7B-Base/" 为 MODEL_PATH = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"

# 测试
python test_model_12hz_base.py


# ERROR:ImportError: FlashAttention2 has been toggled on, but it cannot be used the package flash_attn seems to be not installed
# FlashAttention / FlashAttention2 只支持 NVIDIA CUDA GPU,不支持 Apple Silicon
# 修改支持 Mac M 系列芯片的代码
# tts = Qwen3TTSModel.from_pretrained(
  #     MODEL_PATH,
  #     device_map=device,
  #     dtype=torch.bfloat16,
  #     attn_implementation="flash_attention_2",
# )

# Load the TTS model configured for Apple Silicon: flash_attention_2 is
# CUDA-only, so the MPS-supported SDPA attention backend is used instead.
tts = Qwen3TTSModel.from_pretrained(
  MODEL_PATH,
  torch_dtype=torch.bfloat16,     # bfloat16 is natively supported on Apple M-series chips
  attn_implementation="sdpa",     # "sdpa" is the attention path accelerated on Mac MPS
  device_map="mps",               # force the model onto the Apple GPU (MPS device)
)

# 测试
python test_model_12hz_base.py

# ERROR:Torch 不支持编译 CUDA 的方法 (M 芯片没有 CUDA)
# 使用 M 系列芯片的同步指令
def _sync_accelerator():
    """Block until all queued GPU work has finished (CUDA or Apple MPS)."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    elif torch.backends.mps.is_available():
        torch.mps.synchronize()  # MPS counterpart of cuda.synchronize on Apple Silicon


def run_case(tts: "Qwen3TTSModel", out_dir: str, case_name: str, call_fn):
    """Time one synthesis case and write the resulting waveforms to disk.

    Args:
        tts: loaded TTS model (kept for interface compatibility; unused here —
            ``call_fn`` is expected to close over it).
        out_dir: directory that receives the generated ``.wav`` files.
        case_name: label used in the log line and in the output file names.
        call_fn: zero-argument callable returning ``(wavs, sr)`` where ``wavs``
            is a sequence of waveform arrays and ``sr`` is the sample rate.
    """
    _sync_accelerator()  # ensure the timer does not include previously queued GPU work
    t0 = time.time()

    wavs, sr = call_fn()

    _sync_accelerator()  # wait for async GPU kernels before stopping the clock
    t1 = time.time()
    print(f"[{case_name}] time: {t1 - t0:.3f}s, n_wavs={len(wavs)}, sr={sr}")

    for i, w in enumerate(wavs):
        sf.write(os.path.join(out_dir, f"{case_name}_{i}.wav"), w, sr)


# 测试
python test_model_12hz_base.py