HY-2012 committed on
Commit
c6cd813
·
verified ·
1 Parent(s): 37ab0d3

Update melotts module

Browse files
ax_speech_translate_demo_qwen_api.py CHANGED
@@ -48,7 +48,7 @@ def intersperse(lst, item):
48
  result[1::2] = lst
49
  return result
50
 
51
-
52
  def get_text_for_tts_infer(text, language_str, symbol_to_id=None):
53
  norm_text, phone, tone, word2ph = clean_text(text, language_str)
54
  phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str, symbol_to_id)
@@ -64,6 +64,75 @@ def get_text_for_tts_infer(text, language_str, symbol_to_id=None):
64
  word2ph[0] += 1
65
 
66
  return phone, tone, language, norm_text, word2ph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
 
69
  def audio_numpy_concat(segment_data_list, sr, speed=1.):
 
48
  result[1::2] = lst
49
  return result
50
 
51
+ """
52
  def get_text_for_tts_infer(text, language_str, symbol_to_id=None):
53
  norm_text, phone, tone, word2ph = clean_text(text, language_str)
54
  phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str, symbol_to_id)
 
64
  word2ph[0] += 1
65
 
66
  return phone, tone, language, norm_text, word2ph
67
+ """
68
+
69
+ # Handle phoneme symbols that cannot be recognized
70
def get_text_for_tts_infer(text, language_str, symbol_to_id=None):
    """Turn ``text`` into the id arrays consumed by TTS inference.

    The text is cleaned with ``clean_text``; phones that are either in a
    fixed set of unsupported IPA symbols or missing from ``symbol_to_id``
    are dropped together with their tone so both sequences stay the same
    length. The surviving phones are converted with
    ``cleaned_text_to_sequence`` and a ``0`` is interspersed between the
    entries of each sequence.

    Args:
        text: Input text to synthesize.
        language_str: Language tag forwarded to ``clean_text`` and
            ``cleaned_text_to_sequence``.
        symbol_to_id: Optional container of known phone symbols (only
            membership is tested here). When ``None`` the "unknown symbol"
            filter is skipped and only the fixed unsupported set is removed.

    Returns:
        Tuple ``(phone, tone, language, norm_text, word2ph)`` where the
        first three and ``word2ph`` are ``np.int32`` arrays and
        ``norm_text`` is whatever ``clean_text`` returned.

    Raises:
        Exception: Any error from the processing steps is logged with its
            traceback and re-raised unchanged.
    """
    # IPA symbols that are always removed from the phone sequence.
    unsupported_phones = frozenset({
        'ɛ', 'æ', 'ʌ', 'ʊ', 'ɔ', 'ɪ', 'ɝ', 'ɚ', 'ɑ',
        'ʒ', 'θ', 'ð', 'ŋ', 'ʃ', 'ʧ', 'ʤ', 'ː', 'ˈ',
        'ˌ', 'ʰ', 'ʲ', 'ʷ', 'ʔ', 'ɾ', 'ɹ', 'ɫ', 'ɡ',
    })

    try:
        norm_text, phone, tone, word2ph = clean_text(text, language_str)

        # Filter phone and tone in lockstep so their lengths always match.
        processed_phone = []
        processed_tone = []
        removed_symbols = set()
        for p, t in zip(phone, tone):
            # With no symbol table we cannot tell which phones are unknown,
            # so only the fixed unsupported set is filtered in that case
            # (previously this raised TypeError on the default argument).
            is_known = symbol_to_id is None or p in symbol_to_id
            if p in unsupported_phones or not is_known:
                removed_symbols.add(p)
            else:
                processed_phone.append(p)
                processed_tone.append(t)

        # Report what was filtered out.
        if removed_symbols:
            print(f"[音素过滤] 删除了 {len(removed_symbols)} 个特殊音素: {sorted(removed_symbols)}")
            print(f"[音素过滤] 处理后音素序列长度: {len(processed_phone)}")
            print(f"[音素过滤] 处理后音调序列长度: {len(processed_tone)}")

        # Nothing survived the filter: fall back to a fixed default sequence.
        # NOTE(review): the tones here are strings and 'ni'/'hao' may not be
        # valid entries for cleaned_text_to_sequence — confirm against the
        # symbol table before relying on this fallback.
        if not processed_phone:
            print("[警告] 没有有效音素,使用默认中文音素")
            processed_phone = ['ni', 'hao']
            processed_tone = ['1', '3']
            word2ph = [1, 1]

        # The original word-to-phone alignment is no longer valid once phones
        # were removed, so fall back to one phone per entry.
        if len(processed_phone) != len(phone):
            print(f"[警告] 音素序列长度变化: {len(phone)} -> {len(processed_phone)}")
            word2ph = [1] * len(processed_phone)

        phone, tone, language = cleaned_text_to_sequence(
            processed_phone, processed_tone, language_str, symbol_to_id
        )

        # Insert a 0 between consecutive entries of every sequence.
        phone = intersperse(phone, 0)
        tone = intersperse(tone, 0)
        language = intersperse(language, 0)

        phone = np.array(phone, dtype=np.int32)
        tone = np.array(tone, dtype=np.int32)
        language = np.array(language, dtype=np.int32)
        # Interspersing doubles the sequence length (plus one extra entry),
        # so the alignment counts are doubled and the first gets the extra.
        word2ph = np.array(word2ph, dtype=np.int32) * 2
        word2ph[0] += 1
        return phone, tone, language, norm_text, word2ph

    except Exception as e:
        print(f"[错误] 文本处理失败: {e}")
        import traceback
        traceback.print_exc()
        # Bare raise re-raises the active exception with its traceback intact.
        raise
136
 
137
 
138
  def audio_numpy_concat(segment_data_list, sr, speed=1.):