{ "connection": [ { "from": "neural_prompt", "id": "audio_connection", "to": "audio_marker" }, { "from": "neural_prompt", "id": "neural_thermal_fallback_connection", "pipeline": [ { "id": "neural_adapter", "params": { "add_trailing_punc": false, "end_period": ".", "eos": "~", "max_word_limit": 40, "phone_map": { "!": 2, "#": 3, "$": 4, ",": 5, ".": 6, "145": 7, "145:": 8, "146": 9, "164": 10, ":": 11, ";": 12, "?": 13, "@": 14, "@:": 15, "A": 16, "A:": 17, "C": 18, "D": 19, "E": 20, "E:": 21, "G": 22, "I": 23, "I:": 24, "J": 25, "J:": 26, "K": 27, "L": 28, "N": 29, "O": 30, "O:": 31, "P": 32, "R": 33, "S": 34, "T": 35, "U": 36, "U:": 37, "Y": 38, "Y:": 39, "Z": 40, "^": 41, "^:": 42, "_": 0, "a": 43, "a:": 44, "b": 45, "d": 46, "e": 47, "e:": 48, "f": 49, "g": 50, "h": 51, "i": 52, "i:": 53, "j": 54, "k": 55, "l": 56, "m": 57, "n": 58, "p": 59, "pau": 60, "r": 61, "s": 62, "t": 63, "u": 64, "u:": 65, "v": 66, "w": 67, "y": 68, "y:": 69, "z": 70, "~": 1 }, "punctuation": [ "!", "?", ",", ".", ":", ";" ], "stress_marker": ":", "word_boundary_marker": "#" } }, { "async": true, "id": "fastspeech2_encoder", "params": { "framework_type": "mil2bnns", "model_config": "compact_fastspeech2/fastspeech2_encoder.json", "model_type": "features" } }, { "async": true, "id": "fastspeech2_decoder", "params": { "framework_type": "mil2bnns", "model_config": "compact_fastspeech2/fastspeech2_decoder.json", "model_type": "default" } }, { "id": "neural_rate_change", "params": { "active": false, "frame_shift": 240, "global_rate": 1.0 } }, { "id": "mil_wavernn_fallback", "params": { "frame_size": 240, "framework_type": "mil2bnns", "graph_gru_model_hidden_size": 448, "graph_gru_model_input_input_audio": "input_audio", "graph_gru_model_input_input_mel_linear": "input_mel_linear", "graph_gru_model_input_state": "state", "graph_gru_model_output_even_out": "Identity_1", "graph_gru_model_output_final_state": "Identity_2", "graph_gru_model_output_odd_out": "Identity", "graph_linear_model_input_input_mel": "input_mel", "graph_linear_model_mel_size": 80, "graph_linear_model_output_mel_linear_output": "Identity_3", "gru_model_io_names": [ "Identity_2", "Identity_1", "Identity", "input_mel_linear", "input_audio", "state" ], "gru_model_path": "compact_wavernn/wavernn_gru_model.mlmodelc/model.mil", "linear_model_io_names": [ "input_mel", "Identity_3" ], "linear_model_path": "compact_wavernn/wavernn_linear_model.mlmodelc/model.mil", "model_type": "subscale" } }, { "id": "waveform_rate_change_fallback", "params": { "active": true, "global_rate": 1.0 } }, { "id": "audio_resampling_fallback", "params": { "leading_silence": 50, "sample_rate_in": 24000, "sample_rate_out": 48000 } } ], "to": "audio_marker" } ], "pipeline": [ { "id": "frontend_feature_log" }, { "id": "prompt_partition", "params": { "pause_marker": ",", "pause_threshold": 500, "pause_threshold_base": 200 } }, { "id": "neural_prompt", "params": { "sample_rate": 48000, "spelling_pause_ms": 200 } }, { "id": "hydra_adapter", "params": { "punctuation_map": { "!!": "!", "!?": "?", "?!": "?", "??": "?" } } }, { "id": "neural_adapter", "params": { "add_trailing_punc": false, "end_period": ".", "bos": "*", "eos": "~", "max_word_limit": 40, "max_phone_limit": 200, "pause_marker": "pau", "pause_min_duration": 70.0, "pause_speech_energy": 0.25, "pause_version": 2, "phone_map": { "!": 3, "#": 4, "$": 5, "$:": 6, "*": 2, ",": 7, ".": 8, "145": 9, "145:": 10, "146": 11, "164": 12, ":": 13, ";": 14, "?": 15, "@": 16, "@:": 17, "A": 18, "A:": 19, "C": 20, "D": 21, "E": 22, "E:": 23, "G": 24, "I": 25, "I:": 26, "J": 27, "J:": 28, "K": 29, "L": 30, "N": 31, "O": 32, "O:": 33, "P": 34, "R": 35, "S": 36, "T": 37, "U": 38, "U:": 39, "Y": 40, "Y:": 41, "Z": 42, "^": 43, "^:": 44, "_": 0, "a": 45, "a:": 46, "b": 47, "d": 48, "e": 49, "e:": 50, "f": 51, "g": 52, "h": 53, "i": 54, "i:": 55, "j": 56, "k": 57, "l": 58, "m": 59, "n": 60, "p": 61, "pau": 62, "r": 63, "s": 64, "t": 65, "u": 66, "u:": 67, "v": 68, "w": 69, "y": 70, "y:": 71, "z": 72, "~": 1 }, "punctuation": [ "!", "?", ",", ".", ":", ";" ], "punctuation_set1": [ "?", ".", "!" ], "punctuation_set2": [ ",", ";", ":" ], "stress_marker": ":", "word_boundary_marker": "#" } }, { "id": "fs2s1_encoder", "params": { "model_type": "p2a_dual_optimised", "model_config": "p2a/p2a_dual_prompt_encoder.json", "framework_type": "mil2bnns", "style_prompt_map": { "siri": "prompts_1/", "narration": "prompts_2/", "default": "prompts_1/" }, "encoder_prompt_k": "encoder_prompt_k.bin", "encoder_prompt_v": "encoder_prompt_v.bin", "duration_input_prompt": "dur_input.bin", "fusion_prompt_k": "fusion_prompt_k.bin", "fusion_prompt_v": "fusion_prompt_v.bin", "fusion_cross_prompt_k": "fusion_cross_prompt_k.bin", "fusion_cross_prompt_v": "fusion_cross_prompt_v.bin" } }, { "async": true, "id": "soundstorm_streaming", "params": { "model_type": "p2a_prompt", "model_config": "p2a/p2a_dual_prompt_decoder.json", "framework_type": "mil2bnns", "prompt_v_0": "voice_prompts/prompt_v_vq_0.bin", "prompt_k_0": "voice_prompts/prompt_k_vq_0.bin", "prompt_v_1": "voice_prompts/prompt_v_vq_1.bin", "prompt_k_1": "voice_prompts/prompt_k_vq_1.bin", "prompt_v_2": "voice_prompts/prompt_v_vq_2.bin", "prompt_k_2": "voice_prompts/prompt_k_vq_2.bin", "prompt_v_3": "voice_prompts/prompt_v_vq_3.bin", "prompt_k_3": "voice_prompts/prompt_k_vq_3.bin", "prompt_v_4": "voice_prompts/prompt_v_vq_4.bin", "prompt_k_4": "voice_prompts/prompt_k_vq_4.bin", "prompt_v_5": "voice_prompts/prompt_v_vq_5.bin", "prompt_k_5": "voice_prompts/prompt_k_vq_5.bin", "prompt_v_6": "voice_prompts/prompt_v_vq_6.bin", "prompt_k_6": "voice_prompts/prompt_k_vq_6.bin", "prompt_v_7": "voice_prompts/prompt_v_vq_7.bin", "prompt_k_7": "voice_prompts/prompt_k_vq_7.bin" } }, { "id": "anetec_streaming_decoder", "params": { "model_config": "anetec/anetec_decoder_streaming_inference.json", "framework_type": "mil2bnns" } }, { "id": "waveform_rate_change", "params": { "active": true, "global_rate": 1.0, "sampling_frequency": 24000, "frame_size": 20 } }, { "id": "waveform_volume_change", "params": { "global": 0.8, "peak_ratio": 0.7, "smoothing_window": 120 } }, { "id": "audio_resampling", "params": { "leading_silence": 0, "sample_rate_in": 24000 } }, { "id": "audio_marker" } ] }