{ "connection": [ { "from": "neural_prompt", "id": "audio_connection", "to": "audio_marker" }, { "from": "neural_rate_change", "id": "neural_thermal_fallback_connection", "pipeline": [ { "id": "mil_wavernn_fallback", "params": { "framework_type": "mil2bnns", "model_type": "subscale", "linear_model_path": "wavernn_fallback/wavernn_linear_model.mlmodelc/model.mil", "gru_model_path": "wavernn_fallback/wavernn_gru_model.mlmodelc/model.mil", "graph_linear_model_input_input_mel": "input_mel", "graph_linear_model_output_mel_linear_output": "Identity", "graph_gru_model_input_input_mel_linear": "input_mel_linear", "graph_gru_model_input_input_audio": "input_audio", "graph_gru_model_input_state": "input_state", "graph_gru_model_output_odd_out": "Identity", "graph_gru_model_output_even_out": "Identity_2", "graph_gru_model_output_final_state": "Identity_1", "model_io_dtype": "fp16", "frame_size": 160, "graph_linear_model_mel_size": 80, "graph_gru_model_hidden_size": 352, "linear_model_io_names": [ "input_mel", "Identity" ], "gru_model_io_names": [ "input_mel_linear", "input_audio", "input_state", "Identity_2", "Identity", "Identity_1" ] } }, { "id": "waveform_rate_change_fallback", "params": { "active": true, "global_rate": 1.0 } }, { "id": "audio_resampling_fallback", "params": { "leading_silence": 50, "sample_rate_in": 16000, "sample_rate_out": 48000 } } ], "to": "audio_marker" } ], "pipeline": [ { "id": "hydra_adapter", "params": { "punctuation_map": { "!!": "!", "!?": "?", "?!": "?", "??": "?" } } }, { "id": "frontend_feature_log" }, { "id": "prompt_partition", "params": { "pause_marker": ",", "pause_threshold": 500, "pause_threshold_base": 200 } }, { "id": "neural_prompt", "params": { "sample_rate": 48000, "spelling_pause_ms": 200 } }, { "id": "neural_adapter", "params": { "add_trailing_punc": false, "end_period": ".", "eos": "~", "max_word_limit": 40, "pause_marker": "pau", "pause_min_duration": 70.0, "pause_speech_energy": 0.25, "pause_version": 2, "phone_map": { "!": 2, "#": 3, ",": 4, ".": 5, "2": 6, "2:": 7, ":": 8, ";": 9, "E": 10, "E:": 11, "G": 12, "I": 13, "I:": 14, "M": 15, "M:": 16, "N": 17, "S": 18, "Z": 19, "_": 0, "a": 20, "a:": 21, "b": 22, "d": 23, "e": 24, "e:": 25, "f": 26, "g": 27, "h": 28, "i": 29, "i:": 30, "j": 31, "k": 32, "l": 33, "m": 34, "n": 35, "o": 36, "o:": 37, "p": 38, "pau": 39, "q": 40, "r": 41, "s": 42, "t": 43, "tS": 44, "u": 45, "u:": 46, "v": 47, "w": 48, "x": 49, "y": 50, "y:": 51, "z": 52, "{": 53, "{:": 54, "~": 1 }, "punctuation": [ "!", "?", ",", ".", ":", ";" ], "punctuation_set1": [ "?", ".", "!" ], "punctuation_set2": [ ",", ";", ":" ], "stress_marker": ":", "word_boundary_marker": "#" } }, { "async": true, "id": "fastspeech2_encoder", "params": { "model_type": "default", "model_config": "fastspeech2/fastspeech2_encoder.json", "framework_type": "mil2bnns" } }, { "async": true, "id": "fastspeech2_decoder", "params": { "model_type": "default", "model_config": "fastspeech2/fastspeech2_decoder.json", "framework_type": "mil2bnns" } }, { "id": "neural_rate_change", "params": { "active": true, "frame_shift": 240, "global_rate": 1.0 } }, { "id": "mil_wavernn", "params": { "framework_type": "mil2bnns", "model_type": "subscale", "linear_model_path": "wavernn/wavernn_linear_model.mlmodelc/model.mil", "gru_model_path": "wavernn/wavernn_gru_model.mlmodelc/model.mil", "graph_linear_model_input_input_mel": "input_mel", "graph_linear_model_output_mel_linear_output": "Identity", "graph_gru_model_input_input_mel_linear": "input_mel_linear", "graph_gru_model_input_input_audio": "input_audio", "graph_gru_model_input_state": "input_state", "graph_gru_model_output_odd_out": "Identity", "graph_gru_model_output_even_out": "Identity_2", "graph_gru_model_output_final_state": "Identity_1", "model_io_dtype": "fp16", "frame_size": 200, "graph_linear_model_mel_size": 80, "graph_gru_model_hidden_size": 384, "linear_model_io_names": [ "input_mel", "Identity" ], "gru_model_io_names": [ "input_mel_linear", "input_audio", "input_state", "Identity_2", "Identity", "Identity_1" ] } }, { "id": "waveform_rate_change", "params": { "active": true, "global_rate": 1.0, "sampling_frequency": 20000 } }, { "id": "waveform_volume_change", "params": { "global": 0.8, "peak_ratio": 0.7, "smoothing_window": 120 } }, { "id": "audio_resampling", "params": { "leading_silence": 50, "sample_rate_in": 20000 } }, { "id": "audio_marker" } ] }