{ "connection": [ { "from": "neural_prompt", "id": "audio_connection", "to": "audio_marker" }, { "from": "neural_rate_change", "id": "neural_thermal_fallback_connection", "pipeline": [ { "id": "mil_wavernn_fallback", "params": { "framework_type": "mil2bnns", "model_type": "subscale", "linear_model_path": "wavernn_fallback/wavernn_linear_model.mlmodelc/model.mil", "gru_model_path": "wavernn_fallback/wavernn_gru_model.mlmodelc/model.mil", "graph_linear_model_input_input_mel": "input_mel", "graph_linear_model_output_mel_linear_output": "Identity", "graph_gru_model_input_input_mel_linear": "input_mel_linear", "graph_gru_model_input_input_audio": "input_audio", "graph_gru_model_input_state": "input_state", "graph_gru_model_output_odd_out": "Identity", "graph_gru_model_output_even_out": "Identity_2", "graph_gru_model_output_final_state": "Identity_1", "model_io_dtype": "fp16", "frame_size": 160, "graph_linear_model_mel_size": 80, "graph_gru_model_hidden_size": 352, "linear_model_io_names": [ "input_mel", "Identity" ], "gru_model_io_names": [ "input_mel_linear", "input_audio", "input_state", "Identity_2", "Identity", "Identity_1" ] } }, { "id": "waveform_rate_change_fallback", "params": { "active": true, "global_rate": 1.0 } }, { "id": "audio_resampling_fallback", "params": { "leading_silence": 50, "sample_rate_in": 16000, "sample_rate_out": 48000 } } ], "to": "audio_marker" } ], "pipeline": [ { "id": "hydra_adapter", "params": { "punctuation_map": { "!!": "!", "!?": "?", "?!": "?", "??": "?", "?": "?" } } }, { "id": "frontend_feature_log" }, { "id": "prompt_partition", "params": { "pause_marker": ",", "pause_threshold": 500, "pause_threshold_base": 200 } }, { "id": "neural_prompt", "params": { "sample_rate": 48000, "spelling_pause_ms": 200 } }, { "id": "neural_adapter", "params": { "add_trailing_punc": false, "end_period": ".", "eos": "~", "max_word_limit": 40, "pause_marker": "pau", "pause_min_duration": 70.0, "pause_speech_energy": 0.25, "pause_version": 2, "phone_map": { "!": 2, "\"3": 3, "\"@": 4, "\"@I": 5, "\"@U": 6, "\"A": 7, "\"E": 8, "\"E@": 9, "\"I": 10, "\"I@": 11, "\"O": 12, "\"OI": 13, "\"Q": 14, "\"U": 15, "\"U@": 16, "\"V": 17, "\"a": 18, "\"aI": 19, "\"aU": 20, "\"e": 21, "\"eI": 22, "\"eI@": 23, "\"i": 24, "\"o": 25, "\"u": 26, "\"{": 27, "#": 28, ",": 29, ".": 30, "3": 31, "4": 32, ":": 33, ";": 34, "@": 35, "@I": 36, "@U": 37, "A": 38, "E": 39, "E@": 40, "G": 41, "I": 42, "I@": 43, "J": 44, "N": 45, "O": 46, "OI": 47, "Q": 48, "S": 49, "U": 50, "U@": 51, "V": 52, "Z": 53, "_": 0, "a": 54, "aI": 55, "aU": 56, "b": 57, "b_h": 58, "d": 59, "dZ": 60, "dZ_h": 61, "d_d": 62, "d_h": 63, "d`": 64, "d`_h": 65, "e": 66, "eI": 67, "eI@": 68, "f": 69, "g": 70, "g_h": 71, "h": 72, "h\\": 73, "i": 74, "j": 75, "k": 76, "k_h": 77, "l": 78, "l=": 79, "l`": 80, "m": 81, "m=": 82, "n": 83, "n=": 84, "n`": 85, "o": 86, "p": 87, "p_h": 88, "q": 89, "q_h": 90, "r": 91, "r=": 92, "r\\`": 93, "r`": 94, "r`_h": 95, "s": 96, "s`": 97, "t": 98, "tS": 99, "tS_h": 100, "t_d": 101, "t_h": 102, "t`": 103, "t`_h": 104, "u": 105, "v\\": 106, "w": 107, "z": 108, "{": 109, "pau": 110, "a:": 111, "~": 1 }, "punctuation": [ "!", "?", ",", ".", ":", ";" ], "punctuation_set1": [ "?", ".", "!" ], "punctuation_set2": [ ",", ";", ":" ], "stress_marker": "", "word_boundary_marker": "#" } }, { "async": true, "id": "fastspeech2_encoder", "params": { "model_type": "features", "model_config": "fastspeech2/fastspeech2_encoder.json", "framework_type": "mil2bnns" } }, { "async": true, "id": "fastspeech2_decoder", "params": { "model_type": "default", "model_config": "fastspeech2/fastspeech2_decoder.json", "framework_type": "mil2bnns" } }, { "id": "neural_rate_change", "params": { "active": true, "frame_shift": 240, "global_rate": 1.0 } }, { "id": "mil_wavernn", "params": { "framework_type": "mil2bnns", "model_type": "subscale", "linear_model_path": "wavernn/wavernn_linear_model.mlmodelc/model.mil", "gru_model_path": "wavernn/wavernn_gru_model.mlmodelc/model.mil", "graph_linear_model_input_input_mel": "input_mel", "graph_linear_model_output_mel_linear_output": "Identity", "graph_gru_model_input_input_mel_linear": "input_mel_linear", "graph_gru_model_input_input_audio": "input_audio", "graph_gru_model_input_state": "input_state", "graph_gru_model_output_odd_out": "Identity", "graph_gru_model_output_even_out": "Identity_2", "graph_gru_model_output_final_state": "Identity_1", "model_io_dtype": "fp16", "frame_size": 200, "graph_linear_model_mel_size": 80, "graph_gru_model_hidden_size": 384, "linear_model_io_names": [ "input_mel", "Identity" ], "gru_model_io_names": [ "input_mel_linear", "input_audio", "input_state", "Identity_2", "Identity", "Identity_1" ] } }, { "id": "waveform_rate_change", "params": { "active": true, "global_rate": 1.0, "sampling_frequency": 20000 } }, { "id": "audio_resampling", "params": { "leading_silence": 50, "sample_rate_in": 20000 } }, { "id": "audio_marker" } ] }