{ "storage": "unilm_joint.espresso.weights", "analyses": { "N8Espresso23analysis_debug_metadataE": { "path": "", "bundle": "", "product": "", "use_case": "", "name": "LOCALHOST-2023-05-01-ANE-quant", "version": "" } }, "properties": { "mldb_token" : "mldb-5qbg63zgxe" }, "format_version": 200, "metadata_in_weights": [], "layers": [ { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": -1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 3, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "24", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "24", "begin_48": 0, "top": "24_0,24_1,24_2", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "nB": 15000, "top": "60", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 3, "W_t_int8": 5 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "24_0", "debug_info": "", "has_tanh": 0, "nd_mode": true, "name": "inner_product_0", "has_prelu": 0 }, { "nB": 256, "top": "63", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 7, "W_t_int8": 9 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "24_1", "debug_info": "", "has_tanh": 0, "nd_mode": true, "name": "63", "has_prelu": 0 }, { "nB": 2, "top": "66", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 11, "W_t_int8": 13 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "24_2", "debug_info": "", "has_tanh": 0, "nd_mode": true, "name": "66", "has_prelu": 0 }, { "bottom": "60,63", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "68", "top": "68", "type": "elementwise", "name": "68", "beta": 0 }, { "bottom": "68,66", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "69", "top": "69", "type": "elementwise", "name": "69", "beta": 0 }, { "axis_seq": 4, "top": "transpose_8", "name": "transpose_8", "axis_n": 3, "axis_h": 1, "type": "transpose", "axis_w": 2, "bottom": "69", "hint_fallback_from_cpu": 1, "axis_k": 0, "debug_info": "transpose_8", "weights": {} }, { "axis_mode": 4, "bottom": "transpose_8", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.3", "use_version": 1, "top": "channels_mean.3", "type": "reduce", "name": "channels_mean.3" }, { "alpha": -1, "bottom": "channels_mean.3", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.3", "top": "_neg_y_zero_mean.3", "type": "activation", "name": "_neg_y_zero_mean.3", "beta": 0 }, { "bottom": "transpose_8,_neg_y_zero_mean.3", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.3", "top": "zero_mean.3", "type": "elementwise", "name": "zero_mean.3", "beta": 0 }, { "bottom": "zero_mean.3,zero_mean.3", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.3", "top": "zero_mean_sq.3", "type": "elementwise", "name": "zero_mean_sq.3", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.3", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "95", "use_version": 1, "top": "95", "type": "reduce", "name": "95" }, { "bottom": "95", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "97", "top": "97", "type": "elementwise", "name": "97", "beta": 0 }, { "bottom": "97", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.3", "top": "denom.3", "type": "elementwise", "name": "denom.3", "beta": 0 }, { "bottom": "zero_mean.3,denom.3", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.3", "top": "out.3", "type": "elementwise", "name": "out.3", "beta": 0 }, { "top": "101", "w": 1, "h": 1, "name": "101", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "101", "n": 1, "weights": {}, "constant_blob": 293 }, { "bottom": "out.3,101", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "102", "top": "102", "type": "elementwise", "name": "102", "beta": 0 }, { "top": "104", "w": 1, "h": 1, "name": "104", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "104", "n": 1, "weights": {}, "constant_blob": 17 }, { "bottom": "102,104", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.3", "top": "k.3", "type": "elementwise", "name": "k.3", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "119", "K": 512, "blob_biases": 21, "name": "119", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.3", "weights": { "per_ch_qscale": 23, "W_S8": 19, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.5", "K": 512, "blob_biases": 27, "name": "k.5", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.3", "weights": { "per_ch_qscale": 29, "W_S8": 25, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "133", "K": 512, "blob_biases": 33, "name": "133", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.3", "weights": { "per_ch_qscale": 35, "W_S8": 31, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "119", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "134", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "134", "begin_48": 0, "top": "134_0,134_1,134_2,134_3,134_4,134_5,134_6,134_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.5", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_7", "top": "transpose_7", "type": "transpose", "name": "transpose_7" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_7", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "144", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "144", "begin_48": 0, "top": "144_0,144_1,144_2,144_3,144_4,144_5,144_6,144_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "133", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "153", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "153", "begin_48": 0, "top": "153_0,153_1,153_2,153_3,153_4,153_5,153_6,153_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "163_transpose_x", "top": "144_0_transposed", "type": "transpose", "name": "163_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "163_transpose_y", "top": "134_0_transposed", "type": "transpose", "name": "163_transpose_y" }, { "bottom": "144_0_transposed,134_0_transposed", "weights": {}, "debug_info": "163_batch_matmul", "top": "163_pre_transpose", "type": "batch_matmul", "name": "163_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "163_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "163", "top": "163", "type": "transpose", "name": "163" }, { "bottom": "163", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "165", "top": "165", "type": "elementwise", "name": "165", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "167_transpose_x", "top": "144_1_transposed", "type": "transpose", "name": "167_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "167_transpose_y", "top": "134_1_transposed", "type": "transpose", "name": "167_transpose_y" }, { "bottom": "144_1_transposed,134_1_transposed", "weights": {}, "debug_info": "167_batch_matmul", "top": "167_pre_transpose", "type": "batch_matmul", "name": "167_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "167_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "167", "top": "167", "type": "transpose", "name": "167" }, { "bottom": "167", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "169", "top": "169", "type": "elementwise", "name": "169", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "171_transpose_x", "top": "144_2_transposed", "type": "transpose", "name": "171_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "171_transpose_y", "top": "134_2_transposed", "type": "transpose", "name": "171_transpose_y" }, { "bottom": "144_2_transposed,134_2_transposed", "weights": {}, "debug_info": "171_batch_matmul", "top": "171_pre_transpose", "type": "batch_matmul", "name": "171_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "171_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "171", "top": "171", "type": "transpose", "name": "171" }, { "bottom": "171", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "173", "top": "173", "type": "elementwise", "name": "173", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "175_transpose_x", "top": "144_3_transposed", "type": "transpose", "name": "175_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "175_transpose_y", "top": "134_3_transposed", "type": "transpose", "name": "175_transpose_y" }, { "bottom": "144_3_transposed,134_3_transposed", "weights": {}, "debug_info": "175_batch_matmul", "top": "175_pre_transpose", "type": "batch_matmul", "name": "175_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "175_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "175", "top": "175", "type": "transpose", "name": "175" }, { "bottom": "175", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "177", "top": "177", "type": "elementwise", "name": "177", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "179_transpose_x", "top": "144_4_transposed", "type": "transpose", "name": "179_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "179_transpose_y", "top": "134_4_transposed", "type": "transpose", "name": "179_transpose_y" }, { "bottom": "144_4_transposed,134_4_transposed", "weights": {}, "debug_info": "179_batch_matmul", "top": "179_pre_transpose", "type": "batch_matmul", "name": "179_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "179_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "179", "top": "179", "type": "transpose", "name": "179" }, { "bottom": "179", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "181", "top": "181", "type": "elementwise", "name": "181", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "183_transpose_x", "top": "144_5_transposed", "type": "transpose", "name": "183_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "183_transpose_y", "top": "134_5_transposed", "type": "transpose", "name": "183_transpose_y" }, { "bottom": "144_5_transposed,134_5_transposed", "weights": {}, "debug_info": "183_batch_matmul", "top": "183_pre_transpose", "type": "batch_matmul", "name": "183_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "183_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "183", "top": "183", "type": "transpose", "name": "183" }, { "bottom": "183", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "185", "top": "185", "type": "elementwise", "name": "185", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "187_transpose_x", "top": "144_6_transposed", "type": "transpose", "name": "187_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "187_transpose_y", "top": "134_6_transposed", "type": "transpose", "name": "187_transpose_y" }, { "bottom": "144_6_transposed,134_6_transposed", "weights": {}, "debug_info": "187_batch_matmul", "top": "187_pre_transpose", "type": "batch_matmul", "name": "187_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "187_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "187", "top": "187", "type": "transpose", "name": "187" }, { "bottom": "187", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "189", "top": "189", "type": "elementwise", "name": "189", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "144_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "191_transpose_x", "top": "144_7_transposed", "type": "transpose", "name": "191_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "134_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "191_transpose_y", "top": "134_7_transposed", "type": "transpose", "name": "191_transpose_y" }, { "bottom": "144_7_transposed,134_7_transposed", "weights": {}, "debug_info": "191_batch_matmul", "top": "191_pre_transpose", "type": "batch_matmul", "name": "191_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "191_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "191", "top": "191", "type": "transpose", "name": "191" }, { "bottom": "191", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "193", "top": "193", "type": "elementwise", "name": "193", "beta": 0 }, { "bottom": "165,169,173,177,181,185,189,193", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.2", "top": "attn_weights.2", "type": "general_concat", "name": "attn_weights.2" }, { "bottom": "qk_mask", "alpha": -10000, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "qk_mask.1", "type": "elementwise", "name": "qk_mask/scaling", "beta": 0 }, { "bottom": "attn_weights.2,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.2", "nd_mode": true, "top": "attn_weights0.2", "type": "elementwise", "name": "attn_weights0.2", "beta": 0 }, { "bottom": "attn_weights0.2", "weights": {}, "debug_info": "input.3", "top": "input.3", "C": 2, "type": "softmax", "name": "input.3" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.3", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "199", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "199", "begin_48": 0, "top": "199_0,199_1,199_2,199_3,199_4,199_5,199_6,199_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "153_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "209_transpose_x", "top": "153_0_transposed", "type": "transpose", "name": "209_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "209_transpose_y", "top": "199_0_transposed", "type": "transpose", "name": "209_transpose_y" }, { "bottom": "153_0_transposed,199_0_transposed", "weights": {}, "debug_info": "209_batch_matmul", "top": "209_pre_transpose", "type": "batch_matmul", "name": "209_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "209_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "209", "top": "209", "type": "transpose", "name": "209" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "211_transpose_x", "top": "153_1_transposed", "type": "transpose", "name": "211_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "211_transpose_y", "top": "199_1_transposed", "type": "transpose", "name": "211_transpose_y" }, { "bottom": "153_1_transposed,199_1_transposed", "weights": {}, "debug_info": "211_batch_matmul", "top": "211_pre_transpose", "type": "batch_matmul", "name": "211_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "211_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "211", "top": "211", "type": "transpose", "name": "211" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "213_transpose_x", "top": "153_2_transposed", "type": "transpose", "name": "213_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "213_transpose_y", "top": "199_2_transposed", "type": "transpose", "name": "213_transpose_y" }, { "bottom": "153_2_transposed,199_2_transposed", "weights": {}, "debug_info": "213_batch_matmul", "top": "213_pre_transpose", "type": "batch_matmul", "name": "213_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "213_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "213", "top": "213", "type": "transpose", "name": "213" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "215_transpose_x", "top": "153_3_transposed", "type": "transpose", "name": "215_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "215_transpose_y", "top": "199_3_transposed", "type": "transpose", "name": "215_transpose_y" }, { "bottom": "153_3_transposed,199_3_transposed", "weights": {}, "debug_info": "215_batch_matmul", "top": "215_pre_transpose", "type": "batch_matmul", "name": "215_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "215_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "215", "top": "215", "type": "transpose", "name": "215" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "217_transpose_x", "top": "153_4_transposed", "type": "transpose", "name": "217_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "217_transpose_y", "top": "199_4_transposed", "type": "transpose", "name": "217_transpose_y" }, { "bottom": "153_4_transposed,199_4_transposed", "weights": {}, "debug_info": "217_batch_matmul", "top": "217_pre_transpose", "type": "batch_matmul", "name": "217_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "217_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "217", "top": "217", "type": "transpose", "name": "217" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "219_transpose_x", "top": "153_5_transposed", "type": "transpose", "name": "219_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "219_transpose_y", "top": "199_5_transposed", "type": "transpose", "name": "219_transpose_y" }, { "bottom": "153_5_transposed,199_5_transposed", "weights": {}, "debug_info": "219_batch_matmul", "top": "219_pre_transpose", "type": "batch_matmul", "name": "219_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "219_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "219", "top": "219", "type": "transpose", "name": "219" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "221_transpose_x", "top": "153_6_transposed", "type": "transpose", "name": "221_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "221_transpose_y", "top": "199_6_transposed", "type": "transpose", "name": "221_transpose_y" }, { "bottom": "153_6_transposed,199_6_transposed", "weights": {}, "debug_info": "221_batch_matmul", "top": "221_pre_transpose", "type": "batch_matmul", "name": "221_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "221_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "221", "top": "221", "type": "transpose", "name": "221" }, { "axis_h": 2, "axis_w": 0, "bottom": "153_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "223_transpose_x", "top": "153_7_transposed", "type": "transpose", "name": "223_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "199_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "223_transpose_y", "top": "199_7_transposed", "type": "transpose", "name": "223_transpose_y" }, { "bottom": "153_7_transposed,199_7_transposed", "weights": {}, "debug_info": "223_batch_matmul", "top": "223_pre_transpose", "type": "batch_matmul", "name": "223_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "223_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "223", "top": "223", "type": "transpose", "name": "223" }, { "weights": {}, "debug_info": "attn.6", "top": "attn.6", "type": "concat", "name": "attn.6", "bottom": "209,211,213,215,217,219,221,223" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.8", "K": 512, "blob_biases": 39, "name": "attn.8", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.6", "weights": { "per_ch_qscale": 41, "W_S8": 37, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "transpose_8,attn.8", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs.3", "top": "inputs.3", "type": "elementwise", "name": "inputs.3", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs.3", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.5", "use_version": 1, "top": "channels_mean.5", "type": "reduce", "name": "channels_mean.5" }, { "alpha": -1, "bottom": "channels_mean.5", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.5", "top": "_neg_y_zero_mean.5", "type": "activation", "name": "_neg_y_zero_mean.5", "beta": 0 }, { "bottom": "inputs.3,_neg_y_zero_mean.5", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.5", "top": "zero_mean.5", "type": "elementwise", "name": "zero_mean.5", "beta": 0 }, { "bottom": "zero_mean.5,zero_mean.5", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.5", "top": "zero_mean_sq.5", "type": "elementwise", "name": "zero_mean_sq.5", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.5", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "244", "use_version": 1, "top": "244", "type": "reduce", "name": "244" }, { "bottom": "244", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "246", "top": "246", "type": "elementwise", "name": "246", "beta": 0 }, { "bottom": "246", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.5", "top": "denom.5", "type": "elementwise", "name": "denom.5", "beta": 0 }, { "bottom": "zero_mean.5,denom.5", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.5", "top": "out.5", "type": "elementwise", "name": "out.5", "beta": 0 }, { "top": "250", "w": 1, "h": 1, "name": "250", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "250", "n": 1, "weights": {}, "constant_blob": 297 }, { "bottom": "out.5,250", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "251", "top": "251", "type": "elementwise", "name": "251", "beta": 0 }, { "top": "253", "w": 1, "h": 1, "name": "253", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "253", "n": 1, "weights": {}, "constant_blob": 45 }, { "bottom": "251,253", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.5", "top": "input.5", "type": "elementwise", "name": "input.5", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.3", "K": 512, "blob_biases": 49, "name": "x.3", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.5", "weights": { "per_ch_qscale": 51, "W_S8": 47, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.3", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "265", "top": "265", "type": "elementwise", "name": "265", "beta": 0 }, { "bottom": "265", "weights": {}, "mode": 3, "debug_info": "266", "top": "266", "type": "activation", "name": "266" }, { "bottom": "x.3,266", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.7", "top": "input.7", "type": "elementwise", "name": "input.7", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.3", "K": 2048, "blob_biases": 55, "name": "input0.3", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.7", "weights": { "per_ch_qscale": 57, "W_S8": 53, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.3,inputs.3", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "276", "top": "276", "type": "elementwise", "name": "276", "beta": 0 }, { "bottom": "transpose_8", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "278", "top": "278", "type": "elementwise", "name": "278", "beta": 0 }, { "bottom": "278,276", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs.2", "top": "inputs.2", "type": "elementwise", "name": "inputs.2", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs.2", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.7", "use_version": 1, "top": "channels_mean.7", "type": "reduce", "name": "channels_mean.7" }, { "alpha": -1, "bottom": "channels_mean.7", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.7", "top": "_neg_y_zero_mean.7", "type": "activation", "name": "_neg_y_zero_mean.7", "beta": 0 }, { "bottom": "inputs.2,_neg_y_zero_mean.7", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.7", "top": "zero_mean.7", "type": "elementwise", "name": "zero_mean.7", "beta": 0 }, { "bottom": "zero_mean.7,zero_mean.7", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.7", "top": "zero_mean_sq.7", "type": "elementwise", "name": "zero_mean_sq.7", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.7", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "293", "use_version": 1, "top": "293", "type": "reduce", "name": "293" }, { "bottom": "293", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "295", "top": "295", "type": "elementwise", "name": "295", "beta": 0 }, { "bottom": "295", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.7", "top": "denom.7", "type": "elementwise", "name": "denom.7", "beta": 0 }, { "bottom": "zero_mean.7,denom.7", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.7", "top": "out.7", "type": "elementwise", "name": "out.7", "beta": 0 }, { "top": "299", "w": 1, "h": 1, "name": "299", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "299", "n": 1, "weights": {}, "constant_blob": 301 }, { "bottom": "out.7,299", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "300", "top": "300", "type": "elementwise", "name": "300", "beta": 0 }, { "top": "302", "w": 1, "h": 1, "name": "302", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "302", "n": 1, "weights": {}, "constant_blob": 61 }, { "bottom": "300,302", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.7", "top": "k.7", "type": "elementwise", "name": "k.7", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "317", "K": 512, "blob_biases": 65, "name": "317", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.7", "weights": { "per_ch_qscale": 67, "W_S8": 63, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.9", "K": 512, "blob_biases": 71, "name": "k.9", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.7", "weights": { "per_ch_qscale": 73, "W_S8": 69, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "331", "K": 512, "blob_biases": 77, "name": "331", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.7", "weights": { "per_ch_qscale": 79, "W_S8": 75, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "317", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "332", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "332", "begin_48": 0, "top": "332_0,332_1,332_2,332_3,332_4,332_5,332_6,332_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.9", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_6", "top": "transpose_6", "type": "transpose", "name": "transpose_6" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_6", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "342", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "342", "begin_48": 0, "top": "342_0,342_1,342_2,342_3,342_4,342_5,342_6,342_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "331", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "351", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "351", "begin_48": 0, "top": "351_0,351_1,351_2,351_3,351_4,351_5,351_6,351_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "361_transpose_x", "top": "342_0_transposed", "type": "transpose", "name": "361_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "361_transpose_y", "top": "332_0_transposed", "type": "transpose", "name": "361_transpose_y" }, { "bottom": "342_0_transposed,332_0_transposed", "weights": {}, "debug_info": "361_batch_matmul", "top": "361_pre_transpose", "type": "batch_matmul", "name": "361_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "361_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "361", "top": "361", "type": "transpose", "name": "361" }, { "bottom": "361", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "363", "top": "363", "type": "elementwise", "name": "363", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "365_transpose_x", "top": "342_1_transposed", "type": "transpose", "name": "365_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "365_transpose_y", "top": "332_1_transposed", "type": "transpose", "name": "365_transpose_y" }, { "bottom": "342_1_transposed,332_1_transposed", "weights": {}, "debug_info": "365_batch_matmul", "top": "365_pre_transpose", "type": "batch_matmul", "name": "365_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "365_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "365", "top": "365", "type": "transpose", "name": "365" }, { "bottom": "365", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "367", "top": "367", "type": "elementwise", "name": "367", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "369_transpose_x", "top": "342_2_transposed", "type": "transpose", "name": "369_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "369_transpose_y", "top": "332_2_transposed", "type": "transpose", "name": "369_transpose_y" }, { "bottom": "342_2_transposed,332_2_transposed", "weights": {}, "debug_info": "369_batch_matmul", "top": "369_pre_transpose", "type": "batch_matmul", "name": "369_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "369_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "369", "top": "369", "type": "transpose", "name": "369" }, { "bottom": "369", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "371", "top": "371", "type": "elementwise", "name": "371", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "373_transpose_x", "top": "342_3_transposed", "type": "transpose", "name": "373_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "373_transpose_y", "top": "332_3_transposed", "type": "transpose", "name": "373_transpose_y" }, { "bottom": "342_3_transposed,332_3_transposed", "weights": {}, "debug_info": "373_batch_matmul", "top": "373_pre_transpose", "type": "batch_matmul", "name": "373_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "373_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "373", "top": "373", "type": "transpose", "name": "373" }, { "bottom": "373", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "375", "top": "375", "type": "elementwise", "name": "375", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "377_transpose_x", "top": "342_4_transposed", "type": "transpose", "name": "377_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "377_transpose_y", "top": "332_4_transposed", "type": "transpose", "name": "377_transpose_y" }, { "bottom": "342_4_transposed,332_4_transposed", "weights": {}, "debug_info": "377_batch_matmul", "top": "377_pre_transpose", "type": "batch_matmul", "name": "377_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "377_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "377", "top": "377", "type": "transpose", "name": "377" }, { "bottom": "377", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "379", "top": "379", "type": "elementwise", "name": "379", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "381_transpose_x", "top": "342_5_transposed", "type": "transpose", "name": "381_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "381_transpose_y", "top": "332_5_transposed", "type": "transpose", "name": "381_transpose_y" }, { "bottom": "342_5_transposed,332_5_transposed", "weights": {}, "debug_info": "381_batch_matmul", "top": "381_pre_transpose", "type": "batch_matmul", "name": "381_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "381_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "381", "top": "381", "type": "transpose", "name": "381" }, { "bottom": "381", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "383", "top": "383", "type": "elementwise", "name": "383", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "385_transpose_x", "top": "342_6_transposed", "type": "transpose", "name": "385_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "385_transpose_y", "top": "332_6_transposed", "type": "transpose", "name": "385_transpose_y" }, { "bottom": "342_6_transposed,332_6_transposed", "weights": {}, "debug_info": "385_batch_matmul", "top": "385_pre_transpose", "type": "batch_matmul", "name": "385_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "385_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "385", "top": "385", "type": "transpose", "name": "385" }, { "bottom": "385", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "387", "top": "387", "type": "elementwise", "name": "387", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "342_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "389_transpose_x", "top": "342_7_transposed", "type": "transpose", "name": "389_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "332_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "389_transpose_y", "top": "332_7_transposed", "type": "transpose", "name": "389_transpose_y" }, { "bottom": "342_7_transposed,332_7_transposed", "weights": {}, "debug_info": "389_batch_matmul", "top": "389_pre_transpose", "type": "batch_matmul", "name": "389_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "389_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "389", "top": "389", "type": "transpose", "name": "389" }, { "bottom": "389", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "391", "top": "391", "type": "elementwise", "name": "391", "beta": 0 }, { "bottom": "363,367,371,375,379,383,387,391", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.4", "top": "attn_weights.4", "type": "general_concat", "name": "attn_weights.4" }, { "bottom": "attn_weights.4,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.4", "nd_mode": true, "top": "attn_weights0.4", "type": "elementwise", "name": "attn_weights0.4", "beta": 0 }, { "bottom": "attn_weights0.4", "weights": {}, "debug_info": "input.9", "top": "input.9", "C": 2, "type": "softmax", "name": "input.9" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.9", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "397", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "397", "begin_48": 0, "top": "397_0,397_1,397_2,397_3,397_4,397_5,397_6,397_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "351_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "407_transpose_x", "top": "351_0_transposed", "type": "transpose", "name": "407_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "407_transpose_y", "top": "397_0_transposed", "type": "transpose", "name": "407_transpose_y" }, { "bottom": "351_0_transposed,397_0_transposed", "weights": {}, "debug_info": "407_batch_matmul", "top": "407_pre_transpose", "type": "batch_matmul", "name": "407_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "407_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "407", "top": "407", "type": "transpose", "name": "407" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "409_transpose_x", "top": "351_1_transposed", "type": "transpose", "name": "409_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "409_transpose_y", "top": "397_1_transposed", "type": "transpose", "name": "409_transpose_y" }, { "bottom": "351_1_transposed,397_1_transposed", "weights": {}, "debug_info": "409_batch_matmul", "top": "409_pre_transpose", "type": "batch_matmul", "name": "409_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "409_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "409", "top": "409", "type": "transpose", "name": "409" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "411_transpose_x", "top": "351_2_transposed", "type": "transpose", "name": "411_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "411_transpose_y", "top": "397_2_transposed", "type": "transpose", "name": "411_transpose_y" }, { "bottom": "351_2_transposed,397_2_transposed", "weights": {}, "debug_info": "411_batch_matmul", "top": "411_pre_transpose", "type": "batch_matmul", "name": "411_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "411_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "411", "top": "411", "type": "transpose", "name": "411" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "413_transpose_x", "top": "351_3_transposed", "type": "transpose", "name": "413_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "413_transpose_y", "top": "397_3_transposed", "type": "transpose", "name": "413_transpose_y" }, { "bottom": "351_3_transposed,397_3_transposed", "weights": {}, "debug_info": "413_batch_matmul", "top": "413_pre_transpose", "type": "batch_matmul", "name": "413_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "413_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "413", "top": "413", "type": "transpose", "name": "413" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "415_transpose_x", "top": "351_4_transposed", "type": "transpose", "name": "415_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "415_transpose_y", "top": "397_4_transposed", "type": "transpose", "name": "415_transpose_y" }, { "bottom": "351_4_transposed,397_4_transposed", "weights": {}, "debug_info": "415_batch_matmul", "top": "415_pre_transpose", "type": "batch_matmul", "name": "415_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "415_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "415", "top": "415", "type": "transpose", "name": "415" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "417_transpose_x", "top": "351_5_transposed", "type": "transpose", "name": "417_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "417_transpose_y", "top": "397_5_transposed", "type": "transpose", "name": "417_transpose_y" }, { "bottom": "351_5_transposed,397_5_transposed", "weights": {}, "debug_info": "417_batch_matmul", "top": "417_pre_transpose", "type": "batch_matmul", "name": "417_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "417_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "417", "top": "417", "type": "transpose", "name": "417" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "419_transpose_x", "top": "351_6_transposed", "type": "transpose", "name": "419_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "419_transpose_y", "top": "397_6_transposed", "type": "transpose", "name": "419_transpose_y" }, { "bottom": "351_6_transposed,397_6_transposed", "weights": {}, "debug_info": "419_batch_matmul", "top": "419_pre_transpose", "type": "batch_matmul", "name": "419_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "419_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "419", "top": "419", "type": "transpose", "name": "419" }, { "axis_h": 2, "axis_w": 0, "bottom": "351_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "421_transpose_x", "top": "351_7_transposed", "type": "transpose", "name": "421_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "397_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "421_transpose_y", "top": "397_7_transposed", "type": "transpose", "name": "421_transpose_y" }, { "bottom": "351_7_transposed,397_7_transposed", "weights": {}, "debug_info": "421_batch_matmul", "top": "421_pre_transpose", "type": "batch_matmul", "name": "421_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "421_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "421", "top": "421", "type": "transpose", "name": "421" }, { "weights": {}, "debug_info": "attn.12", "top": "attn.12", "type": "concat", "name": "attn.12", "bottom": "407,409,411,413,415,417,419,421" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.14", "K": 512, "blob_biases": 83, "name": "attn.14", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.12", "weights": { "per_ch_qscale": 85, "W_S8": 81, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "inputs.2,attn.14", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.4", "top": "inputs0.4", "type": "elementwise", "name": "inputs0.4", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.4", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.9", "use_version": 1, "top": "channels_mean.9", "type": "reduce", "name": "channels_mean.9" }, { "alpha": -1, "bottom": "channels_mean.9", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.9", "top": "_neg_y_zero_mean.9", "type": "activation", "name": "_neg_y_zero_mean.9", "beta": 0 }, { "bottom": "inputs0.4,_neg_y_zero_mean.9", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.9", "top": "zero_mean.9", "type": "elementwise", "name": "zero_mean.9", "beta": 0 }, { "bottom": "zero_mean.9,zero_mean.9", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.9", "top": "zero_mean_sq.9", "type": "elementwise", "name": "zero_mean_sq.9", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.9", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "442", "use_version": 1, "top": "442", "type": "reduce", "name": "442" }, { "bottom": "442", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "444", "top": "444", "type": "elementwise", "name": "444", "beta": 0 }, { "bottom": "444", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.9", "top": "denom.9", "type": "elementwise", "name": "denom.9", "beta": 0 }, { "bottom": "zero_mean.9,denom.9", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.9", "top": "out.9", "type": "elementwise", "name": "out.9", "beta": 0 }, { "top": "448", "w": 1, "h": 1, "name": "448", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "448", "n": 1, "weights": {}, "constant_blob": 303 }, { "bottom": "out.9,448", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "449", "top": "449", "type": "elementwise", "name": "449", "beta": 0 }, { "top": "451", "w": 1, "h": 1, "name": "451", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "451", "n": 1, "weights": {}, "constant_blob": 89 }, { "bottom": "449,451", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.11", "top": "input.11", "type": "elementwise", "name": "input.11", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.4", "K": 512, "blob_biases": 93, "name": "x.4", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.11", "weights": { "per_ch_qscale": 95, "W_S8": 91, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.4", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "463", "top": "463", "type": "elementwise", "name": "463", "beta": 0 }, { "bottom": "463", "weights": {}, "mode": 3, "debug_info": "464", "top": "464", "type": "activation", "name": "464" }, { "bottom": "x.4,464", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.13", "top": "input.13", "type": "elementwise", "name": "input.13", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.7", "K": 2048, "blob_biases": 99, "name": "input0.7", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.13", "weights": { "per_ch_qscale": 101, "W_S8": 97, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.7,inputs0.4", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "474", "top": "474", "type": "elementwise", "name": "474", "beta": 0 }, { "bottom": "inputs.2", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "476", "top": "476", "type": "elementwise", "name": "476", "beta": 0 }, { "bottom": "476,474", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.2", "top": "inputs0.2", "type": "elementwise", "name": "inputs0.2", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.2", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.11", "use_version": 1, "top": "channels_mean.11", "type": "reduce", "name": "channels_mean.11" }, { "alpha": -1, "bottom": "channels_mean.11", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.11", "top": "_neg_y_zero_mean.11", "type": "activation", "name": "_neg_y_zero_mean.11", "beta": 0 }, { "bottom": "inputs0.2,_neg_y_zero_mean.11", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.11", "top": "zero_mean.11", "type": "elementwise", "name": "zero_mean.11", "beta": 0 }, { "bottom": "zero_mean.11,zero_mean.11", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.11", "top": "zero_mean_sq.11", "type": "elementwise", "name": "zero_mean_sq.11", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.11", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "491", "use_version": 1, "top": "491", "type": "reduce", "name": "491" }, { "bottom": "491", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "493", "top": "493", "type": "elementwise", "name": "493", "beta": 0 }, { "bottom": "493", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.11", "top": "denom.11", "type": "elementwise", "name": "denom.11", "beta": 0 }, { "bottom": "zero_mean.11,denom.11", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.11", "top": "out.11", "type": "elementwise", "name": "out.11", "beta": 0 }, { "top": "497", "w": 1, "h": 1, "name": "497", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "497", "n": 1, "weights": {}, "constant_blob": 305 }, { "bottom": "out.11,497", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "498", "top": "498", "type": "elementwise", "name": "498", "beta": 0 }, { "top": "500", "w": 1, "h": 1, "name": "500", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "500", "n": 1, "weights": {}, "constant_blob": 105 }, { "bottom": "498,500", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.11", "top": "k.11", "type": "elementwise", "name": "k.11", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "515", "K": 512, "blob_biases": 109, "name": "515", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.11", "weights": { "per_ch_qscale": 111, "W_S8": 107, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.13", "K": 512, "blob_biases": 115, "name": "k.13", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.11", "weights": { "per_ch_qscale": 117, "W_S8": 113, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "529", "K": 512, "blob_biases": 121, "name": "529", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.11", "weights": { "per_ch_qscale": 123, "W_S8": 119, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "515", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "530", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "530", "begin_48": 0, "top": "530_0,530_1,530_2,530_3,530_4,530_5,530_6,530_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.13", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_5", "top": "transpose_5", "type": "transpose", "name": "transpose_5" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_5", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "540", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "540", "begin_48": 0, "top": "540_0,540_1,540_2,540_3,540_4,540_5,540_6,540_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "529", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "549", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "549", "begin_48": 0, "top": "549_0,549_1,549_2,549_3,549_4,549_5,549_6,549_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "559_transpose_x", "top": "540_0_transposed", "type": "transpose", "name": "559_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "559_transpose_y", "top": "530_0_transposed", "type": "transpose", "name": "559_transpose_y" }, { "bottom": "540_0_transposed,530_0_transposed", "weights": {}, "debug_info": "559_batch_matmul", "top": "559_pre_transpose", "type": "batch_matmul", "name": "559_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "559_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "559", "top": "559", "type": "transpose", "name": "559" }, { "bottom": "559", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "561", "top": "561", "type": "elementwise", "name": "561", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "563_transpose_x", "top": "540_1_transposed", "type": "transpose", "name": "563_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "563_transpose_y", "top": "530_1_transposed", "type": "transpose", "name": "563_transpose_y" }, { "bottom": "540_1_transposed,530_1_transposed", "weights": {}, "debug_info": "563_batch_matmul", "top": "563_pre_transpose", "type": "batch_matmul", "name": "563_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "563_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "563", "top": "563", "type": "transpose", "name": "563" }, { "bottom": "563", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "565", "top": "565", "type": "elementwise", "name": "565", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "567_transpose_x", "top": "540_2_transposed", "type": "transpose", "name": "567_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "567_transpose_y", "top": "530_2_transposed", "type": "transpose", "name": "567_transpose_y" }, { "bottom": "540_2_transposed,530_2_transposed", "weights": {}, "debug_info": "567_batch_matmul", "top": "567_pre_transpose", "type": "batch_matmul", "name": "567_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "567_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "567", "top": "567", "type": "transpose", "name": "567" }, { "bottom": "567", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "569", "top": "569", "type": "elementwise", "name": "569", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "571_transpose_x", "top": "540_3_transposed", "type": "transpose", "name": "571_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "571_transpose_y", "top": "530_3_transposed", "type": "transpose", "name": "571_transpose_y" }, { "bottom": "540_3_transposed,530_3_transposed", "weights": {}, "debug_info": "571_batch_matmul", "top": "571_pre_transpose", "type": "batch_matmul", "name": "571_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "571_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "571", "top": "571", "type": "transpose", "name": "571" }, { "bottom": "571", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "573", "top": "573", "type": "elementwise", "name": "573", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "575_transpose_x", "top": "540_4_transposed", "type": "transpose", "name": "575_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "575_transpose_y", "top": "530_4_transposed", "type": "transpose", "name": "575_transpose_y" }, { "bottom": "540_4_transposed,530_4_transposed", "weights": {}, "debug_info": "575_batch_matmul", "top": "575_pre_transpose", "type": "batch_matmul", "name": "575_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "575_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "575", "top": "575", "type": "transpose", "name": "575" }, { "bottom": "575", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "577", "top": "577", "type": "elementwise", "name": "577", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "579_transpose_x", "top": "540_5_transposed", "type": "transpose", "name": "579_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "579_transpose_y", "top": "530_5_transposed", "type": "transpose", "name": "579_transpose_y" }, { "bottom": "540_5_transposed,530_5_transposed", "weights": {}, "debug_info": "579_batch_matmul", "top": "579_pre_transpose", "type": "batch_matmul", "name": "579_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "579_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "579", "top": "579", "type": "transpose", "name": "579" }, { "bottom": "579", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "581", "top": "581", "type": "elementwise", "name": "581", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "583_transpose_x", "top": "540_6_transposed", "type": "transpose", "name": "583_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "583_transpose_y", "top": "530_6_transposed", "type": "transpose", "name": "583_transpose_y" }, { "bottom": "540_6_transposed,530_6_transposed", "weights": {}, "debug_info": "583_batch_matmul", "top": "583_pre_transpose", "type": "batch_matmul", "name": "583_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "583_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "583", "top": "583", "type": "transpose", "name": "583" }, { "bottom": "583", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "585", "top": "585", "type": "elementwise", "name": "585", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "540_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "587_transpose_x", "top": "540_7_transposed", "type": "transpose", "name": "587_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "530_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "587_transpose_y", "top": "530_7_transposed", "type": "transpose", "name": "587_transpose_y" }, { "bottom": "540_7_transposed,530_7_transposed", "weights": {}, "debug_info": "587_batch_matmul", "top": "587_pre_transpose", "type": "batch_matmul", "name": "587_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "587_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "587", "top": "587", "type": "transpose", "name": "587" }, { "bottom": "587", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "589", "top": "589", "type": "elementwise", "name": "589", "beta": 0 }, { "bottom": "561,565,569,573,577,581,585,589", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.6", "top": "attn_weights.6", "type": "general_concat", "name": "attn_weights.6" }, { "bottom": "attn_weights.6,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.6", "nd_mode": true, "top": "attn_weights0.6", "type": "elementwise", "name": "attn_weights0.6", "beta": 0 }, { "bottom": "attn_weights0.6", "weights": {}, "debug_info": "input.15", "top": "input.15", "C": 2, "type": "softmax", "name": "input.15" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.15", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "595", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "595", "begin_48": 0, "top": "595_0,595_1,595_2,595_3,595_4,595_5,595_6,595_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "549_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "605_transpose_x", "top": "549_0_transposed", "type": "transpose", "name": "605_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "605_transpose_y", "top": "595_0_transposed", "type": "transpose", "name": "605_transpose_y" }, { "bottom": "549_0_transposed,595_0_transposed", "weights": {}, "debug_info": "605_batch_matmul", "top": "605_pre_transpose", "type": "batch_matmul", "name": "605_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "605_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "605", "top": "605", "type": "transpose", "name": "605" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "607_transpose_x", "top": "549_1_transposed", "type": "transpose", "name": "607_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "607_transpose_y", "top": "595_1_transposed", "type": "transpose", "name": "607_transpose_y" }, { "bottom": "549_1_transposed,595_1_transposed", "weights": {}, "debug_info": "607_batch_matmul", "top": "607_pre_transpose", "type": "batch_matmul", "name": "607_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "607_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "607", "top": "607", "type": "transpose", "name": "607" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "609_transpose_x", "top": "549_2_transposed", "type": "transpose", "name": "609_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "609_transpose_y", "top": "595_2_transposed", "type": "transpose", "name": "609_transpose_y" }, { "bottom": "549_2_transposed,595_2_transposed", "weights": {}, "debug_info": "609_batch_matmul", "top": "609_pre_transpose", "type": "batch_matmul", "name": "609_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "609_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "609", "top": "609", "type": "transpose", "name": "609" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "611_transpose_x", "top": "549_3_transposed", "type": "transpose", "name": "611_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "611_transpose_y", "top": "595_3_transposed", "type": "transpose", "name": "611_transpose_y" }, { "bottom": "549_3_transposed,595_3_transposed", "weights": {}, "debug_info": "611_batch_matmul", "top": "611_pre_transpose", "type": "batch_matmul", "name": "611_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "611_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "611", "top": "611", "type": "transpose", "name": "611" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "613_transpose_x", "top": "549_4_transposed", "type": "transpose", "name": "613_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "613_transpose_y", "top": "595_4_transposed", "type": "transpose", "name": "613_transpose_y" }, { "bottom": "549_4_transposed,595_4_transposed", "weights": {}, "debug_info": "613_batch_matmul", "top": "613_pre_transpose", "type": "batch_matmul", "name": "613_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "613_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "613", "top": "613", "type": "transpose", "name": "613" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "615_transpose_x", "top": "549_5_transposed", "type": "transpose", "name": "615_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "615_transpose_y", "top": "595_5_transposed", "type": "transpose", "name": "615_transpose_y" }, { "bottom": "549_5_transposed,595_5_transposed", "weights": {}, "debug_info": "615_batch_matmul", "top": "615_pre_transpose", "type": "batch_matmul", "name": "615_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "615_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "615", "top": "615", "type": "transpose", "name": "615" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "617_transpose_x", "top": "549_6_transposed", "type": "transpose", "name": "617_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "617_transpose_y", "top": "595_6_transposed", "type": "transpose", "name": "617_transpose_y" }, { "bottom": "549_6_transposed,595_6_transposed", "weights": {}, "debug_info": "617_batch_matmul", "top": "617_pre_transpose", "type": "batch_matmul", "name": "617_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "617_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "617", "top": "617", "type": "transpose", "name": "617" }, { "axis_h": 2, "axis_w": 0, "bottom": "549_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "619_transpose_x", "top": "549_7_transposed", "type": "transpose", "name": "619_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "595_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "619_transpose_y", "top": "595_7_transposed", "type": "transpose", "name": "619_transpose_y" }, { "bottom": "549_7_transposed,595_7_transposed", "weights": {}, "debug_info": "619_batch_matmul", "top": "619_pre_transpose", "type": "batch_matmul", "name": "619_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "619_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "619", "top": "619", "type": "transpose", "name": "619" }, { "weights": {}, "debug_info": "attn.18", "top": "attn.18", "type": "concat", "name": "attn.18", "bottom": "605,607,609,611,613,615,617,619" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.20", "K": 512, "blob_biases": 127, "name": "attn.20", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.18", "weights": { "per_ch_qscale": 129, "W_S8": 125, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "inputs0.2,attn.20", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.6", "top": "inputs0.6", "type": "elementwise", "name": "inputs0.6", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.6", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.13", "use_version": 1, "top": "channels_mean.13", "type": "reduce", "name": "channels_mean.13" }, { "alpha": -1, "bottom": "channels_mean.13", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.13", "top": "_neg_y_zero_mean.13", "type": "activation", "name": "_neg_y_zero_mean.13", "beta": 0 }, { "bottom": "inputs0.6,_neg_y_zero_mean.13", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.13", "top": "zero_mean.13", "type": "elementwise", "name": "zero_mean.13", "beta": 0 }, { "bottom": "zero_mean.13,zero_mean.13", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.13", "top": "zero_mean_sq.13", "type": "elementwise", "name": "zero_mean_sq.13", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.13", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "640", "use_version": 1, "top": "640", "type": "reduce", "name": "640" }, { "bottom": "640", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "642", "top": "642", "type": "elementwise", "name": "642", "beta": 0 }, { "bottom": "642", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.13", "top": "denom.13", "type": "elementwise", "name": "denom.13", "beta": 0 }, { "bottom": "zero_mean.13,denom.13", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.13", "top": "out.13", "type": "elementwise", "name": "out.13", "beta": 0 }, { "top": "646", "w": 1, "h": 1, "name": "646", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "646", "n": 1, "weights": {}, "constant_blob": 307 }, { "bottom": "out.13,646", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "647", "top": "647", "type": "elementwise", "name": "647", "beta": 0 }, { "top": "649", "w": 1, "h": 1, "name": "649", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "649", "n": 1, "weights": {}, "constant_blob": 133 }, { "bottom": "647,649", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.17", "top": "input.17", "type": "elementwise", "name": "input.17", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.6", "K": 512, "blob_biases": 137, "name": "x.6", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.17", "weights": { "per_ch_qscale": 139, "W_S8": 135, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.6", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "661", "top": "661", "type": "elementwise", "name": "661", "beta": 0 }, { "bottom": "661", "weights": {}, "mode": 3, "debug_info": "662", "top": "662", "type": "activation", "name": "662" }, { "bottom": "x.6,662", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.19", "top": "input.19", "type": "elementwise", "name": "input.19", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.11", "K": 2048, "blob_biases": 143, "name": "input0.11", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.19", "weights": { "per_ch_qscale": 145, "W_S8": 141, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.11,inputs0.6", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "672", "top": "672", "type": "elementwise", "name": "672", "beta": 0 }, { "bottom": "inputs0.2", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "674", "top": "674", "type": "elementwise", "name": "674", "beta": 0 }, { "bottom": "674,672", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs1.1", "top": "inputs1.1", "type": "elementwise", "name": "inputs1.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs1.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.15", "use_version": 1, "top": "channels_mean.15", "type": "reduce", "name": "channels_mean.15" }, { "alpha": -1, "bottom": "channels_mean.15", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.15", "top": "_neg_y_zero_mean.15", "type": "activation", "name": "_neg_y_zero_mean.15", "beta": 0 }, { "bottom": "inputs1.1,_neg_y_zero_mean.15", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.15", "top": "zero_mean.15", "type": "elementwise", "name": "zero_mean.15", "beta": 0 }, { "bottom": "zero_mean.15,zero_mean.15", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.15", "top": "zero_mean_sq.15", "type": "elementwise", "name": "zero_mean_sq.15", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.15", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "689", "use_version": 1, "top": "689", "type": "reduce", "name": "689" }, { "bottom": "689", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "691", "top": "691", "type": "elementwise", "name": "691", "beta": 0 }, { "bottom": "691", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.15", "top": "denom.15", "type": "elementwise", "name": "denom.15", "beta": 0 }, { "bottom": "zero_mean.15,denom.15", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.15", "top": "out.15", "type": "elementwise", "name": "out.15", "beta": 0 }, { "top": "695", "w": 1, "h": 1, "name": "695", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "695", "n": 1, "weights": {}, "constant_blob": 309 }, { "bottom": "out.15,695", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "696", "top": "696", "type": "elementwise", "name": "696", "beta": 0 }, { "top": "698", "w": 1, "h": 1, "name": "698", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "698", "n": 1, "weights": {}, "constant_blob": 149 }, { "bottom": "696,698", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.15", "top": "k.15", "type": "elementwise", "name": "k.15", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "713", "K": 512, "blob_biases": 153, "name": "713", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.15", "weights": { "per_ch_qscale": 155, "W_S8": 151, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.17", "K": 512, "blob_biases": 159, "name": "k.17", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.15", "weights": { "per_ch_qscale": 161, "W_S8": 157, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "727", "K": 512, "blob_biases": 165, "name": "727", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.15", "weights": { "per_ch_qscale": 167, "W_S8": 163, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "713", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "728", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "728", "begin_48": 0, "top": "728_0,728_1,728_2,728_3,728_4,728_5,728_6,728_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.17", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_4", "top": "transpose_4", "type": "transpose", "name": "transpose_4" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_4", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "738", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "738", "begin_48": 0, "top": "738_0,738_1,738_2,738_3,738_4,738_5,738_6,738_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "727", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "747", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "747", "begin_48": 0, "top": "747_0,747_1,747_2,747_3,747_4,747_5,747_6,747_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "757_transpose_x", "top": "738_0_transposed", "type": "transpose", "name": "757_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "757_transpose_y", "top": "728_0_transposed", "type": "transpose", "name": "757_transpose_y" }, { "bottom": "738_0_transposed,728_0_transposed", "weights": {}, "debug_info": "757_batch_matmul", "top": "757_pre_transpose", "type": "batch_matmul", "name": "757_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "757_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "757", "top": "757", "type": "transpose", "name": "757" }, { "bottom": "757", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "759", "top": "759", "type": "elementwise", "name": "759", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "761_transpose_x", "top": "738_1_transposed", "type": "transpose", "name": "761_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "761_transpose_y", "top": "728_1_transposed", "type": "transpose", "name": "761_transpose_y" }, { "bottom": "738_1_transposed,728_1_transposed", "weights": {}, "debug_info": "761_batch_matmul", "top": "761_pre_transpose", "type": "batch_matmul", "name": "761_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "761_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "761", "top": "761", "type": "transpose", "name": "761" }, { "bottom": "761", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "763", "top": "763", "type": "elementwise", "name": "763", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "765_transpose_x", "top": "738_2_transposed", "type": "transpose", "name": "765_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "765_transpose_y", "top": "728_2_transposed", "type": "transpose", "name": "765_transpose_y" }, { "bottom": "738_2_transposed,728_2_transposed", "weights": {}, "debug_info": "765_batch_matmul", "top": "765_pre_transpose", "type": "batch_matmul", "name": "765_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "765_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "765", "top": "765", "type": "transpose", "name": "765" }, { "bottom": "765", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "767", "top": "767", "type": "elementwise", "name": "767", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "769_transpose_x", "top": "738_3_transposed", "type": "transpose", "name": "769_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "769_transpose_y", "top": "728_3_transposed", "type": "transpose", "name": "769_transpose_y" }, { "bottom": "738_3_transposed,728_3_transposed", "weights": {}, "debug_info": "769_batch_matmul", "top": "769_pre_transpose", "type": "batch_matmul", "name": "769_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "769_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "769", "top": "769", "type": "transpose", "name": "769" }, { "bottom": "769", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "771", "top": "771", "type": "elementwise", "name": "771", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "773_transpose_x", "top": "738_4_transposed", "type": "transpose", "name": "773_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "773_transpose_y", "top": "728_4_transposed", "type": "transpose", "name": "773_transpose_y" }, { "bottom": "738_4_transposed,728_4_transposed", "weights": {}, "debug_info": "773_batch_matmul", "top": "773_pre_transpose", "type": "batch_matmul", "name": "773_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "773_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "773", "top": "773", "type": "transpose", "name": "773" }, { "bottom": "773", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "775", "top": "775", "type": "elementwise", "name": "775", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "777_transpose_x", "top": "738_5_transposed", "type": "transpose", "name": "777_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "777_transpose_y", "top": "728_5_transposed", "type": "transpose", "name": "777_transpose_y" }, { "bottom": "738_5_transposed,728_5_transposed", "weights": {}, "debug_info": "777_batch_matmul", "top": "777_pre_transpose", "type": "batch_matmul", "name": "777_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "777_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "777", "top": "777", "type": "transpose", "name": "777" }, { "bottom": "777", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "779", "top": "779", "type": "elementwise", "name": "779", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "781_transpose_x", "top": "738_6_transposed", "type": "transpose", "name": "781_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "781_transpose_y", "top": "728_6_transposed", "type": "transpose", "name": "781_transpose_y" }, { "bottom": "738_6_transposed,728_6_transposed", "weights": {}, "debug_info": "781_batch_matmul", "top": "781_pre_transpose", "type": "batch_matmul", "name": "781_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "781_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "781", "top": "781", "type": "transpose", "name": "781" }, { "bottom": "781", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "783", "top": "783", "type": "elementwise", "name": "783", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "738_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "785_transpose_x", "top": "738_7_transposed", "type": "transpose", "name": "785_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "728_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "785_transpose_y", "top": "728_7_transposed", "type": "transpose", "name": "785_transpose_y" }, { "bottom": "738_7_transposed,728_7_transposed", "weights": {}, "debug_info": "785_batch_matmul", "top": "785_pre_transpose", "type": "batch_matmul", "name": "785_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "785_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "785", "top": "785", "type": "transpose", "name": "785" }, { "bottom": "785", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "787", "top": "787", "type": "elementwise", "name": "787", "beta": 0 }, { "bottom": "759,763,767,771,775,779,783,787", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.8", "top": "attn_weights.8", "type": "general_concat", "name": "attn_weights.8" }, { "bottom": "attn_weights.8,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.8", "nd_mode": true, "top": "attn_weights0.8", "type": "elementwise", "name": "attn_weights0.8", "beta": 0 }, { "bottom": "attn_weights0.8", "weights": {}, "debug_info": "input.21", "top": "input.21", "C": 2, "type": "softmax", "name": "input.21" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.21", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "793", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "793", "begin_48": 0, "top": "793_0,793_1,793_2,793_3,793_4,793_5,793_6,793_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "747_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "803_transpose_x", "top": "747_0_transposed", "type": "transpose", "name": "803_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "803_transpose_y", "top": "793_0_transposed", "type": "transpose", "name": "803_transpose_y" }, { "bottom": "747_0_transposed,793_0_transposed", "weights": {}, "debug_info": "803_batch_matmul", "top": "803_pre_transpose", "type": "batch_matmul", "name": "803_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "803_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "803", "top": "803", "type": "transpose", "name": "803" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "805_transpose_x", "top": "747_1_transposed", "type": "transpose", "name": "805_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "805_transpose_y", "top": "793_1_transposed", "type": "transpose", "name": "805_transpose_y" }, { "bottom": "747_1_transposed,793_1_transposed", "weights": {}, "debug_info": "805_batch_matmul", "top": "805_pre_transpose", "type": "batch_matmul", "name": "805_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "805_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "805", "top": "805", "type": "transpose", "name": "805" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "807_transpose_x", "top": "747_2_transposed", "type": "transpose", "name": "807_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "807_transpose_y", "top": "793_2_transposed", "type": "transpose", "name": "807_transpose_y" }, { "bottom": "747_2_transposed,793_2_transposed", "weights": {}, "debug_info": "807_batch_matmul", "top": "807_pre_transpose", "type": "batch_matmul", "name": "807_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "807_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "807", "top": "807", "type": "transpose", "name": "807" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "809_transpose_x", "top": "747_3_transposed", "type": "transpose", "name": "809_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "809_transpose_y", "top": "793_3_transposed", "type": "transpose", "name": "809_transpose_y" }, { "bottom": "747_3_transposed,793_3_transposed", "weights": {}, "debug_info": "809_batch_matmul", "top": "809_pre_transpose", "type": "batch_matmul", "name": "809_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "809_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "809", "top": "809", "type": "transpose", "name": "809" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "811_transpose_x", "top": "747_4_transposed", "type": "transpose", "name": "811_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "811_transpose_y", "top": "793_4_transposed", "type": "transpose", "name": "811_transpose_y" }, { "bottom": "747_4_transposed,793_4_transposed", "weights": {}, "debug_info": "811_batch_matmul", "top": "811_pre_transpose", "type": "batch_matmul", "name": "811_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "811_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "811", "top": "811", "type": "transpose", "name": "811" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "813_transpose_x", "top": "747_5_transposed", "type": "transpose", "name": "813_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "813_transpose_y", "top": "793_5_transposed", "type": "transpose", "name": "813_transpose_y" }, { "bottom": "747_5_transposed,793_5_transposed", "weights": {}, "debug_info": "813_batch_matmul", "top": "813_pre_transpose", "type": "batch_matmul", "name": "813_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "813_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "813", "top": "813", "type": "transpose", "name": "813" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "815_transpose_x", "top": "747_6_transposed", "type": "transpose", "name": "815_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "815_transpose_y", "top": "793_6_transposed", "type": "transpose", "name": "815_transpose_y" }, { "bottom": "747_6_transposed,793_6_transposed", "weights": {}, "debug_info": "815_batch_matmul", "top": "815_pre_transpose", "type": "batch_matmul", "name": "815_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "815_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "815", "top": "815", "type": "transpose", "name": "815" }, { "axis_h": 2, "axis_w": 0, "bottom": "747_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "817_transpose_x", "top": "747_7_transposed", "type": "transpose", "name": "817_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "793_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "817_transpose_y", "top": "793_7_transposed", "type": "transpose", "name": "817_transpose_y" }, { "bottom": "747_7_transposed,793_7_transposed", "weights": {}, "debug_info": "817_batch_matmul", "top": "817_pre_transpose", "type": "batch_matmul", "name": "817_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "817_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "817", "top": "817", "type": "transpose", "name": "817" }, { "weights": {}, "debug_info": "attn.24", "top": "attn.24", "type": "concat", "name": "attn.24", "bottom": "803,805,807,809,811,813,815,817" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.26", "K": 512, "blob_biases": 171, "name": "attn.26", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.24", "weights": { "per_ch_qscale": 173, "W_S8": 169, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "inputs1.1,attn.26", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.8", "top": "inputs0.8", "type": "elementwise", "name": "inputs0.8", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.8", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.17", "use_version": 1, "top": "channels_mean.17", "type": "reduce", "name": "channels_mean.17" }, { "alpha": -1, "bottom": "channels_mean.17", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.17", "top": "_neg_y_zero_mean.17", "type": "activation", "name": "_neg_y_zero_mean.17", "beta": 0 }, { "bottom": "inputs0.8,_neg_y_zero_mean.17", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.17", "top": "zero_mean.17", "type": "elementwise", "name": "zero_mean.17", "beta": 0 }, { "bottom": "zero_mean.17,zero_mean.17", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.17", "top": "zero_mean_sq.17", "type": "elementwise", "name": "zero_mean_sq.17", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.17", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "838", "use_version": 1, "top": "838", "type": "reduce", "name": "838" }, { "bottom": "838", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "840", "top": "840", "type": "elementwise", "name": "840", "beta": 0 }, { "bottom": "840", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.17", "top": "denom.17", "type": "elementwise", "name": "denom.17", "beta": 0 }, { "bottom": "zero_mean.17,denom.17", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.17", "top": "out.17", "type": "elementwise", "name": "out.17", "beta": 0 }, { "top": "844", "w": 1, "h": 1, "name": "844", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "844", "n": 1, "weights": {}, "constant_blob": 311 }, { "bottom": "out.17,844", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "845", "top": "845", "type": "elementwise", "name": "845", "beta": 0 }, { "top": "847", "w": 1, "h": 1, "name": "847", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "847", "n": 1, "weights": {}, "constant_blob": 177 }, { "bottom": "845,847", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.23", "top": "input.23", "type": "elementwise", "name": "input.23", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.8", "K": 512, "blob_biases": 181, "name": "x.8", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.23", "weights": { "per_ch_qscale": 183, "W_S8": 179, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.8", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "859", "top": "859", "type": "elementwise", "name": "859", "beta": 0 }, { "bottom": "859", "weights": {}, "mode": 3, "debug_info": "860", "top": "860", "type": "activation", "name": "860" }, { "bottom": "x.8,860", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.25", "top": "input.25", "type": "elementwise", "name": "input.25", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.15", "K": 2048, "blob_biases": 187, "name": "input0.15", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.25", "weights": { "per_ch_qscale": 189, "W_S8": 185, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.15,inputs0.8", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "870", "top": "870", "type": "elementwise", "name": "870", "beta": 0 }, { "bottom": "inputs1.1", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "872", "top": "872", "type": "elementwise", "name": "872", "beta": 0 }, { "bottom": "872,870", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs2.1", "top": "inputs2.1", "type": "elementwise", "name": "inputs2.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs2.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.19", "use_version": 1, "top": "channels_mean.19", "type": "reduce", "name": "channels_mean.19" }, { "alpha": -1, "bottom": "channels_mean.19", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.19", "top": "_neg_y_zero_mean.19", "type": "activation", "name": "_neg_y_zero_mean.19", "beta": 0 }, { "bottom": "inputs2.1,_neg_y_zero_mean.19", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.19", "top": "zero_mean.19", "type": "elementwise", "name": "zero_mean.19", "beta": 0 }, { "bottom": "zero_mean.19,zero_mean.19", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.19", "top": "zero_mean_sq.19", "type": "elementwise", "name": "zero_mean_sq.19", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.19", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "887", "use_version": 1, "top": "887", "type": "reduce", "name": "887" }, { "bottom": "887", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "889", "top": "889", "type": "elementwise", "name": "889", "beta": 0 }, { "bottom": "889", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.19", "top": "denom.19", "type": "elementwise", "name": "denom.19", "beta": 0 }, { "bottom": "zero_mean.19,denom.19", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.19", "top": "out.19", "type": "elementwise", "name": "out.19", "beta": 0 }, { "top": "893", "w": 1, "h": 1, "name": "893", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "893", "n": 1, "weights": {}, "constant_blob": 313 }, { "bottom": "out.19,893", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "894", "top": "894", "type": "elementwise", "name": "894", "beta": 0 }, { "top": "896", "w": 1, "h": 1, "name": "896", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "896", "n": 1, "weights": {}, "constant_blob": 193 }, { "bottom": "894,896", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.19", "top": "k.19", "type": "elementwise", "name": "k.19", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "911", "K": 512, "blob_biases": 197, "name": "911", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.19", "weights": { "per_ch_qscale": 199, "W_S8": 195, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.21", "K": 512, "blob_biases": 203, "name": "k.21", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.19", "weights": { "per_ch_qscale": 205, "W_S8": 201, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "925", "K": 512, "blob_biases": 209, "name": "925", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.19", "weights": { "per_ch_qscale": 211, "W_S8": 207, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "911", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "926", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "926", "begin_48": 0, "top": "926_0,926_1,926_2,926_3,926_4,926_5,926_6,926_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.21", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_3", "top": "transpose_3", "type": "transpose", "name": "transpose_3" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_3", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "936", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "936", "begin_48": 0, "top": "936_0,936_1,936_2,936_3,936_4,936_5,936_6,936_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "925", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "945", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "945", "begin_48": 0, "top": "945_0,945_1,945_2,945_3,945_4,945_5,945_6,945_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "955_transpose_x", "top": "936_0_transposed", "type": "transpose", "name": "955_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "955_transpose_y", "top": "926_0_transposed", "type": "transpose", "name": "955_transpose_y" }, { "bottom": "936_0_transposed,926_0_transposed", "weights": {}, "debug_info": "955_batch_matmul", "top": "955_pre_transpose", "type": "batch_matmul", "name": "955_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "955_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "955", "top": "955", "type": "transpose", "name": "955" }, { "bottom": "955", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "957", "top": "957", "type": "elementwise", "name": "957", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "959_transpose_x", "top": "936_1_transposed", "type": "transpose", "name": "959_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "959_transpose_y", "top": "926_1_transposed", "type": "transpose", "name": "959_transpose_y" }, { "bottom": "936_1_transposed,926_1_transposed", "weights": {}, "debug_info": "959_batch_matmul", "top": "959_pre_transpose", "type": "batch_matmul", "name": "959_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "959_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "959", "top": "959", "type": "transpose", "name": "959" }, { "bottom": "959", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "961", "top": "961", "type": "elementwise", "name": "961", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "963_transpose_x", "top": "936_2_transposed", "type": "transpose", "name": "963_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "963_transpose_y", "top": "926_2_transposed", "type": "transpose", "name": "963_transpose_y" }, { "bottom": "936_2_transposed,926_2_transposed", "weights": {}, "debug_info": "963_batch_matmul", "top": "963_pre_transpose", "type": "batch_matmul", "name": "963_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "963_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "963", "top": "963", "type": "transpose", "name": "963" }, { "bottom": "963", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "965", "top": "965", "type": "elementwise", "name": "965", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "967_transpose_x", "top": "936_3_transposed", "type": "transpose", "name": "967_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "967_transpose_y", "top": "926_3_transposed", "type": "transpose", "name": "967_transpose_y" }, { "bottom": "936_3_transposed,926_3_transposed", "weights": {}, "debug_info": "967_batch_matmul", "top": "967_pre_transpose", "type": "batch_matmul", "name": "967_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "967_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "967", "top": "967", "type": "transpose", "name": "967" }, { "bottom": "967", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "969", "top": "969", "type": "elementwise", "name": "969", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "971_transpose_x", "top": "936_4_transposed", "type": "transpose", "name": "971_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "971_transpose_y", "top": "926_4_transposed", "type": "transpose", "name": "971_transpose_y" }, { "bottom": "936_4_transposed,926_4_transposed", "weights": {}, "debug_info": "971_batch_matmul", "top": "971_pre_transpose", "type": "batch_matmul", "name": "971_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "971_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "971", "top": "971", "type": "transpose", "name": "971" }, { "bottom": "971", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "973", "top": "973", "type": "elementwise", "name": "973", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "975_transpose_x", "top": "936_5_transposed", "type": "transpose", "name": "975_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "975_transpose_y", "top": "926_5_transposed", "type": "transpose", "name": "975_transpose_y" }, { "bottom": "936_5_transposed,926_5_transposed", "weights": {}, "debug_info": "975_batch_matmul", "top": "975_pre_transpose", "type": "batch_matmul", "name": "975_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "975_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "975", "top": "975", "type": "transpose", "name": "975" }, { "bottom": "975", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "977", "top": "977", "type": "elementwise", "name": "977", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "979_transpose_x", "top": "936_6_transposed", "type": "transpose", "name": "979_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "979_transpose_y", "top": "926_6_transposed", "type": "transpose", "name": "979_transpose_y" }, { "bottom": "936_6_transposed,926_6_transposed", "weights": {}, "debug_info": "979_batch_matmul", "top": "979_pre_transpose", "type": "batch_matmul", "name": "979_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "979_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "979", "top": "979", "type": "transpose", "name": "979" }, { "bottom": "979", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "981", "top": "981", "type": "elementwise", "name": "981", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "936_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "983_transpose_x", "top": "936_7_transposed", "type": "transpose", "name": "983_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "926_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "983_transpose_y", "top": "926_7_transposed", "type": "transpose", "name": "983_transpose_y" }, { "bottom": "936_7_transposed,926_7_transposed", "weights": {}, "debug_info": "983_batch_matmul", "top": "983_pre_transpose", "type": "batch_matmul", "name": "983_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "983_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "983", "top": "983", "type": "transpose", "name": "983" }, { "bottom": "983", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "985", "top": "985", "type": "elementwise", "name": "985", "beta": 0 }, { "bottom": "957,961,965,969,973,977,981,985", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.10", "top": "attn_weights.10", "type": "general_concat", "name": "attn_weights.10" }, { "bottom": "attn_weights.10,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.10", "nd_mode": true, "top": "attn_weights0.10", "type": "elementwise", "name": "attn_weights0.10", "beta": 0 }, { "bottom": "attn_weights0.10", "weights": {}, "debug_info": "input.27", "top": "input.27", "C": 2, "type": "softmax", "name": "input.27" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.27", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "991", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "991", "begin_48": 0, "top": "991_0,991_1,991_2,991_3,991_4,991_5,991_6,991_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "945_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1001_transpose_x", "top": "945_0_transposed", "type": "transpose", "name": "1001_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1001_transpose_y", "top": "991_0_transposed", "type": "transpose", "name": "1001_transpose_y" }, { "bottom": "945_0_transposed,991_0_transposed", "weights": {}, "debug_info": "1001_batch_matmul", "top": "1001_pre_transpose", "type": "batch_matmul", "name": "1001_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1001_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1001", "top": "1001", "type": "transpose", "name": "1001" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1003_transpose_x", "top": "945_1_transposed", "type": "transpose", "name": "1003_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1003_transpose_y", "top": "991_1_transposed", "type": "transpose", "name": "1003_transpose_y" }, { "bottom": "945_1_transposed,991_1_transposed", "weights": {}, "debug_info": "1003_batch_matmul", "top": "1003_pre_transpose", "type": "batch_matmul", "name": "1003_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1003_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1003", "top": "1003", "type": "transpose", "name": "1003" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1005_transpose_x", "top": "945_2_transposed", "type": "transpose", "name": "1005_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1005_transpose_y", "top": "991_2_transposed", "type": "transpose", "name": "1005_transpose_y" }, { "bottom": "945_2_transposed,991_2_transposed", "weights": {}, "debug_info": "1005_batch_matmul", "top": "1005_pre_transpose", "type": "batch_matmul", "name": "1005_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1005_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1005", "top": "1005", "type": "transpose", "name": "1005" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1007_transpose_x", "top": "945_3_transposed", "type": "transpose", "name": "1007_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1007_transpose_y", "top": "991_3_transposed", "type": "transpose", "name": "1007_transpose_y" }, { "bottom": "945_3_transposed,991_3_transposed", "weights": {}, "debug_info": "1007_batch_matmul", "top": "1007_pre_transpose", "type": "batch_matmul", "name": "1007_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1007_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1007", "top": "1007", "type": "transpose", "name": "1007" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1009_transpose_x", "top": "945_4_transposed", "type": "transpose", "name": "1009_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1009_transpose_y", "top": "991_4_transposed", "type": "transpose", "name": "1009_transpose_y" }, { "bottom": "945_4_transposed,991_4_transposed", "weights": {}, "debug_info": "1009_batch_matmul", "top": "1009_pre_transpose", "type": "batch_matmul", "name": "1009_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1009_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1009", "top": "1009", "type": "transpose", "name": "1009" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1011_transpose_x", "top": "945_5_transposed", "type": "transpose", "name": "1011_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1011_transpose_y", "top": "991_5_transposed", "type": "transpose", "name": "1011_transpose_y" }, { "bottom": "945_5_transposed,991_5_transposed", "weights": {}, "debug_info": "1011_batch_matmul", "top": "1011_pre_transpose", "type": "batch_matmul", "name": "1011_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1011_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1011", "top": "1011", "type": "transpose", "name": "1011" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1013_transpose_x", "top": "945_6_transposed", "type": "transpose", "name": "1013_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1013_transpose_y", "top": "991_6_transposed", "type": "transpose", "name": "1013_transpose_y" }, { "bottom": "945_6_transposed,991_6_transposed", "weights": {}, "debug_info": "1013_batch_matmul", "top": "1013_pre_transpose", "type": "batch_matmul", "name": "1013_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1013_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1013", "top": "1013", "type": "transpose", "name": "1013" }, { "axis_h": 2, "axis_w": 0, "bottom": "945_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1015_transpose_x", "top": "945_7_transposed", "type": "transpose", "name": "1015_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "991_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1015_transpose_y", "top": "991_7_transposed", "type": "transpose", "name": "1015_transpose_y" }, { "bottom": "945_7_transposed,991_7_transposed", "weights": {}, "debug_info": "1015_batch_matmul", "top": "1015_pre_transpose", "type": "batch_matmul", "name": "1015_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1015_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1015", "top": "1015", "type": "transpose", "name": "1015" }, { "weights": {}, "debug_info": "attn.30", "top": "attn.30", "type": "concat", "name": "attn.30", "bottom": "1001,1003,1005,1007,1009,1011,1013,1015" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.32", "K": 512, "blob_biases": 215, "name": "attn.32", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.30", "weights": { "per_ch_qscale": 217, "W_S8": 213, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "inputs2.1,attn.32", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.10", "top": "inputs0.10", "type": "elementwise", "name": "inputs0.10", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.10", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.21", "use_version": 1, "top": "channels_mean.21", "type": "reduce", "name": "channels_mean.21" }, { "alpha": -1, "bottom": "channels_mean.21", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.21", "top": "_neg_y_zero_mean.21", "type": "activation", "name": "_neg_y_zero_mean.21", "beta": 0 }, { "bottom": "inputs0.10,_neg_y_zero_mean.21", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.21", "top": "zero_mean.21", "type": "elementwise", "name": "zero_mean.21", "beta": 0 }, { "bottom": "zero_mean.21,zero_mean.21", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.21", "top": "zero_mean_sq.21", "type": "elementwise", "name": "zero_mean_sq.21", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.21", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "1036", "use_version": 1, "top": "1036", "type": "reduce", "name": "1036" }, { "bottom": "1036", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1038", "top": "1038", "type": "elementwise", "name": "1038", "beta": 0 }, { "bottom": "1038", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.21", "top": "denom.21", "type": "elementwise", "name": "denom.21", "beta": 0 }, { "bottom": "zero_mean.21,denom.21", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.21", "top": "out.21", "type": "elementwise", "name": "out.21", "beta": 0 }, { "top": "1042", "w": 1, "h": 1, "name": "1042", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1042", "n": 1, "weights": {}, "constant_blob": 315 }, { "bottom": "out.21,1042", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1043", "top": "1043", "type": "elementwise", "name": "1043", "beta": 0 }, { "top": "1045", "w": 1, "h": 1, "name": "1045", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1045", "n": 1, "weights": {}, "constant_blob": 221 }, { "bottom": "1043,1045", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.29", "top": "input.29", "type": "elementwise", "name": "input.29", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.10", "K": 512, "blob_biases": 225, "name": "x.10", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.29", "weights": { "per_ch_qscale": 227, "W_S8": 223, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.10", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1057", "top": "1057", "type": "elementwise", "name": "1057", "beta": 0 }, { "bottom": "1057", "weights": {}, "mode": 3, "debug_info": "1058", "top": "1058", "type": "activation", "name": "1058" }, { "bottom": "x.10,1058", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.31", "top": "input.31", "type": "elementwise", "name": "input.31", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.19", "K": 2048, "blob_biases": 231, "name": "input0.19", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.31", "weights": { "per_ch_qscale": 233, "W_S8": 229, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.19,inputs0.10", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1068", "top": "1068", "type": "elementwise", "name": "1068", "beta": 0 }, { "bottom": "inputs2.1", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1070", "top": "1070", "type": "elementwise", "name": "1070", "beta": 0 }, { "bottom": "1070,1068", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs3.1", "top": "inputs3.1", "type": "elementwise", "name": "inputs3.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs3.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.2", "use_version": 1, "top": "channels_mean.2", "type": "reduce", "name": "channels_mean.2" }, { "alpha": -1, "bottom": "channels_mean.2", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.2", "top": "_neg_y_zero_mean.2", "type": "activation", "name": "_neg_y_zero_mean.2", "beta": 0 }, { "bottom": "inputs3.1,_neg_y_zero_mean.2", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.2", "top": "zero_mean.2", "type": "elementwise", "name": "zero_mean.2", "beta": 0 }, { "bottom": "zero_mean.2,zero_mean.2", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.2", "top": "zero_mean_sq.2", "type": "elementwise", "name": "zero_mean_sq.2", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.2", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "1085", "use_version": 1, "top": "1085", "type": "reduce", "name": "1085" }, { "bottom": "1085", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1087", "top": "1087", "type": "elementwise", "name": "1087", "beta": 0 }, { "bottom": "1087", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.2", "top": "denom.2", "type": "elementwise", "name": "denom.2", "beta": 0 }, { "bottom": "zero_mean.2,denom.2", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.2", "top": "out.2", "type": "elementwise", "name": "out.2", "beta": 0 }, { "top": "1091", "w": 1, "h": 1, "name": "1091", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1091", "n": 1, "weights": {}, "constant_blob": 317 }, { "bottom": "out.2,1091", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1092", "top": "1092", "type": "elementwise", "name": "1092", "beta": 0 }, { "top": "1094", "w": 1, "h": 1, "name": "1094", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1094", "n": 1, "weights": {}, "constant_blob": 237 }, { "bottom": "1092,1094", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "k.2", "top": "k.2", "type": "elementwise", "name": "k.2", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "1109", "K": 512, "blob_biases": 241, "name": "1109", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.2", "weights": { "per_ch_qscale": 243, "W_S8": 239, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "k.1", "K": 512, "blob_biases": 247, "name": "k.1", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.2", "weights": { "per_ch_qscale": 249, "W_S8": 245, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "1123", "K": 512, "blob_biases": 253, "name": "1123", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "k.2", "weights": { "per_ch_qscale": 255, "W_S8": 251, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "1109", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "1124", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "1124", "begin_48": 0, "top": "1124_0,1124_1,1124_2,1124_3,1124_4,1124_5,1124_6,1124_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 1, "axis_w": 2, "bottom": "k.1", "axis_k": 0, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_2", "top": "transpose_2", "type": "transpose", "name": "transpose_2" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 3, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "transpose_2", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "1134", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "1134", "begin_48": 0, "top": "1134_0,1134_1,1134_2,1134_3,1134_4,1134_5,1134_6,1134_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 1, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "1123", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "1143", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "1143", "begin_48": 0, "top": "1143_0,1143_1,1143_2,1143_3,1143_4,1143_5,1143_6,1143_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1153_transpose_x", "top": "1134_0_transposed", "type": "transpose", "name": "1153_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1153_transpose_y", "top": "1124_0_transposed", "type": "transpose", "name": "1153_transpose_y" }, { "bottom": "1134_0_transposed,1124_0_transposed", "weights": {}, "debug_info": "1153_batch_matmul", "top": "1153_pre_transpose", "type": "batch_matmul", "name": "1153_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1153_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1153", "top": "1153", "type": "transpose", "name": "1153" }, { "bottom": "1153", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1155", "top": "1155", "type": "elementwise", "name": "1155", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1157_transpose_x", "top": "1134_1_transposed", "type": "transpose", "name": "1157_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1157_transpose_y", "top": "1124_1_transposed", "type": "transpose", "name": "1157_transpose_y" }, { "bottom": "1134_1_transposed,1124_1_transposed", "weights": {}, "debug_info": "1157_batch_matmul", "top": "1157_pre_transpose", "type": "batch_matmul", "name": "1157_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1157_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1157", "top": "1157", "type": "transpose", "name": "1157" }, { "bottom": "1157", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1159", "top": "1159", "type": "elementwise", "name": "1159", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1161_transpose_x", "top": "1134_2_transposed", "type": "transpose", "name": "1161_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1161_transpose_y", "top": "1124_2_transposed", "type": "transpose", "name": "1161_transpose_y" }, { "bottom": "1134_2_transposed,1124_2_transposed", "weights": {}, "debug_info": "1161_batch_matmul", "top": "1161_pre_transpose", "type": "batch_matmul", "name": "1161_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1161_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1161", "top": "1161", "type": "transpose", "name": "1161" }, { "bottom": "1161", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1163", "top": "1163", "type": "elementwise", "name": "1163", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1165_transpose_x", "top": "1134_3_transposed", "type": "transpose", "name": "1165_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1165_transpose_y", "top": "1124_3_transposed", "type": "transpose", "name": "1165_transpose_y" }, { "bottom": "1134_3_transposed,1124_3_transposed", "weights": {}, "debug_info": "1165_batch_matmul", "top": "1165_pre_transpose", "type": "batch_matmul", "name": "1165_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1165_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1165", "top": "1165", "type": "transpose", "name": "1165" }, { "bottom": "1165", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1167", "top": "1167", "type": "elementwise", "name": "1167", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1169_transpose_x", "top": "1134_4_transposed", "type": "transpose", "name": "1169_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1169_transpose_y", "top": "1124_4_transposed", "type": "transpose", "name": "1169_transpose_y" }, { "bottom": "1134_4_transposed,1124_4_transposed", "weights": {}, "debug_info": "1169_batch_matmul", "top": "1169_pre_transpose", "type": "batch_matmul", "name": "1169_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1169_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1169", "top": "1169", "type": "transpose", "name": "1169" }, { "bottom": "1169", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1171", "top": "1171", "type": "elementwise", "name": "1171", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1173_transpose_x", "top": "1134_5_transposed", "type": "transpose", "name": "1173_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1173_transpose_y", "top": "1124_5_transposed", "type": "transpose", "name": "1173_transpose_y" }, { "bottom": "1134_5_transposed,1124_5_transposed", "weights": {}, "debug_info": "1173_batch_matmul", "top": "1173_pre_transpose", "type": "batch_matmul", "name": "1173_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1173_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1173", "top": "1173", "type": "transpose", "name": "1173" }, { "bottom": "1173", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1175", "top": "1175", "type": "elementwise", "name": "1175", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1177_transpose_x", "top": "1134_6_transposed", "type": "transpose", "name": "1177_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1177_transpose_y", "top": "1124_6_transposed", "type": "transpose", "name": "1177_transpose_y" }, { "bottom": "1134_6_transposed,1124_6_transposed", "weights": {}, "debug_info": "1177_batch_matmul", "top": "1177_pre_transpose", "type": "batch_matmul", "name": "1177_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1177_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1177", "top": "1177", "type": "transpose", "name": "1177" }, { "bottom": "1177", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1179", "top": "1179", "type": "elementwise", "name": "1179", "beta": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1134_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1181_transpose_x", "top": "1134_7_transposed", "type": "transpose", "name": "1181_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1124_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1181_transpose_y", "top": "1124_7_transposed", "type": "transpose", "name": "1181_transpose_y" }, { "bottom": "1134_7_transposed,1124_7_transposed", "weights": {}, "debug_info": "1181_batch_matmul", "top": "1181_pre_transpose", "type": "batch_matmul", "name": "1181_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1181_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1181", "top": "1181", "type": "transpose", "name": "1181" }, { "bottom": "1181", "alpha": 0.125, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1183", "top": "1183", "type": "elementwise", "name": "1183", "beta": 0 }, { "bottom": "1155,1159,1163,1167,1171,1175,1179,1183", "weights": {}, "nd_mode": true, "axis": 2, "debug_info": "attn_weights.1", "top": "attn_weights.1", "type": "general_concat", "name": "attn_weights.1" }, { "bottom": "attn_weights.1,qk_mask.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "attn_weights0.1", "nd_mode": true, "top": "attn_weights0.1", "type": "elementwise", "name": "attn_weights0.1", "beta": 0 }, { "bottom": "attn_weights0.1", "weights": {}, "debug_info": "input.2", "top": "input.2", "C": 2, "type": "softmax", "name": "input.2" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "input.2", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "1189", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "1189", "begin_48": 0, "top": "1189_0,1189_1,1189_2,1189_3,1189_4,1189_5,1189_6,1189_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1199_transpose_x", "top": "1143_0_transposed", "type": "transpose", "name": "1199_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_0", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1199_transpose_y", "top": "1189_0_transposed", "type": "transpose", "name": "1199_transpose_y" }, { "bottom": "1143_0_transposed,1189_0_transposed", "weights": {}, "debug_info": "1199_batch_matmul", "top": "1199_pre_transpose", "type": "batch_matmul", "name": "1199_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1199_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1199", "top": "1199", "type": "transpose", "name": "1199" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1201_transpose_x", "top": "1143_1_transposed", "type": "transpose", "name": "1201_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_1", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1201_transpose_y", "top": "1189_1_transposed", "type": "transpose", "name": "1201_transpose_y" }, { "bottom": "1143_1_transposed,1189_1_transposed", "weights": {}, "debug_info": "1201_batch_matmul", "top": "1201_pre_transpose", "type": "batch_matmul", "name": "1201_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1201_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1201", "top": "1201", "type": "transpose", "name": "1201" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1203_transpose_x", "top": "1143_2_transposed", "type": "transpose", "name": "1203_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_2", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1203_transpose_y", "top": "1189_2_transposed", "type": "transpose", "name": "1203_transpose_y" }, { "bottom": "1143_2_transposed,1189_2_transposed", "weights": {}, "debug_info": "1203_batch_matmul", "top": "1203_pre_transpose", "type": "batch_matmul", "name": "1203_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1203_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1203", "top": "1203", "type": "transpose", "name": "1203" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1205_transpose_x", "top": "1143_3_transposed", "type": "transpose", "name": "1205_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_3", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1205_transpose_y", "top": "1189_3_transposed", "type": "transpose", "name": "1205_transpose_y" }, { "bottom": "1143_3_transposed,1189_3_transposed", "weights": {}, "debug_info": "1205_batch_matmul", "top": "1205_pre_transpose", "type": "batch_matmul", "name": "1205_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1205_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1205", "top": "1205", "type": "transpose", "name": "1205" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1207_transpose_x", "top": "1143_4_transposed", "type": "transpose", "name": "1207_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_4", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1207_transpose_y", "top": "1189_4_transposed", "type": "transpose", "name": "1207_transpose_y" }, { "bottom": "1143_4_transposed,1189_4_transposed", "weights": {}, "debug_info": "1207_batch_matmul", "top": "1207_pre_transpose", "type": "batch_matmul", "name": "1207_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1207_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1207", "top": "1207", "type": "transpose", "name": "1207" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1209_transpose_x", "top": "1143_5_transposed", "type": "transpose", "name": "1209_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_5", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1209_transpose_y", "top": "1189_5_transposed", "type": "transpose", "name": "1209_transpose_y" }, { "bottom": "1143_5_transposed,1189_5_transposed", "weights": {}, "debug_info": "1209_batch_matmul", "top": "1209_pre_transpose", "type": "batch_matmul", "name": "1209_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1209_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1209", "top": "1209", "type": "transpose", "name": "1209" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1211_transpose_x", "top": "1143_6_transposed", "type": "transpose", "name": "1211_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_6", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1211_transpose_y", "top": "1189_6_transposed", "type": "transpose", "name": "1211_transpose_y" }, { "bottom": "1143_6_transposed,1189_6_transposed", "weights": {}, "debug_info": "1211_batch_matmul", "top": "1211_pre_transpose", "type": "batch_matmul", "name": "1211_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1211_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1211", "top": "1211", "type": "transpose", "name": "1211" }, { "axis_h": 2, "axis_w": 0, "bottom": "1143_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1213_transpose_x", "top": "1143_7_transposed", "type": "transpose", "name": "1213_transpose_x" }, { "axis_h": 2, "axis_w": 0, "bottom": "1189_7", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1213_transpose_y", "top": "1189_7_transposed", "type": "transpose", "name": "1213_transpose_y" }, { "bottom": "1143_7_transposed,1189_7_transposed", "weights": {}, "debug_info": "1213_batch_matmul", "top": "1213_pre_transpose", "type": "batch_matmul", "name": "1213_batch_matmul", "channel_mode": false }, { "axis_h": 2, "axis_w": 0, "bottom": "1213_pre_transpose", "axis_k": 1, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "1213", "top": "1213", "type": "transpose", "name": "1213" }, { "weights": {}, "debug_info": "attn.2", "top": "attn.2", "type": "concat", "name": "attn.2", "bottom": "1199,1201,1203,1205,1207,1209,1211,1213" }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "attn.1", "K": 512, "blob_biases": 259, "name": "attn.1", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "attn.2", "weights": { "per_ch_qscale": 261, "W_S8": 257, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "inputs3.1,attn.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs0.1", "top": "inputs0.1", "type": "elementwise", "name": "inputs0.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs0.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.23", "use_version": 1, "top": "channels_mean.23", "type": "reduce", "name": "channels_mean.23" }, { "alpha": -1, "bottom": "channels_mean.23", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.23", "top": "_neg_y_zero_mean.23", "type": "activation", "name": "_neg_y_zero_mean.23", "beta": 0 }, { "bottom": "inputs0.1,_neg_y_zero_mean.23", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.23", "top": "zero_mean.23", "type": "elementwise", "name": "zero_mean.23", "beta": 0 }, { "bottom": "zero_mean.23,zero_mean.23", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.23", "top": "zero_mean_sq.23", "type": "elementwise", "name": "zero_mean_sq.23", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.23", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "1234", "use_version": 1, "top": "1234", "type": "reduce", "name": "1234" }, { "bottom": "1234", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1236", "top": "1236", "type": "elementwise", "name": "1236", "beta": 0 }, { "bottom": "1236", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.23", "top": "denom.23", "type": "elementwise", "name": "denom.23", "beta": 0 }, { "bottom": "zero_mean.23,denom.23", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.23", "top": "out.23", "type": "elementwise", "name": "out.23", "beta": 0 }, { "top": "1240", "w": 1, "h": 1, "name": "1240", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1240", "n": 1, "weights": {}, "constant_blob": 319 }, { "bottom": "out.23,1240", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1241", "top": "1241", "type": "elementwise", "name": "1241", "beta": 0 }, { "top": "1243", "w": 1, "h": 1, "name": "1243", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1243", "n": 1, "weights": {}, "constant_blob": 265 }, { "bottom": "1241,1243", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.4", "top": "input.4", "type": "elementwise", "name": "input.4", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.5", "K": 512, "blob_biases": 269, "name": "x.5", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 2048, "bottom": "input.4", "weights": { "per_ch_qscale": 271, "W_S8": 267, "per_ch_qbias": 299 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.5", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1255", "top": "1255", "type": "elementwise", "name": "1255", "beta": 0 }, { "bottom": "1255", "weights": {}, "mode": 3, "debug_info": "1256", "top": "1256", "type": "activation", "name": "1256" }, { "bottom": "x.5,1256", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "input.8", "top": "input.8", "type": "elementwise", "name": "input.8", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "input0.1", "K": 2048, "blob_biases": 275, "name": "input0.1", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "input.8", "weights": { "per_ch_qscale": 277, "W_S8": 273, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "input0.1,inputs0.1", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1266", "top": "1266", "type": "elementwise", "name": "1266", "beta": 0 }, { "bottom": "inputs3.1", "alpha": 0, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1268", "top": "1268", "type": "elementwise", "name": "1268", "beta": 0 }, { "bottom": "1268,1266", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "inputs4.1", "top": "inputs4.1", "type": "elementwise", "name": "inputs4.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs4.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.4", "use_version": 1, "top": "channels_mean.4", "type": "reduce", "name": "channels_mean.4" }, { "alpha": -1, "bottom": "channels_mean.4", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.4", "top": "_neg_y_zero_mean.4", "type": "activation", "name": "_neg_y_zero_mean.4", "beta": 0 }, { "bottom": "inputs4.1,_neg_y_zero_mean.4", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.4", "top": "zero_mean.4", "type": "elementwise", "name": "zero_mean.4", "beta": 0 }, { "bottom": "zero_mean.4,zero_mean.4", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.4", "top": "zero_mean_sq.4", "type": "elementwise", "name": "zero_mean_sq.4", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.4", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "1279", "use_version": 1, "top": "1279", "type": "reduce", "name": "1279" }, { "bottom": "1279", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1281", "top": "1281", "type": "elementwise", "name": "1281", "beta": 0 }, { "bottom": "1281", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.4", "top": "denom.4", "type": "elementwise", "name": "denom.4", "beta": 0 }, { "bottom": "zero_mean.4,denom.4", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.4", "top": "out.4", "type": "elementwise", "name": "out.4", "beta": 0 }, { "top": "1285", "w": 1, "h": 1, "name": "1285", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1285", "n": 1, "weights": {}, "constant_blob": 321 }, { "bottom": "out.4,1285", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1286", "top": "1286", "type": "elementwise", "name": "1286", "beta": 0 }, { "top": "1288", "w": 1, "h": 1, "name": "1288", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1288", "n": 1, "weights": {}, "constant_blob": 281 }, { "bottom": "1286,1288", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "embeddings.1", "top": "embeddings.1", "type": "elementwise", "name": "embeddings.1", "beta": 0 }, { "pad_r": 0, "fused_relu": 0, "fused_tanh": 0, "debug_info": "", "pad_fill_mode": 0, "pad_b": 0, "pad_l": 0, "top": "x.2", "K": 512, "blob_biases": 285, "name": "x.2", "has_batch_norm": 0, "type": "convolution", "n_groups": 1, "pad_t": 0, "has_biases": 1, "C": 512, "bottom": "embeddings.1", "weights": { "per_ch_qscale": 287, "W_S8": 283, "per_ch_qbias": 295 }, "Nx": 1, "pad_mode": 0, "pad_value": 0, "Ny": 1, "n_parallel": 1 }, { "bottom": "x.2", "alpha": 1.7020000219345093, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "1308", "top": "1308", "type": "elementwise", "name": "1308", "beta": 0 }, { "bottom": "1308", "weights": {}, "mode": 3, "debug_info": "1309", "top": "1309", "type": "activation", "name": "1309" }, { "bottom": "x.2,1309", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "inputs.1", "top": "inputs.1", "type": "elementwise", "name": "inputs.1", "beta": 0 }, { "axis_mode": 4, "bottom": "inputs.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "channels_mean.1", "use_version": 1, "top": "channels_mean.1", "type": "reduce", "name": "channels_mean.1" }, { "alpha": -1, "bottom": "channels_mean.1", "weights": {}, "mode": 6, "debug_info": "_neg_y_zero_mean.1", "top": "_neg_y_zero_mean.1", "type": "activation", "name": "_neg_y_zero_mean.1", "beta": 0 }, { "bottom": "inputs.1,_neg_y_zero_mean.1", "alpha": 0.010416666977107525, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean.1", "top": "zero_mean.1", "type": "elementwise", "name": "zero_mean.1", "beta": 0 }, { "bottom": "zero_mean.1,zero_mean.1", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "zero_mean_sq.1", "top": "zero_mean_sq.1", "type": "elementwise", "name": "zero_mean_sq.1", "beta": 0 }, { "axis_mode": 4, "bottom": "zero_mean_sq.1", "weights": {}, "mode": 1, "nd_axis": 1, "nd_mode": true, "debug_info": "1318", "use_version": 1, "top": "1318", "type": "reduce", "name": "1318" }, { "bottom": "1318", "alpha": 1.0850693676900391e-09, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1320", "top": "1320", "type": "elementwise", "name": "1320", "beta": 0 }, { "bottom": "1320", "alpha": 1, "operation": 12, "eps": 9.999999960041972e-13, "weights": {}, "fused_relu": 0, "debug_info": "denom.1", "top": "denom.1", "type": "elementwise", "name": "denom.1", "beta": 0 }, { "bottom": "zero_mean.1,denom.1", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "out.1", "top": "out.1", "type": "elementwise", "name": "out.1", "beta": 0 }, { "top": "1324", "w": 1, "h": 1, "name": "1324", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1324", "n": 1, "weights": {}, "constant_blob": 323 }, { "bottom": "out.1,1324", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "1325", "top": "1325", "type": "elementwise", "name": "1325", "beta": 0 }, { "top": "1327", "w": 1, "h": 1, "name": "1327", "nd_rank": 4, "type": "load_constant", "k": 512, "bottom": "", "debug_info": "1327", "n": 1, "weights": {}, "constant_blob": 291 }, { "bottom": "1325,1327", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "x.1", "top": "x.1", "type": "elementwise", "name": "x.1", "beta": 0 }, { "size_of_axes": 1, "bottom": "x.1", "axes_0": 2, "weights": {}, "nd_axis": 0, "debug_info": "1329", "version": 1, "top": "1329", "type": "squeeze", "name": "1329" }, { "axis_h": 0, "axis_w": 1, "bottom": "1329", "axis_k": 2, "axis_n": 3, "axis_seq": 4, "weights": {}, "debug_info": "transpose_0", "top": "transpose_0", "type": "transpose", "name": "transpose_0" }, { "nB": 512, "top": "lm/logits", "has_biases": 1, "weights": { "per_ch_qscale": 3, "W_S8": 5, "per_ch_qbias": 325 }, "nC": 15000, "type": "inner_product", "has_relu": 0, "bottom": "transpose_0", "blob_biases": 325, "has_tanh": 0, "debug_info": "", "name": "lm_logits.1", "has_prelu": 0 }, { "bottom": "lm/logits", "weights": {}, "nd_axis": -1, "debug_info": "lm_probs.1", "top": "output", "type": "softmax_nd", "name": "lm_probs.1", "attributes": { "is_output": 1 } }, { "axis_seq": 4, "name": "transpose_1", "axis_n": 3, "axis_h": 1, "type": "transpose", "attributes": { "is_output": 1 }, "bottom": "embeddings.1", "axis_w": 2, "axis_k": 0, "debug_info": "transpose_1", "weights": {}, "top": "embedding_out" } ] }