{ "storage": "unilm_joint.espresso.weights", "analyses": { "N8Espresso23analysis_debug_metadataE": { "bundle": "", "product": "", "version": "", "path": "", "mldb_token": "", "use_case": "", "name": "LOCALHOST-2024-12-26-CPU-quant" } }, "properties": { "fast_reshape": "1" }, "format_version": 200, "metadata_in_weights": [], "layers": [ { "nB": 15000, "top": "input_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 3, "W_t_int8": 5 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "input", "debug_info": "", "has_tanh": 0, "name": "embed", "has_prelu": 0 }, { "nB": 64, "top": "position_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 7, "W_t_int8": 9 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "position", "debug_info": "", "has_tanh": 0, "name": "embed_position", "has_prelu": 0 }, { "nB": 1, "top": "segment_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 11, "W_t_int8": 13 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "segment", "debug_info": "", "has_tanh": 0, "name": "embed_segment", "has_prelu": 0 }, { "bottom": "position_out,segment_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "embed_pos_seg_out", "type": "elementwise", "name": "embed_pos_seg", "beta": 0 }, { "bottom": "embed_pos_seg_out,input_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "add_out", "type": "elementwise", "name": "add", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "add_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 15, "wGamma": 17 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 19 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:query_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 21, "Qscale": 23 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 25 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:key_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 27, "Qscale": 29 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 31 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:value_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 33, "Qscale": 35 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d:query" }, { "name": "ane_gpt2_transformer_layer_3d:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_k_s_in,ane_gpt2_transformer_layer_3d/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_v_s_in,ane_gpt2_transformer_layer_3d/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d:query_0,ane_gpt2_transformer_layer_3d:query_1,ane_gpt2_transformer_layer_3d:query_2,ane_gpt2_transformer_layer_3d:query_3,ane_gpt2_transformer_layer_3d:query_4,ane_gpt2_transformer_layer_3d:query_5,ane_gpt2_transformer_layer_3d:query_6,ane_gpt2_transformer_layer_3d:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:query_0,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_1,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_2,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_3,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_4,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_5,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_6,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:query_7,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:raw_score_0,ane_gpt2_transformer_layer_3d:raw_score_1,ane_gpt2_transformer_layer_3d:raw_score_2,ane_gpt2_transformer_layer_3d:raw_score_3,ane_gpt2_transformer_layer_3d:raw_score_4,ane_gpt2_transformer_layer_3d:raw_score_5,ane_gpt2_transformer_layer_3d:raw_score_6,ane_gpt2_transformer_layer_3d:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d:softmax_0,ane_gpt2_transformer_layer_3d:softmax_1,ane_gpt2_transformer_layer_3d:softmax_2,ane_gpt2_transformer_layer_3d:softmax_3,ane_gpt2_transformer_layer_3d:softmax_4,ane_gpt2_transformer_layer_3d:softmax_5,ane_gpt2_transformer_layer_3d:softmax_6,ane_gpt2_transformer_layer_3d:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_0,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_1,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_2,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_3,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_4,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_5,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_6,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax_7,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:weighted_avg_0,ane_gpt2_transformer_layer_3d:weighted_avg_1,ane_gpt2_transformer_layer_3d:weighted_avg_2,ane_gpt2_transformer_layer_3d:weighted_avg_3,ane_gpt2_transformer_layer_3d:weighted_avg_4,ane_gpt2_transformer_layer_3d:weighted_avg_5,ane_gpt2_transformer_layer_3d:weighted_avg_6,ane_gpt2_transformer_layer_3d:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 37 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_layer", "weights": { "biases": 39, "Qscale": 41 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "add_out,ane_gpt2_transformer_layer_3d/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 43, "wGamma": 45 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 47 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 49, "Qscale": 51 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 53 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_layer", "weights": { "biases": 55, "Qscale": 57 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out,ane_gpt2_transformer_layer_3d/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 59, "wGamma": 61 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 63 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 65, "Qscale": 67 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 69 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 71, "Qscale": 73 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 75 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 77, "Qscale": 79 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1:query" }, { "name": "ane_gpt2_transformer_layer_3d_1:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_1:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_k_s_in,ane_gpt2_transformer_layer_3d_1/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_v_s_in,ane_gpt2_transformer_layer_3d_1/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_1:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_1:query_0,ane_gpt2_transformer_layer_3d_1:query_1,ane_gpt2_transformer_layer_3d_1:query_2,ane_gpt2_transformer_layer_3d_1:query_3,ane_gpt2_transformer_layer_3d_1:query_4,ane_gpt2_transformer_layer_3d_1:query_5,ane_gpt2_transformer_layer_3d_1:query_6,ane_gpt2_transformer_layer_3d_1:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_0,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_1,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_2,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_3,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_4,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_5,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_6,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_7,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:raw_score_0,ane_gpt2_transformer_layer_3d_1:raw_score_1,ane_gpt2_transformer_layer_3d_1:raw_score_2,ane_gpt2_transformer_layer_3d_1:raw_score_3,ane_gpt2_transformer_layer_3d_1:raw_score_4,ane_gpt2_transformer_layer_3d_1:raw_score_5,ane_gpt2_transformer_layer_3d_1:raw_score_6,ane_gpt2_transformer_layer_3d_1:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_1:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_1:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_1:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_1:softmax_0,ane_gpt2_transformer_layer_3d_1:softmax_1,ane_gpt2_transformer_layer_3d_1:softmax_2,ane_gpt2_transformer_layer_3d_1:softmax_3,ane_gpt2_transformer_layer_3d_1:softmax_4,ane_gpt2_transformer_layer_3d_1:softmax_5,ane_gpt2_transformer_layer_3d_1:softmax_6,ane_gpt2_transformer_layer_3d_1:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_0,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_1,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_2,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_3,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_4,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_5,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_6,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax_7,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:weighted_avg_0,ane_gpt2_transformer_layer_3d_1:weighted_avg_1,ane_gpt2_transformer_layer_3d_1:weighted_avg_2,ane_gpt2_transformer_layer_3d_1:weighted_avg_3,ane_gpt2_transformer_layer_3d_1:weighted_avg_4,ane_gpt2_transformer_layer_3d_1:weighted_avg_5,ane_gpt2_transformer_layer_3d_1:weighted_avg_6,ane_gpt2_transformer_layer_3d_1:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_1/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 81 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_layer", "weights": { "biases": 83, "Qscale": 85 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out,ane_gpt2_transformer_layer_3d_1/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 87, "wGamma": 89 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 91 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 93, "Qscale": 95 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 97 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_layer", "weights": { "biases": 99, "Qscale": 101 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out,ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 103, "wGamma": 105 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 107 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 109, "Qscale": 111 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 113 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 115, "Qscale": 117 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 119 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 121, "Qscale": 123 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2:query" }, { "name": "ane_gpt2_transformer_layer_3d_2:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_2:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_k_s_in,ane_gpt2_transformer_layer_3d_2/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_v_s_in,ane_gpt2_transformer_layer_3d_2/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_2:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_2:query_0,ane_gpt2_transformer_layer_3d_2:query_1,ane_gpt2_transformer_layer_3d_2:query_2,ane_gpt2_transformer_layer_3d_2:query_3,ane_gpt2_transformer_layer_3d_2:query_4,ane_gpt2_transformer_layer_3d_2:query_5,ane_gpt2_transformer_layer_3d_2:query_6,ane_gpt2_transformer_layer_3d_2:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_0,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_1,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_2,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_3,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_4,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_5,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_6,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_7,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:raw_score_0,ane_gpt2_transformer_layer_3d_2:raw_score_1,ane_gpt2_transformer_layer_3d_2:raw_score_2,ane_gpt2_transformer_layer_3d_2:raw_score_3,ane_gpt2_transformer_layer_3d_2:raw_score_4,ane_gpt2_transformer_layer_3d_2:raw_score_5,ane_gpt2_transformer_layer_3d_2:raw_score_6,ane_gpt2_transformer_layer_3d_2:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_2:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_2:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_2:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_2:softmax_0,ane_gpt2_transformer_layer_3d_2:softmax_1,ane_gpt2_transformer_layer_3d_2:softmax_2,ane_gpt2_transformer_layer_3d_2:softmax_3,ane_gpt2_transformer_layer_3d_2:softmax_4,ane_gpt2_transformer_layer_3d_2:softmax_5,ane_gpt2_transformer_layer_3d_2:softmax_6,ane_gpt2_transformer_layer_3d_2:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_0,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_1,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_2,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_3,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_4,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_5,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_6,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax_7,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:weighted_avg_0,ane_gpt2_transformer_layer_3d_2:weighted_avg_1,ane_gpt2_transformer_layer_3d_2:weighted_avg_2,ane_gpt2_transformer_layer_3d_2:weighted_avg_3,ane_gpt2_transformer_layer_3d_2:weighted_avg_4,ane_gpt2_transformer_layer_3d_2:weighted_avg_5,ane_gpt2_transformer_layer_3d_2:weighted_avg_6,ane_gpt2_transformer_layer_3d_2:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_2/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 125 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_layer", "weights": { "biases": 127, "Qscale": 129 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out,ane_gpt2_transformer_layer_3d_2/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 131, "wGamma": 133 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 135 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 137, "Qscale": 139 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 141 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_layer", "weights": { "biases": 143, "Qscale": 145 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out,ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 147, "wGamma": 149 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 151 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 153, "Qscale": 155 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 157 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 159, "Qscale": 161 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 163 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 165, "Qscale": 167 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3:query" }, { "name": "ane_gpt2_transformer_layer_3d_3:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_3:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_k_s_in,ane_gpt2_transformer_layer_3d_3/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_v_s_in,ane_gpt2_transformer_layer_3d_3/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_3:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_3:query_0,ane_gpt2_transformer_layer_3d_3:query_1,ane_gpt2_transformer_layer_3d_3:query_2,ane_gpt2_transformer_layer_3d_3:query_3,ane_gpt2_transformer_layer_3d_3:query_4,ane_gpt2_transformer_layer_3d_3:query_5,ane_gpt2_transformer_layer_3d_3:query_6,ane_gpt2_transformer_layer_3d_3:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_0,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_1,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_2,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_3,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_4,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_5,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_6,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_7,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:raw_score_0,ane_gpt2_transformer_layer_3d_3:raw_score_1,ane_gpt2_transformer_layer_3d_3:raw_score_2,ane_gpt2_transformer_layer_3d_3:raw_score_3,ane_gpt2_transformer_layer_3d_3:raw_score_4,ane_gpt2_transformer_layer_3d_3:raw_score_5,ane_gpt2_transformer_layer_3d_3:raw_score_6,ane_gpt2_transformer_layer_3d_3:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_3:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_3:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_3:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_3:softmax_0,ane_gpt2_transformer_layer_3d_3:softmax_1,ane_gpt2_transformer_layer_3d_3:softmax_2,ane_gpt2_transformer_layer_3d_3:softmax_3,ane_gpt2_transformer_layer_3d_3:softmax_4,ane_gpt2_transformer_layer_3d_3:softmax_5,ane_gpt2_transformer_layer_3d_3:softmax_6,ane_gpt2_transformer_layer_3d_3:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_0,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_1,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_2,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_3,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_4,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_5,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_6,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax_7,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:weighted_avg_0,ane_gpt2_transformer_layer_3d_3:weighted_avg_1,ane_gpt2_transformer_layer_3d_3:weighted_avg_2,ane_gpt2_transformer_layer_3d_3:weighted_avg_3,ane_gpt2_transformer_layer_3d_3:weighted_avg_4,ane_gpt2_transformer_layer_3d_3:weighted_avg_5,ane_gpt2_transformer_layer_3d_3:weighted_avg_6,ane_gpt2_transformer_layer_3d_3:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_3/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 169 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_layer", "weights": { "biases": 171, "Qscale": 173 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out,ane_gpt2_transformer_layer_3d_3/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 175, "wGamma": 177 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 179 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 181, "Qscale": 183 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 185 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_layer", "weights": { "biases": 187, "Qscale": 189 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out,ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 191, "wGamma": 193 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 195 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 197, "Qscale": 199 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 201 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 203, "Qscale": 205 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 207 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 209, "Qscale": 211 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4:query" }, { "name": "ane_gpt2_transformer_layer_3d_4:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_4:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_k_s_in,ane_gpt2_transformer_layer_3d_4/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_v_s_in,ane_gpt2_transformer_layer_3d_4/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_4:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_4:query_0,ane_gpt2_transformer_layer_3d_4:query_1,ane_gpt2_transformer_layer_3d_4:query_2,ane_gpt2_transformer_layer_3d_4:query_3,ane_gpt2_transformer_layer_3d_4:query_4,ane_gpt2_transformer_layer_3d_4:query_5,ane_gpt2_transformer_layer_3d_4:query_6,ane_gpt2_transformer_layer_3d_4:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_0,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_1,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_2,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_3,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_4,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_5,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_6,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_7,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:raw_score_0,ane_gpt2_transformer_layer_3d_4:raw_score_1,ane_gpt2_transformer_layer_3d_4:raw_score_2,ane_gpt2_transformer_layer_3d_4:raw_score_3,ane_gpt2_transformer_layer_3d_4:raw_score_4,ane_gpt2_transformer_layer_3d_4:raw_score_5,ane_gpt2_transformer_layer_3d_4:raw_score_6,ane_gpt2_transformer_layer_3d_4:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_4:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_4:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_4:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_4:softmax_0,ane_gpt2_transformer_layer_3d_4:softmax_1,ane_gpt2_transformer_layer_3d_4:softmax_2,ane_gpt2_transformer_layer_3d_4:softmax_3,ane_gpt2_transformer_layer_3d_4:softmax_4,ane_gpt2_transformer_layer_3d_4:softmax_5,ane_gpt2_transformer_layer_3d_4:softmax_6,ane_gpt2_transformer_layer_3d_4:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_0,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_1,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_2,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_3,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_4,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_5,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_6,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax_7,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:weighted_avg_0,ane_gpt2_transformer_layer_3d_4:weighted_avg_1,ane_gpt2_transformer_layer_3d_4:weighted_avg_2,ane_gpt2_transformer_layer_3d_4:weighted_avg_3,ane_gpt2_transformer_layer_3d_4:weighted_avg_4,ane_gpt2_transformer_layer_3d_4:weighted_avg_5,ane_gpt2_transformer_layer_3d_4:weighted_avg_6,ane_gpt2_transformer_layer_3d_4:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_4/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 213 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_layer", "weights": { "biases": 215, "Qscale": 217 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out,ane_gpt2_transformer_layer_3d_4/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 219, "wGamma": 221 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 223 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 225, "Qscale": 227 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 229 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_layer", "weights": { "biases": 231, "Qscale": 233 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out,ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 235, "wGamma": 237 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 239 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 241, "Qscale": 243 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 245 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 247, "Qscale": 249 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 251 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 253, "Qscale": 255 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5:query" }, { "name": "ane_gpt2_transformer_layer_3d_5:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_5:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_k_s_in,ane_gpt2_transformer_layer_3d_5/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_v_s_in,ane_gpt2_transformer_layer_3d_5/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_5:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_5:query_0,ane_gpt2_transformer_layer_3d_5:query_1,ane_gpt2_transformer_layer_3d_5:query_2,ane_gpt2_transformer_layer_3d_5:query_3,ane_gpt2_transformer_layer_3d_5:query_4,ane_gpt2_transformer_layer_3d_5:query_5,ane_gpt2_transformer_layer_3d_5:query_6,ane_gpt2_transformer_layer_3d_5:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_0,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_1,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_2,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_3,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_4,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_5,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_6,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_7,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:raw_score_0,ane_gpt2_transformer_layer_3d_5:raw_score_1,ane_gpt2_transformer_layer_3d_5:raw_score_2,ane_gpt2_transformer_layer_3d_5:raw_score_3,ane_gpt2_transformer_layer_3d_5:raw_score_4,ane_gpt2_transformer_layer_3d_5:raw_score_5,ane_gpt2_transformer_layer_3d_5:raw_score_6,ane_gpt2_transformer_layer_3d_5:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_5:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_5:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_5:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_5:softmax_0,ane_gpt2_transformer_layer_3d_5:softmax_1,ane_gpt2_transformer_layer_3d_5:softmax_2,ane_gpt2_transformer_layer_3d_5:softmax_3,ane_gpt2_transformer_layer_3d_5:softmax_4,ane_gpt2_transformer_layer_3d_5:softmax_5,ane_gpt2_transformer_layer_3d_5:softmax_6,ane_gpt2_transformer_layer_3d_5:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_0,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_1,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_2,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_3,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_4,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_5,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_6,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax_7,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:weighted_avg_0,ane_gpt2_transformer_layer_3d_5:weighted_avg_1,ane_gpt2_transformer_layer_3d_5:weighted_avg_2,ane_gpt2_transformer_layer_3d_5:weighted_avg_3,ane_gpt2_transformer_layer_3d_5:weighted_avg_4,ane_gpt2_transformer_layer_3d_5:weighted_avg_5,ane_gpt2_transformer_layer_3d_5:weighted_avg_6,ane_gpt2_transformer_layer_3d_5:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_5/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 257 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_layer", "weights": { "biases": 259, "Qscale": 261 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out,ane_gpt2_transformer_layer_3d_5/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 263, "wGamma": 265 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 267 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 269, "Qscale": 271 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 273 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_layer", "weights": { "biases": 275, "Qscale": 277 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out,ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 279, "wGamma": 281 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 283 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:query_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 285, "Qscale": 287 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 289 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:key_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 291, "Qscale": 293 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 295 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:value_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 297, "Qscale": 299 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6:query" }, { "name": "ane_gpt2_transformer_layer_3d_6:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_6:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/attn_k_s_in,ane_gpt2_transformer_layer_3d_6/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/attn_v_s_in,ane_gpt2_transformer_layer_3d_6/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_6:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_6:query_0,ane_gpt2_transformer_layer_3d_6:query_1,ane_gpt2_transformer_layer_3d_6:query_2,ane_gpt2_transformer_layer_3d_6:query_3,ane_gpt2_transformer_layer_3d_6:query_4,ane_gpt2_transformer_layer_3d_6:query_5,ane_gpt2_transformer_layer_3d_6:query_6,ane_gpt2_transformer_layer_3d_6:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_0,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_1,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_2,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_3,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_4,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_5,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_6,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query_7,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:raw_score_0,ane_gpt2_transformer_layer_3d_6:raw_score_1,ane_gpt2_transformer_layer_3d_6:raw_score_2,ane_gpt2_transformer_layer_3d_6:raw_score_3,ane_gpt2_transformer_layer_3d_6:raw_score_4,ane_gpt2_transformer_layer_3d_6:raw_score_5,ane_gpt2_transformer_layer_3d_6:raw_score_6,ane_gpt2_transformer_layer_3d_6:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_6:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_6:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_6:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_6:softmax_0,ane_gpt2_transformer_layer_3d_6:softmax_1,ane_gpt2_transformer_layer_3d_6:softmax_2,ane_gpt2_transformer_layer_3d_6:softmax_3,ane_gpt2_transformer_layer_3d_6:softmax_4,ane_gpt2_transformer_layer_3d_6:softmax_5,ane_gpt2_transformer_layer_3d_6:softmax_6,ane_gpt2_transformer_layer_3d_6:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_0,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_1,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_2,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_3,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_4,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_5,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_6,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax_7,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:weighted_avg_0,ane_gpt2_transformer_layer_3d_6:weighted_avg_1,ane_gpt2_transformer_layer_3d_6:weighted_avg_2,ane_gpt2_transformer_layer_3d_6:weighted_avg_3,ane_gpt2_transformer_layer_3d_6:weighted_avg_4,ane_gpt2_transformer_layer_3d_6:weighted_avg_5,ane_gpt2_transformer_layer_3d_6:weighted_avg_6,ane_gpt2_transformer_layer_3d_6:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_6/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_6/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 301 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_6/attn_out/act_scale_per_layer", "weights": { "biases": 303, "Qscale": 305 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out,ane_gpt2_transformer_layer_3d_6/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 307, "wGamma": 309 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 311 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 313, "Qscale": 315 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_6/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_6/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 317 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_6/ffn_act_out/act_scale_per_layer", "weights": { "biases": 319, "Qscale": 321 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/residual_attn_out,ane_gpt2_transformer_layer_3d_6/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 323, "wGamma": 325 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 327 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:query_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 329, "Qscale": 331 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 333 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:key_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 335, "Qscale": 337 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 339 }, "nC": 64, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:value_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 341, "Qscale": 343 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7:query" }, { "name": "ane_gpt2_transformer_layer_3d_7:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_7:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/attn_k_s_in,ane_gpt2_transformer_layer_3d_7/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/attn_v_s_in,ane_gpt2_transformer_layer_3d_7/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:value_state_concat" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:query", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_7:query_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_7:query_0,ane_gpt2_transformer_layer_3d_7:query_1,ane_gpt2_transformer_layer_3d_7:query_2,ane_gpt2_transformer_layer_3d_7:query_3,ane_gpt2_transformer_layer_3d_7:query_4,ane_gpt2_transformer_layer_3d_7:query_5,ane_gpt2_transformer_layer_3d_7:query_6,ane_gpt2_transformer_layer_3d_7:query_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_0,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_1,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_2,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_3,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_4,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_5,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_6,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query_7,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:raw_score_0,ane_gpt2_transformer_layer_3d_7:raw_score_1,ane_gpt2_transformer_layer_3d_7:raw_score_2,ane_gpt2_transformer_layer_3d_7:raw_score_3,ane_gpt2_transformer_layer_3d_7:raw_score_4,ane_gpt2_transformer_layer_3d_7:raw_score_5,ane_gpt2_transformer_layer_3d_7:raw_score_6,ane_gpt2_transformer_layer_3d_7:raw_score_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:raw_score_join" }, { "bottom": "ane_gpt2_transformer_layer_3d_7:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_7:softmax" }, { "begin_104": 0, "begin_63": 0, "begin_59": 0, "begin_64": 0, "begin_105": 0, "begin_65": 0, "begin_0": 0, "begin_70": 0, "begin_66": 0, "begin_106": 0, "begin_110": 0, "begin_2": 0, "begin_71": 0, "begin_67": 0, "begin_68": 0, "begin_107": 0, "begin_111": 0, "begin_4": 0, "begin_72": 0, "begin_73": 0, "begin_69": 0, "begin_6": 0, "begin_74": 0, "begin_108": 0, "type": "split_nd", "begin_8": 0, "begin_112": 0, "begin_75": 0, "begin_10": 0, "begin_80": 0, "begin_76": 0, "begin_109": 0, "begin_113": 0, "begin_11": 0, "nd_axis": 2, "begin_81": 0, "begin_77": 0, "begin_114": 0, "begin_12": 0, "begin_78": 0, "begin_82": 0, "begin_13": 0, "begin_83": 0, "begin_79": 0, "begin_115": 0, "begin_14": 0, "begin_84": 0, "begin_15": 0, "begin_85": 0, "num_splits": 8, "begin_116": 0, "begin_120": 0, "begin_20": 0, "begin_86": 0, "begin_16": 0, "begin_90": 0, "begin_21": 0, "begin_91": 0, "begin_87": 0, "begin_17": 0, "begin_117": 0, "begin_121": 0, "begin_18": 0, "begin_92": 0, "begin_88": 0, "begin_22": 0, "begin_23": 0, "begin_89": 0, "begin_19": 0, "begin_93": 0, "begin_118": 0, "begin_122": 0, "begin_24": 0, "begin_94": 0, "begin_25": 0, "begin_95": 0, "begin_119": 0, "begin_123": 0, "begin_30": 0, "begin_26": 0, "begin_96": 0, "begin_124": 0, "begin_31": 0, "begin_97": 0, "begin_27": 0, "begin_28": 0, "begin_98": 0, "begin_32": 0, "begin_125": 0, "begin_33": 0, "begin_29": 0, "begin_99": 0, "begin_1": 0, "weights": {}, "begin_34": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:softmax", "begin_3": 0, "begin_126": 0, "begin_35": 0, "begin_5": 0, "begin_36": 0, "begin_40": 0, "begin_127": 0, "name": "ane_gpt2_transformer_layer_3d_7:softmax_split", "begin_41": 0, "begin_37": 0, "begin_7": 0, "begin_42": 0, "begin_38": 0, "begin_9": 0, "begin_39": 0, "begin_43": 0, "begin_44": 0, "begin_45": 0, "begin_50": 0, "begin_46": 0, "begin_51": 0, "begin_47": 0, "begin_100": 0, "begin_52": 0, "debug_info": "", "begin_48": 0, "top": "ane_gpt2_transformer_layer_3d_7:softmax_0,ane_gpt2_transformer_layer_3d_7:softmax_1,ane_gpt2_transformer_layer_3d_7:softmax_2,ane_gpt2_transformer_layer_3d_7:softmax_3,ane_gpt2_transformer_layer_3d_7:softmax_4,ane_gpt2_transformer_layer_3d_7:softmax_5,ane_gpt2_transformer_layer_3d_7:softmax_6,ane_gpt2_transformer_layer_3d_7:softmax_7", "begin_101": 0, "begin_53": 0, "begin_49": 0, "begin_54": 0, "begin_55": 0, "begin_102": 0, "begin_60": 0, "begin_56": 0, "begin_103": 0, "begin_57": 0, "begin_61": 0, "begin_62": 0, "begin_58": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_0,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_0", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_0", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_1,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_1", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_1", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_2,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_2", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_2", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_3,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_3", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_3", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_4,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_4", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_4", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_5,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_5", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_5", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_6,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_6", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_6", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax_7,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg_7", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_7", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:weighted_avg_0,ane_gpt2_transformer_layer_3d_7:weighted_avg_1,ane_gpt2_transformer_layer_3d_7:weighted_avg_2,ane_gpt2_transformer_layer_3d_7:weighted_avg_3,ane_gpt2_transformer_layer_3d_7:weighted_avg_4,ane_gpt2_transformer_layer_3d_7:weighted_avg_5,ane_gpt2_transformer_layer_3d_7:weighted_avg_6,ane_gpt2_transformer_layer_3d_7:weighted_avg_7", "weights": {}, "axis": 2, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg_join" }, { "name": "ane_gpt2_transformer_layer_3d_7/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_7/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 345 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_7/attn_out/act_scale_per_layer", "weights": { "biases": 347, "Qscale": 349 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out,ane_gpt2_transformer_layer_3d_7/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 351, "wGamma": 353 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 355 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 357, "Qscale": 359 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_7/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_7/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 361 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_7/ffn_act_out/act_scale_per_layer", "weights": { "biases": 363, "Qscale": 365 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_7/residual_attn_out,ane_gpt2_transformer_layer_3d_7/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7/residual_ffn", "beta": 0 }, { "name": "decoder/ln_final_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "decoder/ln_final_out_after_input_transpose" }, { "bottom": "decoder/ln_final_out_after_input_transpose", "weights": { "wBeta": 367, "wGamma": 369 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "decoder/ln_final_after_output_transpose", "type": "instancenorm_1d", "name": "decoder/ln_final", "eps_in_square_root": 1 }, { "name": "decoder/ln_final_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "decoder/ln_final_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "decoder/ln_final_out" }, { "dst_cp": -1, "bottom": "decoder/ln_final_out", "weights": {}, "debug_info": "", "top": "decoder/ln_final_out/quantized_per_layer,decoder/ln_final_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "decoder/ln_final_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "lm_head/transform/output_raw", "has_biases": 0, "weights": { "W_int8": 371 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "decoder/ln_final_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "lm_head/transform/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "lm_head/transform/output_raw,decoder/ln_final_out/act_scale_per_layer", "weights": { "biases": 373, "Qscale": 375 }, "debug_info": "", "top": "lm_head/transform_out", "type": "dynamic_dequantize", "name": "lm_head/transform/dequantize", "simple_mode": 1 }, { "bottom": "lm_head/transform_out", "weights": {}, "mode": 22, "debug_info": "", "top": "lm_head/act_out", "type": "activation", "name": "lm_head/act" }, { "name": "lm_head/lnorm_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "lm_head/act_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "lm_head/lnorm_out_after_input_transpose" }, { "bottom": "lm_head/lnorm_out_after_input_transpose", "weights": { "wBeta": 377, "wGamma": 379 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "lm_head/lnorm_after_output_transpose", "type": "instancenorm_1d", "name": "lm_head/lnorm", "eps_in_square_root": 1 }, { "name": "lm_head/lnorm_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "lm_head/lnorm_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "lm_head/lnorm_out" }, { "dst_cp": -1, "bottom": "lm_head/lnorm_out", "weights": {}, "debug_info": "", "top": "lm_head/lnorm_out/quantized_per_layer,lm_head/lnorm_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "lm_head/lnorm_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "final_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 5 }, "nC": 15000, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "lm_head/lnorm_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "final_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "final_fc/output_raw,lm_head/lnorm_out/act_scale_per_layer", "weights": { "Qscale": 3 }, "debug_info": "", "top": "final_fc_out", "type": "dynamic_dequantize", "name": "final_fc/dequantize", "simple_mode": 1 }, { "bottom": "final_fc_out,temperature", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "final_fc_scale_out", "type": "elementwise", "name": "final_fc_scale", "beta": 0 }, { "bottom": "final_fc_scale_out", "weights": {}, "debug_info": "", "top": "output", "C": 2, "type": "softmax", "name": "softmax" } ] }