{ "storage": "unilm_joint.espresso.weights", "analyses": { "N8Espresso23analysis_debug_metadataE": { "path": "", "bundle": "", "product": "", "use_case": "", "name": "LOCALHOST-2023-08-09-CPU-quant", "version": "" } }, "properties": { "mldb_token": "mldb-5qbg63zgxe", "fast_reshape": "1" }, "format_version": 200, "metadata_in_weights": [], "layers": [ { "nB": 50000, "top": "input_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 3, "W_t_int8": 5 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "input", "debug_info": "", "has_tanh": 0, "name": "embed", "has_prelu": 0 }, { "nB": 256, "top": "position_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 7, "W_t_int8": 9 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "position", "debug_info": "", "has_tanh": 0, "name": "embed_position", "has_prelu": 0 }, { "nB": 1, "top": "segment_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 11, "W_t_int8": 13 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "segment", "debug_info": "", "has_tanh": 0, "name": "embed_segment", "has_prelu": 0 }, { "bottom": "position_out,segment_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "embed_pos_seg_out", "type": "elementwise", "name": "embed_pos_seg", "beta": 0 }, { "bottom": "embed_pos_seg_out,input_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "add_out", "type": "elementwise", "name": "add", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "add_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 15, "wGamma": 17 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 19 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:query_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 21, "Qscale": 23 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 25 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:key_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 27, "Qscale": 29 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 31 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:value_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 33, "Qscale": 35 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d:query" }, { "name": "ane_gpt2_transformer_layer_3d:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_k_s_in,ane_gpt2_transformer_layer_3d/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_v_s_in,ane_gpt2_transformer_layer_3d/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d:query,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 37 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_layer", "weights": { "biases": 39, "Qscale": 41 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "add_out,ane_gpt2_transformer_layer_3d/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 43, "wGamma": 45 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 47 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 49, "Qscale": 51 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 53 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_layer", "weights": { "biases": 55, "Qscale": 57 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out,ane_gpt2_transformer_layer_3d/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 59, "wGamma": 61 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 63 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 65, "Qscale": 67 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 69 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 71, "Qscale": 73 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 75 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 77, "Qscale": 79 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1:query" }, { "name": "ane_gpt2_transformer_layer_3d_1:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_1:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_k_s_in,ane_gpt2_transformer_layer_3d_1/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_v_s_in,ane_gpt2_transformer_layer_3d_1/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_1:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_1:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_1/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_1/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_1/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 81 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_layer", "weights": { "biases": 83, "Qscale": 85 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out,ane_gpt2_transformer_layer_3d_1/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 87, "wGamma": 89 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 91 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 93, "Qscale": 95 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 97 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_layer", "weights": { "biases": 99, "Qscale": 101 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out,ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 103, "wGamma": 105 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 107 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 109, "Qscale": 111 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 113 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 115, "Qscale": 117 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 119 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 121, "Qscale": 123 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2:query" }, { "name": "ane_gpt2_transformer_layer_3d_2:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_2:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_k_s_in,ane_gpt2_transformer_layer_3d_2/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_v_s_in,ane_gpt2_transformer_layer_3d_2/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_2:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_2:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_2/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_2/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_2/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 125 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_layer", "weights": { "biases": 127, "Qscale": 129 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out,ane_gpt2_transformer_layer_3d_2/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 131, "wGamma": 133 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 135 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 137, "Qscale": 139 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 141 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_layer", "weights": { "biases": 143, "Qscale": 145 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out,ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 147, "wGamma": 149 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 151 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 153, "Qscale": 155 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 157 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 159, "Qscale": 161 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 163 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 165, "Qscale": 167 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3:query" }, { "name": "ane_gpt2_transformer_layer_3d_3:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_3:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_k_s_in,ane_gpt2_transformer_layer_3d_3/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_v_s_in,ane_gpt2_transformer_layer_3d_3/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_3:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_3:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_3/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_3/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_3/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 169 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_layer", "weights": { "biases": 171, "Qscale": 173 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out,ane_gpt2_transformer_layer_3d_3/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 175, "wGamma": 177 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 179 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 181, "Qscale": 183 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 185 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_layer", "weights": { "biases": 187, "Qscale": 189 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out,ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 191, "wGamma": 193 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 195 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 197, "Qscale": 199 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 201 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 203, "Qscale": 205 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 207 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 209, "Qscale": 211 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4:query" }, { "name": "ane_gpt2_transformer_layer_3d_4:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_4:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_k_s_in,ane_gpt2_transformer_layer_3d_4/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_v_s_in,ane_gpt2_transformer_layer_3d_4/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_4:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_4:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_4/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_4/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_4/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 213 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_layer", "weights": { "biases": 215, "Qscale": 217 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out,ane_gpt2_transformer_layer_3d_4/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 219, "wGamma": 221 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 223 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 225, "Qscale": 227 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 229 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_layer", "weights": { "biases": 231, "Qscale": 233 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out,ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 235, "wGamma": 237 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 239 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 241, "Qscale": 243 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 245 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 247, "Qscale": 249 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 251 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 253, "Qscale": 255 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5:query" }, { "name": "ane_gpt2_transformer_layer_3d_5:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_5:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_k_s_in,ane_gpt2_transformer_layer_3d_5/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_v_s_in,ane_gpt2_transformer_layer_3d_5/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_5:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_5:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_5/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_5/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_5/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 257 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_layer", "weights": { "biases": 259, "Qscale": 261 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out,ane_gpt2_transformer_layer_3d_5/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 263, "wGamma": 265 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 267 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 269, "Qscale": 271 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 273 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_layer", "weights": { "biases": 275, "Qscale": 277 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out,ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 279, "wGamma": 281 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 283 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:query_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 285, "Qscale": 287 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 289 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:key_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 291, "Qscale": 293 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 295 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6:value_fc/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 297, "Qscale": 299 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6:query" }, { "name": "ane_gpt2_transformer_layer_3d_6:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_6:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/attn_k_s_in,ane_gpt2_transformer_layer_3d_6/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/attn_v_s_in,ane_gpt2_transformer_layer_3d_6/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_6:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_6:query,ane_gpt2_transformer_layer_3d_6:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_6:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_6:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_6:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_6:softmax,ane_gpt2_transformer_layer_3d_6:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_6:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_6/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_6/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_6/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_6/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 301 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_6/attn_out/act_scale_per_layer", "weights": { "biases": 303, "Qscale": 305 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out,ane_gpt2_transformer_layer_3d_6/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 307, "wGamma": 309 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_6/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 311 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_6/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 313, "Qscale": 315 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_6/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_6/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_6/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 317 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_6/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_6/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_6/ffn_act_out/act_scale_per_layer", "weights": { "biases": 319, "Qscale": 321 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_6/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/residual_attn_out,ane_gpt2_transformer_layer_3d_6/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_6/residual_ffn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 323, "wGamma": 325 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 327 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:query_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:query_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 329, "Qscale": 331 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 333 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:key_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:key_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 335, "Qscale": 337 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 339 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7:value_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7:value_fc/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_attn_out/act_scale_per_layer", "weights": { "biases": 341, "Qscale": 343 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:query_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:query_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:query_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7:query" }, { "name": "ane_gpt2_transformer_layer_3d_7:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:key_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:key_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:key_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/attn_k_s_out" }, { "name": "ane_gpt2_transformer_layer_3d_7:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7:value_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7:value_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:value_tmp", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/attn_v_s_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/attn_k_s_in,ane_gpt2_transformer_layer_3d_7/attn_k_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:key_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:key_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/attn_v_s_in,ane_gpt2_transformer_layer_3d_7/attn_v_s_out", "weights": {}, "axis": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:value_state_concat", "type": "general_concat", "name": "ane_gpt2_transformer_layer_3d_7:value_state_concat" }, { "bottom": "ane_gpt2_transformer_layer_3d_7:query,ane_gpt2_transformer_layer_3d_7:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_7:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_7:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_7:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_7:softmax,ane_gpt2_transformer_layer_3d_7:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_7:weighted_avg", "channel_mode": false }, { "name": "ane_gpt2_transformer_layer_3d_7/context_transpose", "rank_preserving_mode": true, "dst_w": 0, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7:weighted_avg", "debug_info": "", "dst_seq": 0, "dst_k": 0, "top": "ane_gpt2_transformer_layer_3d_7/context_tmp" }, { "name": "ane_gpt2_transformer_layer_3d_7/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_7/attn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/attn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/attn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/attn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 345 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/attn_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_7/attn_out/act_scale_per_layer", "weights": { "biases": 347, "Qscale": 349 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_6/residual_ffn_out,ane_gpt2_transformer_layer_3d_7/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7/residual_attn", "beta": 0 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/residual_attn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out_after_input_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 351, "wGamma": 353 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn", "eps_in_square_root": 1 }, { "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_7/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 355 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/ffn_expand/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_7/ln_pre_ffn_out/act_scale_per_layer", "weights": { "biases": 357, "Qscale": 359 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_7/ffn_act" }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_act_out", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantized_per_layer,ane_gpt2_transformer_layer_3d_7/ffn_act_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_7/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 361 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_act_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_7/ffn_contract/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_7/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_7/ffn_act_out/act_scale_per_layer", "weights": { "biases": 363, "Qscale": 365 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_7/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_7/residual_attn_out,ane_gpt2_transformer_layer_3d_7/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_7/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_7/residual_ffn", "beta": 0 }, { "name": "decoder/ln_final_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "ane_gpt2_transformer_layer_3d_7/residual_ffn_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "decoder/ln_final_out_after_input_transpose" }, { "bottom": "decoder/ln_final_out_after_input_transpose", "weights": { "wBeta": 367, "wGamma": 369 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "decoder/ln_final_after_output_transpose", "type": "instancenorm_1d", "name": "decoder/ln_final", "eps_in_square_root": 1 }, { "name": "decoder/ln_final_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "decoder/ln_final_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "decoder/ln_final_out" }, { "dst_cp": -1, "bottom": "decoder/ln_final_out", "weights": {}, "debug_info": "", "top": "decoder/ln_final_out/quantized_per_layer,decoder/ln_final_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "decoder/ln_final_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "lm_head/transform/output_raw", "has_biases": 0, "weights": { "W_int8": 371 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "decoder/ln_final_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "lm_head/transform/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "lm_head/transform/output_raw,decoder/ln_final_out/act_scale_per_layer", "weights": { "biases": 373, "Qscale": 375 }, "debug_info": "", "top": "lm_head/transform_out", "type": "dynamic_dequantize", "name": "lm_head/transform/dequantize", "simple_mode": 1 }, { "bottom": "lm_head/transform_out", "weights": {}, "mode": 22, "debug_info": "", "top": "lm_head/act_out", "type": "activation", "name": "lm_head/act" }, { "name": "lm_head/lnorm_input_t", "rank_preserving_mode": true, "dst_w": -1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "lm_head/act_out", "debug_info": "", "dst_seq": 0, "dst_k": 1, "top": "lm_head/lnorm_out_after_input_transpose" }, { "bottom": "lm_head/lnorm_out_after_input_transpose", "weights": { "wBeta": 377, "wGamma": 379 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "lm_head/lnorm_after_output_transpose", "type": "instancenorm_1d", "name": "lm_head/lnorm", "eps_in_square_root": 1 }, { "name": "lm_head/lnorm_output_t", "rank_preserving_mode": true, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "weights": {}, "type": "reshape", "dst_h": 0, "mode": 0, "dynamic_shape": false, "bottom": "lm_head/lnorm_after_output_transpose", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "lm_head/lnorm_out" }, { "dst_cp": -1, "bottom": "lm_head/lnorm_out", "weights": {}, "debug_info": "", "top": "lm_head/lnorm_out/quantized_per_layer,lm_head/lnorm_out/act_scale_per_layer", "type": "dynamic_quantize", "name": "lm_head/lnorm_out/quantize_per_layer", "simple_mode": 1 }, { "nB": 512, "top": "final_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 5 }, "nC": 50000, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "lm_head/lnorm_out/quantized_per_layer", "debug_info": "", "has_tanh": 0, "name": "final_fc/quantized", "has_prelu": 0 }, { "dst_cp": -1, "bottom": "final_fc/output_raw,lm_head/lnorm_out/act_scale_per_layer", "weights": { "Qscale": 3 }, "debug_info": "", "top": "final_fc_out", "type": "dynamic_dequantize", "name": "final_fc/dequantize", "simple_mode": 1 }, { "bottom": "final_fc_out,temperature", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "final_fc_scale_out", "type": "elementwise", "name": "final_fc_scale", "beta": 0 }, { "bottom": "final_fc_scale_out", "weights": {}, "debug_info": "", "top": "output", "C": 2, "type": "softmax", "name": "softmax" } ] }