{ "storage": "unilm_joint.espresso.weights", "analyses": { "N8Espresso23analysis_debug_metadataE": { "path": "", "bundle": "", "product": "", "use_case": "", "name": "LOCALHOST-2023-05-01-CPU-quant", "version": "" } }, "properties": { "fast_reshape": "0", "mldb_token" : "mldb-5qbg63zgxe" }, "format_version": 200, "metadata_in_weights": [], "layers": [ { "nB": 15000, "top": "input_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 3, "W_t_int8": 5 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "input", "debug_info": "", "has_tanh": 0, "name": "embed", "has_prelu": 0 }, { "nB": 256, "top": "position_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 7, "W_t_int8": 9 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "position", "debug_info": "", "has_tanh": 0, "name": "embed_position", "has_prelu": 0 }, { "nB": 2, "top": "segment_out", "has_biases": 0, "weights": { "Q": 1, "Qscale_t": 11, "W_t_int8": 13 }, "nC": 512, "is_lookup": 1, "quantization_mode": 2, "type": "inner_product", "has_relu": 0, "bottom": "segment", "debug_info": "", "has_tanh": 0, "name": "embed_segment", "has_prelu": 0 }, { "bottom": "position_out,segment_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "embed_pos_seg_out", "type": "elementwise", "name": "embed_pos_seg", "beta": 0 }, { "bottom": "embed_pos_seg_out,input_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "add_out", "type": "elementwise", "name": "add", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "add_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 15, "wGamma": 17 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 19 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:query_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 21, "Qscale": 23 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 25 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:key_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 27, "Qscale": 29 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 31 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:value_fc/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 33, "Qscale": 35 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d:query,ane_gpt2_transformer_layer_3d:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d:softmax,ane_gpt2_transformer_layer_3d:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_out,ane_gpt2_transformer_layer_3d/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 37 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/attn_fc/output_raw,ane_gpt2_transformer_layer_3d/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d/attn_out/act_scale_per_token", "weights": { "biases": 39, "Qscale": 41 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "add_out,ane_gpt2_transformer_layer_3d/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 43, "wGamma": 45 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 47 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 49, "Qscale": 51 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out,ane_gpt2_transformer_layer_3d/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 53 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d/ffn_act_out/act_scale_per_token", "weights": { "biases": 55, "Qscale": 57 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_attn_out,ane_gpt2_transformer_layer_3d/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 59, "wGamma": 61 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 63 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 65, "Qscale": 67 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 69 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 71, "Qscale": 73 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 75 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 77, "Qscale": 79 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_1:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_1:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_1:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_1:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_1:query,ane_gpt2_transformer_layer_3d_1:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_1:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_1:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_1:softmax,ane_gpt2_transformer_layer_3d_1:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_1:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_1/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_1/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out,ane_gpt2_transformer_layer_3d_1/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 81 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_1/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/attn_out/act_scale_per_token", "weights": { "biases": 83, "Qscale": 85 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d/residual_ffn_out,ane_gpt2_transformer_layer_3d_1/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 87, "wGamma": 89 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 91 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 93, "Qscale": 95 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out,ane_gpt2_transformer_layer_3d_1/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 97 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_1/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_1/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d_1/ffn_act_out/act_scale_per_token", "weights": { "biases": 99, "Qscale": 101 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_1/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_attn_out,ane_gpt2_transformer_layer_3d_1/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_1/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 103, "wGamma": 105 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 107 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 109, "Qscale": 111 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 113 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 115, "Qscale": 117 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 119 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 121, "Qscale": 123 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_2:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_2:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_2:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_2:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_2:query,ane_gpt2_transformer_layer_3d_2:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_2:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_2:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_2:softmax,ane_gpt2_transformer_layer_3d_2:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_2:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_2/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_2/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out,ane_gpt2_transformer_layer_3d_2/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 125 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_2/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/attn_out/act_scale_per_token", "weights": { "biases": 127, "Qscale": 129 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_1/residual_ffn_out,ane_gpt2_transformer_layer_3d_2/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 131, "wGamma": 133 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 135 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 137, "Qscale": 139 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out,ane_gpt2_transformer_layer_3d_2/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 141 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_2/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_2/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d_2/ffn_act_out/act_scale_per_token", "weights": { "biases": 143, "Qscale": 145 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_2/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_attn_out,ane_gpt2_transformer_layer_3d_2/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_2/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 147, "wGamma": 149 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 151 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 153, "Qscale": 155 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 157 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 159, "Qscale": 161 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 163 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 165, "Qscale": 167 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_3:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_3:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_3:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_3:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_3:query,ane_gpt2_transformer_layer_3d_3:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_3:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_3:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_3:softmax,ane_gpt2_transformer_layer_3d_3:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_3:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_3/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_3/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out,ane_gpt2_transformer_layer_3d_3/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 169 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_3/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/attn_out/act_scale_per_token", "weights": { "biases": 171, "Qscale": 173 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_2/residual_ffn_out,ane_gpt2_transformer_layer_3d_3/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 175, "wGamma": 177 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 179 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 181, "Qscale": 183 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out,ane_gpt2_transformer_layer_3d_3/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 185 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_3/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_3/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d_3/ffn_act_out/act_scale_per_token", "weights": { "biases": 187, "Qscale": 189 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_3/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_attn_out,ane_gpt2_transformer_layer_3d_3/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_3/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 191, "wGamma": 193 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 195 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 197, "Qscale": 199 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 201 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 203, "Qscale": 205 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 207 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 209, "Qscale": 211 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_4:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_4:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_4:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_4:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_4:query,ane_gpt2_transformer_layer_3d_4:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_4:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_4:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_4:softmax,ane_gpt2_transformer_layer_3d_4:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_4:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_4/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_4/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out,ane_gpt2_transformer_layer_3d_4/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 213 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_4/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/attn_out/act_scale_per_token", "weights": { "biases": 215, "Qscale": 217 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_3/residual_ffn_out,ane_gpt2_transformer_layer_3d_4/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 219, "wGamma": 221 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 223 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 225, "Qscale": 227 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out,ane_gpt2_transformer_layer_3d_4/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 229 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_4/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_4/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d_4/ffn_act_out/act_scale_per_token", "weights": { "biases": 231, "Qscale": 233 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_4/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_attn_out,ane_gpt2_transformer_layer_3d_4/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_4/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out_after_input_transpose", "weights": { "wBeta": 235, "wGamma": 237 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 239 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:query_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:query_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:query_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 241, "Qscale": 243 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:query_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:query_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 245 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:key_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:key_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 247, "Qscale": 249 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:key_fc/dequantize", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 251 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5:value_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:value_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_attn_out/act_scale_per_token", "weights": { "biases": 253, "Qscale": 255 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_fc", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5:value_fc/dequantize", "simple_mode": 1 }, { "name": "ane_gpt2_transformer_layer_3d_5:query_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:query_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:query_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:query_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:query", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5:query_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_5:key_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:key_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:key_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:key_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:key_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5:key_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_5:value_reshape", "weights": {}, "dst_w": 64, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 0, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:value_fc", "debug_info": "", "dst_seq": 0, "dst_k": -1, "top": "ane_gpt2_transformer_layer_3d_5:value_tmp" }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:value_tmp", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:value_state_concat", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5:value_transpose" }, { "bottom": "ane_gpt2_transformer_layer_3d_5:query,ane_gpt2_transformer_layer_3d_5:key_state_concat", "weights": {}, "transpose_y": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:raw_score", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:raw_score", "channel_mode": false }, { "bottom": "ane_gpt2_transformer_layer_3d_5:raw_score", "alpha": 0.125, "operation": 3, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5:scaled_raw_score", "weights": {}, "nd_mode": true, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:softmax", "C": 2, "type": "softmax", "name": "ane_gpt2_transformer_layer_3d_5:softmax" }, { "bottom": "ane_gpt2_transformer_layer_3d_5:softmax,ane_gpt2_transformer_layer_3d_5:value_state_concat", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "type": "batch_matmul", "name": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "channel_mode": false }, { "axis_h": 4, "axis_w": 0, "bottom": "ane_gpt2_transformer_layer_3d_5:weighted_avg", "axis_k": 2, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/context_tmp", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5/context_transpose" }, { "name": "ane_gpt2_transformer_layer_3d_5/context_reshape", "weights": {}, "dst_w": 1, "version": 1, "dst_n": 0, "dst_nd_rank": 5, "type": "reshape", "dst_h": 1, "mode": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/context_tmp", "debug_info": "", "dst_seq": -1, "dst_k": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_out" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_5/attn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out,ane_gpt2_transformer_layer_3d_5/attn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw", "has_biases": 0, "weights": { "W_int8": 257 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_raw,ane_gpt2_transformer_layer_3d_5/attn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/attn_fc/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/attn_out/act_scale_per_token", "weights": { "biases": 259, "Qscale": 261 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/attn_fc_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/attn_fc/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_4/residual_ffn_out,ane_gpt2_transformer_layer_3d_5/attn_fc_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_attn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_input_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out_after_input_transpose", "weights": { "wBeta": 263, "wGamma": 265 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "type": "instancenorm_1d", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out", "type": "transpose", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_output_t" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantize_per_token", "simple_mode": 1 }, { "nB": 512, "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw", "has_biases": 0, "weights": { "W_int8": 267 }, "nC": 2048, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_raw,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/ln_pre_ffn_out/act_scale_per_token", "weights": { "biases": 269, "Qscale": 271 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_expand/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_expand_out", "weights": {}, "mode": 22, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "type": "activation", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out", "alpha": 1, "operation": 24, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/abs", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/normalize_per_token/abs", "beta": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/abs", "weights": {}, "mode": 9, "debug_info": "", "use_version": 1, "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/max_abs", "type": "reduce", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/normalize_per_token/max" }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/max_abs", "alpha": 9.999999974752427e-07, "operation": 119, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/tok_scales", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/normalize_per_token/clip", "beta": 1000000 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out,ane_gpt2_transformer_layer_3d_5/ffn_act_out/tok_scales", "alpha": 1, "operation": 102, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/normalize_per_token/div", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/scaled_per_token", "weights": {}, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_token,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_token", "type": "dynamic_quantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantize_per_token", "simple_mode": 1 }, { "nB": 2048, "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw", "has_biases": 0, "weights": { "W_int8": 273 }, "nC": 512, "quantization_mode": 5, "type": "inner_product", "has_relu": 0, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_act_out/quantized_per_token", "debug_info": "", "has_tanh": 0, "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/quantized", "has_prelu": 0 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_raw,ane_gpt2_transformer_layer_3d_5/ffn_act_out/tok_scales", "alpha": 1, "operation": 1, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_scaled_per_token", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/scale_tokens", "beta": 0 }, { "dst_cp": -1, "bottom": "ane_gpt2_transformer_layer_3d_5/ffn_contract/output_scaled_per_token,ane_gpt2_transformer_layer_3d_5/ffn_act_out/act_scale_per_token", "weights": { "biases": 275, "Qscale": 277 }, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "type": "dynamic_dequantize", "name": "ane_gpt2_transformer_layer_3d_5/ffn_contract/dequantize", "simple_mode": 1 }, { "bottom": "ane_gpt2_transformer_layer_3d_5/residual_attn_out,ane_gpt2_transformer_layer_3d_5/ffn_contract_out", "alpha": 1, "operation": 0, "weights": {}, "fused_relu": 0, "debug_info": "", "top": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "type": "elementwise", "name": "ane_gpt2_transformer_layer_3d_5/residual_ffn", "beta": 0 }, { "axis_h": 4, "axis_w": 2, "bottom": "ane_gpt2_transformer_layer_3d_5/residual_ffn_out", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "decoder/ln_final_out_after_input_transpose", "type": "transpose", "name": "decoder/ln_final_input_t" }, { "bottom": "decoder/ln_final_out_after_input_transpose", "weights": { "wBeta": 279, "wGamma": 281 }, "eps": 9.999999747378752e-06, "tf_layernorm": 1, "debug_info": "", "top": "decoder/ln_final_after_output_transpose", "type": "instancenorm_1d", "name": "decoder/ln_final", "eps_in_square_root": 1 }, { "axis_h": 4, "axis_w": 2, "bottom": "decoder/ln_final_after_output_transpose", "axis_k": 0, "axis_n": 3, "axis_seq": 1, "weights": {}, "debug_info": "", "top": "decoder/ln_final_out", "type": "transpose", "name": "decoder/ln_final_output_t" } ] }