{ "layer_shapes" : { "ane_gpt2_transformer_layer_3d_5:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_2:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "output" : { "k" : 50000, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "decoder\/ln_final_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_2:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "input" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "embed_pos_seg_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/transform_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "decoder\/ln_final_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_5\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/lnorm_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_7\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "decoder\/ln_final_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "add_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_4\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/transform\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "decoder\/ln_final_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_2\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/lnorm_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/act_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "final_fc_scale_out" : { "k" : 50000, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "temperature" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "input_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "position" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "lm_head\/lnorm_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_4:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_3\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_3\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "final_fc\/output_raw" : { "k" : 50000, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_4\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "segment_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_6:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/lnorm_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ffn_act_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_expand\/output_raw" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:scaled_raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:query_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:value_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/residual_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/attn_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "decoder\/ln_final_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d\/ln_pre_attn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_fc_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_ffn_out_after_input_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "final_fc_out" : { "k" : 50000, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ln_pre_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:query_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "position_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/context_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_v_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_4\/attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_ffn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:key_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_k_s_in" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 0 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_ffn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_v_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:value_state_concat" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:value_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "segment" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ffn_contract\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4\/ln_pre_ffn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3:softmax" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1\/ln_pre_attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:key_fc\/output_raw" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d:value_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ln_pre_attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:query_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ffn_act_out\/quantized_per_layer" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_4:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/attn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:raw_score" : { "k" : 8, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "lm_head\/lnorm_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_expand_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/attn_out\/quantized_per_layer" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_3\/residual_attn_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6\/ln_pre_ffn_out\/act_scale_per_layer" : { "k" : 1, "w" : 1, "n" : 1, "_rank" : 1, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:key_fc" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_1:key_tmp" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_7\/ln_pre_attn_after_output_transpose" : { "k" : 1, "w" : 512, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2:query" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/attn_k_s_out" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_2\/ffn_contract_out" : { "k" : 512, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_5\/ffn_act_out" : { "k" : 2048, "w" : 1, "n" : 1, "_rank" : 5, "h" : 1 }, "ane_gpt2_transformer_layer_3d_6:weighted_avg" : { "k" : 8, "w" : 64, "n" : 1, "_rank" : 5, "h" : 1 } } }