-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCoreMLInspect-OpenELM-270M-Instruct-Compiled-Model-CPU-NE.txt
1989 lines (1988 loc) · 157 KB
/
CoreMLInspect-OpenELM-270M-Instruct-Compiled-Model-CPU-NE.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Analyzing model for compute unit [cpuAndNeuralEngine]...
Key: C=CPU, G=GPU, N=NeuralEngine
<Estimate of total operation cost>% <primary compute|supported compute> <operation>
func main(input_ids) {
var_5 = const()
var_22 = const()
inputs_embeds_axis_0 = const()
inputs_embeds_batch_dims_0 = const()
inputs_embeds_validate_indices_0 = const()
model_transformer_token_embeddings_weight_to_fp16 = const()
input_ids_to_int16_dtype_0 = const()
0.00% C| cast_34 = ios17.cast(x: ["input_ids"], dtype: ["input_ids_to_int16_dtype_0"])
7.66% C| inputs_embeds_cast_fp16_cast_int16 = ios17.gather(x: ["model_transformer_token_embeddings_weight_to_fp16"], indices: ["cast_34"], axis: ["inputs_embeds_axis_0"], batch_dims: ["inputs_embeds_batch_dims_0"], validate_indices: ["inputs_embeds_validate_indices_0"])
var_10_promoted_to_fp16 = const()
0.04% N|C var_94_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16"], x: ["inputs_embeds_cast_fp16_cast_int16"])
var_95 = const()
0.02% N|C var_96_cast_fp16 = ios16.reduce_mean(axes: ["var_95"], keep_dims: ["var_22"], x: ["var_94_cast_fp16"])
var_97_to_fp16 = const()
0.00% N|C var_98_cast_fp16 = ios17.add(y: ["var_97_to_fp16"], x: ["var_96_cast_fp16"])
var_99_epsilon_0 = const()
0.00% N|C var_99_cast_fp16 = ios17.rsqrt(x: ["var_98_cast_fp16"], epsilon: ["var_99_epsilon_0"])
0.04% N|C var_100_cast_fp16 = ios17.mul(y: ["var_99_cast_fp16"], x: ["inputs_embeds_cast_fp16_cast_int16"])
model_transformer_layers_0_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_1_cast_fp16 = ios17.mul(y: ["model_transformer_layers_0_attn_norm_weight_to_fp16"], x: ["var_100_cast_fp16"])
model_transformer_layers_0_attn_qkv_proj_weight_to_fp16 = const()
linear_0_bias_0_to_fp16 = const()
0.38% N|C linear_0_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_0_attn_qkv_proj_weight_to_fp16"], bias: ["linear_0_bias_0_to_fp16"], x: ["hidden_states_1_cast_fp16"])
var_114 = const()
0.04% N|C qkv_3_cast_fp16 = ios17.reshape(shape: ["var_114"], x: ["linear_0_cast_fp16"])
var_116_perm_0 = const()
var_117 = const()
var_118_axis_0 = const()
0.08% N|C transpose_95 = ios17.transpose(x: ["qkv_3_cast_fp16"], perm: ["var_116_perm_0"])
0.03% N|C var_118_cast_fp16_0, var_118_cast_fp16_1, var_118_cast_fp16_2 = split(axis: ["var_118_axis_0"], x: ["transpose_95"], split_sizes: ["var_117"])
var_10_promoted_to_fp16_1 = const()
0.03% N|C var_124_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_1"], x: ["var_118_cast_fp16_0"])
var_125 = const()
0.01% N|C var_126_cast_fp16 = ios16.reduce_mean(axes: ["var_125"], x: ["var_124_cast_fp16"], keep_dims: ["var_22"])
var_127_to_fp16 = const()
0.00% N|C var_128_cast_fp16 = ios17.add(y: ["var_127_to_fp16"], x: ["var_126_cast_fp16"])
var_129_epsilon_0 = const()
0.00% N|C var_129_cast_fp16 = ios17.rsqrt(epsilon: ["var_129_epsilon_0"], x: ["var_128_cast_fp16"])
0.02% N|C var_130_cast_fp16 = ios17.mul(x: ["var_118_cast_fp16_0"], y: ["var_129_cast_fp16"])
model_transformer_layers_0_attn_q_norm_weight_to_fp16 = const()
0.02% N|C query_1_cast_fp16 = ios17.mul(y: ["model_transformer_layers_0_attn_q_norm_weight_to_fp16"], x: ["var_130_cast_fp16"])
var_10_promoted_to_fp16_2 = const()
0.01% N|C var_135_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_2"], x: ["var_118_cast_fp16_1"])
var_136 = const()
0.00% N|C var_137_cast_fp16 = ios16.reduce_mean(axes: ["var_136"], keep_dims: ["var_22"], x: ["var_135_cast_fp16"])
var_138_to_fp16 = const()
0.00% N|C var_139_cast_fp16 = ios17.add(y: ["var_138_to_fp16"], x: ["var_137_cast_fp16"])
var_140_epsilon_0 = const()
0.00% N|C var_140_cast_fp16 = ios17.rsqrt(x: ["var_139_cast_fp16"], epsilon: ["var_140_epsilon_0"])
0.01% N|C var_141_cast_fp16 = ios17.mul(y: ["var_140_cast_fp16"], x: ["var_118_cast_fp16_1"])
model_transformer_layers_0_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_1_cast_fp16 = ios17.mul(x: ["var_141_cast_fp16"], y: ["model_transformer_layers_0_attn_k_norm_weight_to_fp16"])
var_156_to_fp16 = const()
0.02% N|C var_158_cast_fp16 = ios17.mul(x: ["query_1_cast_fp16"], y: ["var_156_to_fp16"])
var_159_split_sizes_0 = const()
var_159_axis_0 = const()
0.02% N|C var_159_cast_fp16_0, var_159_cast_fp16_1 = split(axis: ["var_159_axis_0"], x: ["query_1_cast_fp16"], split_sizes: ["var_159_split_sizes_0"])
const_6_promoted_to_fp16 = const()
0.01% N|C var_161_cast_fp16 = ios17.mul(x: ["var_159_cast_fp16_1"], y: ["const_6_promoted_to_fp16"])
var_163_interleave_0 = const()
0.01% N|C var_163_cast_fp16 = ios17.concat(axis: ["var_5"], interleave: ["var_163_interleave_0"], values: ["var_161_cast_fp16", "var_159_cast_fp16_0"])
var_152_to_fp16 = const()
0.02% N|C var_164_cast_fp16 = ios17.mul(x: ["var_163_cast_fp16"], y: ["var_152_to_fp16"])
0.06% N|C query_float_1_cast_fp16 = ios17.add(y: ["var_164_cast_fp16"], x: ["var_158_cast_fp16"])
0.01% N|C var_170_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["key_1_cast_fp16"])
var_171_split_sizes_0 = const()
var_171_axis_0 = const()
0.00% N|C var_171_cast_fp16_0, var_171_cast_fp16_1 = split(split_sizes: ["var_171_split_sizes_0"], x: ["key_1_cast_fp16"], axis: ["var_171_axis_0"])
const_7_promoted_to_fp16 = const()
0.00% N|C var_173_cast_fp16 = ios17.mul(x: ["var_171_cast_fp16_1"], y: ["const_7_promoted_to_fp16"])
var_175_interleave_0 = const()
0.00% N|C var_175_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_173_cast_fp16", "var_171_cast_fp16_0"], interleave: ["var_175_interleave_0"])
0.01% N|C var_176_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_175_cast_fp16"])
0.01% N|C key_float_1_cast_fp16 = ios17.add(y: ["var_176_cast_fp16"], x: ["var_170_cast_fp16"])
reshape_0_shape_0 = const()
0.01% N|C reshape_0_cast_fp16 = ios17.reshape(x: ["key_float_1_cast_fp16"], shape: ["reshape_0_shape_0"])
tile_0_reps_0 = const()
0.02% N|C tile_0_cast_fp16 = tile(x: ["reshape_0_cast_fp16"], reps: ["tile_0_reps_0"])
reshape_1_shape_0 = const()
0.03% N|C reshape_1_cast_fp16 = ios17.reshape(x: ["tile_0_cast_fp16"], shape: ["reshape_1_shape_0"])
transpose_0_perm_0 = const()
keys_3_shape_0 = const()
0.05% N|C transpose_94 = ios17.transpose(perm: ["transpose_0_perm_0"], x: ["reshape_1_cast_fp16"])
0.03% N|C keys_3_cast_fp16 = ios17.reshape(x: ["transpose_94"], shape: ["keys_3_shape_0"])
reshape_2_shape_0 = const()
0.01% N|C reshape_2_cast_fp16 = ios17.reshape(shape: ["reshape_2_shape_0"], x: ["var_118_cast_fp16_2"])
tile_1_reps_0 = const()
0.02% N|C tile_1_cast_fp16 = tile(reps: ["tile_1_reps_0"], x: ["reshape_2_cast_fp16"])
reshape_3_shape_0 = const()
0.03% N|C reshape_3_cast_fp16 = ios17.reshape(shape: ["reshape_3_shape_0"], x: ["tile_1_cast_fp16"])
transpose_1_perm_0 = const()
values_3_shape_0 = const()
0.05% N|C transpose_93 = ios17.transpose(perm: ["transpose_1_perm_0"], x: ["reshape_3_cast_fp16"])
0.03% N|C values_3_cast_fp16 = ios17.reshape(x: ["transpose_93"], shape: ["values_3_shape_0"])
mul_0_y_0_to_fp16 = const()
0.02% N|C mul_0_cast_fp16 = ios17.mul(x: ["query_float_1_cast_fp16"], y: ["mul_0_y_0_to_fp16"])
matmul_0_transpose_y_0 = const()
matmul_0_transpose_x_0 = const()
0.05% N|C matmul_0_cast_fp16 = ios17.matmul(transpose_y: ["matmul_0_transpose_y_0"], transpose_x: ["matmul_0_transpose_x_0"], y: ["keys_3_cast_fp16"], x: ["mul_0_cast_fp16"])
causal_mask_3_to_fp16 = const()
0.06% N|C add_1_cast_fp16 = ios17.add(y: ["causal_mask_3_to_fp16"], x: ["matmul_0_cast_fp16"])
softmax_0_axis_0 = const()
0.05% N|C softmax_0_cast_fp16 = ios16.softmax(axis: ["softmax_0_axis_0"], x: ["add_1_cast_fp16"])
attn_output_1_transpose_x_0 = const()
attn_output_1_transpose_y_0 = const()
0.05% N|C attn_output_1_cast_fp16 = ios17.matmul(transpose_x: ["attn_output_1_transpose_x_0"], x: ["softmax_0_cast_fp16"], y: ["values_3_cast_fp16"], transpose_y: ["attn_output_1_transpose_y_0"])
var_192_perm_0 = const()
var_194 = const()
0.05% N|C transpose_92 = ios17.transpose(x: ["attn_output_1_cast_fp16"], perm: ["var_192_perm_0"])
0.03% N|C input_3_cast_fp16 = ios17.reshape(x: ["transpose_92"], shape: ["var_194"])
model_transformer_layers_0_attn_out_proj_weight_to_fp16 = const()
linear_1_bias_0_to_fp16 = const()
0.14% N|C linear_1_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_0_attn_out_proj_weight_to_fp16"], x: ["input_3_cast_fp16"])
0.09% N|C x_15_cast_fp16 = ios17.add(y: ["linear_1_cast_fp16"], x: ["inputs_embeds_cast_fp16_cast_int16"])
var_10_promoted_to_fp16_3 = const()
0.04% N|C var_201_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_3"], x: ["x_15_cast_fp16"])
var_202 = const()
0.02% N|C var_203_cast_fp16 = ios16.reduce_mean(axes: ["var_202"], x: ["var_201_cast_fp16"], keep_dims: ["var_22"])
var_204_to_fp16 = const()
0.00% N|C var_205_cast_fp16 = ios17.add(y: ["var_204_to_fp16"], x: ["var_203_cast_fp16"])
var_206_epsilon_0 = const()
0.00% N|C var_206_cast_fp16 = ios17.rsqrt(x: ["var_205_cast_fp16"], epsilon: ["var_206_epsilon_0"])
0.04% N|C var_207_cast_fp16 = ios17.mul(x: ["x_15_cast_fp16"], y: ["var_206_cast_fp16"])
model_transformer_layers_0_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_5_cast_fp16 = ios17.mul(x: ["var_207_cast_fp16"], y: ["model_transformer_layers_0_ffn_norm_weight_to_fp16"])
model_transformer_layers_0_ffn_proj_1_weight_to_fp16 = const()
linear_2_bias_0_to_fp16 = const()
0.38% N|C linear_2_cast_fp16 = ios17.linear(x: ["input_5_cast_fp16"], bias: ["linear_2_bias_0_to_fp16"], weight: ["model_transformer_layers_0_ffn_proj_1_weight_to_fp16"])
var_217_split_sizes_0 = const()
var_217_axis_0 = const()
0.04% N|C var_217_cast_fp16_0, var_217_cast_fp16_1 = split(split_sizes: ["var_217_split_sizes_0"], x: ["linear_2_cast_fp16"], axis: ["var_217_axis_0"])
0.03% N|C var_219_cast_fp16 = ios16.silu(x: ["var_217_cast_fp16_0"])
0.05% N|C input_9_cast_fp16 = ios17.mul(x: ["var_219_cast_fp16"], y: ["var_217_cast_fp16_1"])
model_transformer_layers_0_ffn_proj_2_weight_to_fp16 = const()
0.14% N|C linear_3_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_0_ffn_proj_2_weight_to_fp16"], x: ["input_9_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
0.09% N|C x_19_cast_fp16 = ios17.add(x: ["x_15_cast_fp16"], y: ["linear_3_cast_fp16"])
var_10_promoted_to_fp16_4 = const()
0.04% N|C var_230_cast_fp16 = ios17.pow(x: ["x_19_cast_fp16"], y: ["var_10_promoted_to_fp16_4"])
var_231 = const()
0.02% N|C var_232_cast_fp16 = ios16.reduce_mean(axes: ["var_231"], keep_dims: ["var_22"], x: ["var_230_cast_fp16"])
var_233_to_fp16 = const()
0.00% N|C var_234_cast_fp16 = ios17.add(y: ["var_233_to_fp16"], x: ["var_232_cast_fp16"])
var_235_epsilon_0 = const()
0.00% N|C var_235_cast_fp16 = ios17.rsqrt(x: ["var_234_cast_fp16"], epsilon: ["var_235_epsilon_0"])
0.04% N|C var_236_cast_fp16 = ios17.mul(x: ["x_19_cast_fp16"], y: ["var_235_cast_fp16"])
model_transformer_layers_1_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_7_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_attn_norm_weight_to_fp16"], x: ["var_236_cast_fp16"])
model_transformer_layers_1_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_4_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_1_attn_qkv_proj_weight_to_fp16"], bias: ["linear_0_bias_0_to_fp16"], x: ["hidden_states_7_cast_fp16"])
var_250 = const()
0.04% N|C qkv_7_cast_fp16 = ios17.reshape(shape: ["var_250"], x: ["linear_4_cast_fp16"])
var_252_perm_0 = const()
var_253 = const()
var_254_axis_0 = const()
0.08% N|C transpose_91 = ios17.transpose(perm: ["var_252_perm_0"], x: ["qkv_7_cast_fp16"])
0.03% N|C var_254_cast_fp16_0, var_254_cast_fp16_1, var_254_cast_fp16_2 = split(x: ["transpose_91"], axis: ["var_254_axis_0"], split_sizes: ["var_253"])
var_10_promoted_to_fp16_5 = const()
0.03% N|C var_260_cast_fp16 = ios17.pow(x: ["var_254_cast_fp16_0"], y: ["var_10_promoted_to_fp16_5"])
var_261 = const()
0.01% N|C var_262_cast_fp16 = ios16.reduce_mean(x: ["var_260_cast_fp16"], axes: ["var_261"], keep_dims: ["var_22"])
var_263_to_fp16 = const()
0.00% N|C var_264_cast_fp16 = ios17.add(y: ["var_263_to_fp16"], x: ["var_262_cast_fp16"])
var_265_epsilon_0 = const()
0.00% N|C var_265_cast_fp16 = ios17.rsqrt(epsilon: ["var_265_epsilon_0"], x: ["var_264_cast_fp16"])
0.02% N|C var_266_cast_fp16 = ios17.mul(x: ["var_254_cast_fp16_0"], y: ["var_265_cast_fp16"])
model_transformer_layers_1_attn_q_norm_weight_to_fp16 = const()
0.02% N|C query_3_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_attn_q_norm_weight_to_fp16"], x: ["var_266_cast_fp16"])
var_10_promoted_to_fp16_6 = const()
0.01% N|C var_271_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_6"], x: ["var_254_cast_fp16_1"])
var_272 = const()
0.00% N|C var_273_cast_fp16 = ios16.reduce_mean(axes: ["var_272"], keep_dims: ["var_22"], x: ["var_271_cast_fp16"])
var_274_to_fp16 = const()
0.00% N|C var_275_cast_fp16 = ios17.add(x: ["var_273_cast_fp16"], y: ["var_274_to_fp16"])
var_276_epsilon_0 = const()
0.00% N|C var_276_cast_fp16 = ios17.rsqrt(x: ["var_275_cast_fp16"], epsilon: ["var_276_epsilon_0"])
0.01% N|C var_277_cast_fp16 = ios17.mul(y: ["var_276_cast_fp16"], x: ["var_254_cast_fp16_1"])
model_transformer_layers_1_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_3_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_attn_k_norm_weight_to_fp16"], x: ["var_277_cast_fp16"])
0.02% N|C var_294_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["query_3_cast_fp16"])
var_295_split_sizes_0 = const()
var_295_axis_0 = const()
0.02% N|C var_295_cast_fp16_0, var_295_cast_fp16_1 = split(split_sizes: ["var_295_split_sizes_0"], axis: ["var_295_axis_0"], x: ["query_3_cast_fp16"])
const_13_promoted_to_fp16 = const()
0.01% N|C var_297_cast_fp16 = ios17.mul(x: ["var_295_cast_fp16_1"], y: ["const_13_promoted_to_fp16"])
var_299_interleave_0 = const()
0.01% N|C var_299_cast_fp16 = ios17.concat(axis: ["var_5"], interleave: ["var_299_interleave_0"], values: ["var_297_cast_fp16", "var_295_cast_fp16_0"])
0.02% N|C var_300_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_299_cast_fp16"])
0.06% N|C query_float_3_cast_fp16 = ios17.add(y: ["var_300_cast_fp16"], x: ["var_294_cast_fp16"])
0.01% N|C var_306_cast_fp16 = ios17.mul(x: ["key_3_cast_fp16"], y: ["var_156_to_fp16"])
var_307_split_sizes_0 = const()
var_307_axis_0 = const()
0.00% N|C var_307_cast_fp16_0, var_307_cast_fp16_1 = split(split_sizes: ["var_307_split_sizes_0"], axis: ["var_307_axis_0"], x: ["key_3_cast_fp16"])
const_14_promoted_to_fp16 = const()
0.00% N|C var_309_cast_fp16 = ios17.mul(y: ["const_14_promoted_to_fp16"], x: ["var_307_cast_fp16_1"])
var_311_interleave_0 = const()
0.00% N|C var_311_cast_fp16 = ios17.concat(values: ["var_309_cast_fp16", "var_307_cast_fp16_0"], axis: ["var_5"], interleave: ["var_311_interleave_0"])
0.01% N|C var_312_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_311_cast_fp16"])
0.01% N|C key_float_3_cast_fp16 = ios17.add(y: ["var_312_cast_fp16"], x: ["var_306_cast_fp16"])
reshape_4_shape_0 = const()
0.01% N|C reshape_4_cast_fp16 = ios17.reshape(x: ["key_float_3_cast_fp16"], shape: ["reshape_4_shape_0"])
tile_2_reps_0 = const()
0.02% N|C tile_2_cast_fp16 = tile(x: ["reshape_4_cast_fp16"], reps: ["tile_2_reps_0"])
reshape_5_shape_0 = const()
0.03% N|C reshape_5_cast_fp16 = ios17.reshape(shape: ["reshape_5_shape_0"], x: ["tile_2_cast_fp16"])
transpose_2_perm_0 = const()
keys_7_shape_0 = const()
0.05% N|C transpose_90 = ios17.transpose(perm: ["transpose_2_perm_0"], x: ["reshape_5_cast_fp16"])
0.03% N|C keys_7_cast_fp16 = ios17.reshape(shape: ["keys_7_shape_0"], x: ["transpose_90"])
reshape_6_shape_0 = const()
0.01% N|C reshape_6_cast_fp16 = ios17.reshape(shape: ["reshape_6_shape_0"], x: ["var_254_cast_fp16_2"])
tile_3_reps_0 = const()
0.02% N|C tile_3_cast_fp16 = tile(reps: ["tile_3_reps_0"], x: ["reshape_6_cast_fp16"])
reshape_7_shape_0 = const()
0.03% N|C reshape_7_cast_fp16 = ios17.reshape(x: ["tile_3_cast_fp16"], shape: ["reshape_7_shape_0"])
transpose_3_perm_0 = const()
values_7_shape_0 = const()
0.05% N|C transpose_89 = ios17.transpose(x: ["reshape_7_cast_fp16"], perm: ["transpose_3_perm_0"])
0.03% N|C values_7_cast_fp16 = ios17.reshape(shape: ["values_7_shape_0"], x: ["transpose_89"])
mul_1_y_0_to_fp16 = const()
0.02% N|C mul_1_cast_fp16 = ios17.mul(y: ["mul_1_y_0_to_fp16"], x: ["query_float_3_cast_fp16"])
matmul_1_transpose_y_0 = const()
matmul_1_transpose_x_0 = const()
0.05% N|C matmul_1_cast_fp16 = ios17.matmul(x: ["mul_1_cast_fp16"], transpose_x: ["matmul_1_transpose_x_0"], y: ["keys_7_cast_fp16"], transpose_y: ["matmul_1_transpose_y_0"])
0.06% N|C add_3_cast_fp16 = ios17.add(x: ["matmul_1_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_1_axis_0 = const()
0.05% N|C softmax_1_cast_fp16 = ios16.softmax(axis: ["softmax_1_axis_0"], x: ["add_3_cast_fp16"])
attn_output_5_transpose_x_0 = const()
attn_output_5_transpose_y_0 = const()
0.05% N|C attn_output_5_cast_fp16 = ios17.matmul(x: ["softmax_1_cast_fp16"], y: ["values_7_cast_fp16"], transpose_y: ["attn_output_5_transpose_y_0"], transpose_x: ["attn_output_5_transpose_x_0"])
var_328_perm_0 = const()
var_330 = const()
0.05% N|C transpose_88 = ios17.transpose(perm: ["var_328_perm_0"], x: ["attn_output_5_cast_fp16"])
0.03% N|C input_11_cast_fp16 = ios17.reshape(shape: ["var_330"], x: ["transpose_88"])
model_transformer_layers_1_attn_out_proj_weight_to_fp16 = const()
0.14% N|C linear_5_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_11_cast_fp16"], weight: ["model_transformer_layers_1_attn_out_proj_weight_to_fp16"])
0.09% N|C x_35_cast_fp16 = ios17.add(y: ["linear_5_cast_fp16"], x: ["x_19_cast_fp16"])
var_10_promoted_to_fp16_7 = const()
0.04% N|C var_337_cast_fp16 = ios17.pow(x: ["x_35_cast_fp16"], y: ["var_10_promoted_to_fp16_7"])
var_338 = const()
0.02% N|C var_339_cast_fp16 = ios16.reduce_mean(x: ["var_337_cast_fp16"], keep_dims: ["var_22"], axes: ["var_338"])
var_340_to_fp16 = const()
0.00% N|C var_341_cast_fp16 = ios17.add(x: ["var_339_cast_fp16"], y: ["var_340_to_fp16"])
var_342_epsilon_0 = const()
0.00% N|C var_342_cast_fp16 = ios17.rsqrt(x: ["var_341_cast_fp16"], epsilon: ["var_342_epsilon_0"])
0.04% N|C var_343_cast_fp16 = ios17.mul(y: ["var_342_cast_fp16"], x: ["x_35_cast_fp16"])
model_transformer_layers_1_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_1_ffn_norm_weight_to_fp16"], x: ["var_343_cast_fp16"])
model_transformer_layers_1_ffn_proj_1_weight_to_fp16 = const()
linear_6_bias_0_to_fp16 = const()
0.38% N|C linear_6_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_1_ffn_proj_1_weight_to_fp16"], x: ["input_13_cast_fp16"], bias: ["linear_6_bias_0_to_fp16"])
var_353_split_sizes_0 = const()
var_353_axis_0 = const()
0.05% N|C var_353_cast_fp16_0, var_353_cast_fp16_1 = split(x: ["linear_6_cast_fp16"], split_sizes: ["var_353_split_sizes_0"], axis: ["var_353_axis_0"])
0.03% N|C var_355_cast_fp16 = ios16.silu(x: ["var_353_cast_fp16_0"])
0.06% N|C input_17_cast_fp16 = ios17.mul(y: ["var_353_cast_fp16_1"], x: ["var_355_cast_fp16"])
model_transformer_layers_1_ffn_proj_2_weight_to_fp16 = const()
0.24% N|C linear_7_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_17_cast_fp16"], weight: ["model_transformer_layers_1_ffn_proj_2_weight_to_fp16"])
0.09% N|C x_39_cast_fp16 = ios17.add(y: ["linear_7_cast_fp16"], x: ["x_35_cast_fp16"])
var_10_promoted_to_fp16_8 = const()
0.04% N|C var_366_cast_fp16 = ios17.pow(x: ["x_39_cast_fp16"], y: ["var_10_promoted_to_fp16_8"])
var_367 = const()
0.02% N|C var_368_cast_fp16 = ios16.reduce_mean(x: ["var_366_cast_fp16"], keep_dims: ["var_22"], axes: ["var_367"])
var_369_to_fp16 = const()
0.00% N|C var_370_cast_fp16 = ios17.add(y: ["var_369_to_fp16"], x: ["var_368_cast_fp16"])
var_371_epsilon_0 = const()
0.00% N|C var_371_cast_fp16 = ios17.rsqrt(epsilon: ["var_371_epsilon_0"], x: ["var_370_cast_fp16"])
0.04% N|C var_372_cast_fp16 = ios17.mul(x: ["x_39_cast_fp16"], y: ["var_371_cast_fp16"])
model_transformer_layers_2_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_13_cast_fp16 = ios17.mul(x: ["var_372_cast_fp16"], y: ["model_transformer_layers_2_attn_norm_weight_to_fp16"])
model_transformer_layers_2_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_8_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_2_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_13_cast_fp16"], bias: ["linear_0_bias_0_to_fp16"])
var_386 = const()
0.04% N|C qkv_11_cast_fp16 = ios17.reshape(x: ["linear_8_cast_fp16"], shape: ["var_386"])
var_388_perm_0 = const()
var_389 = const()
var_390_axis_0 = const()
0.08% N|C transpose_87 = ios17.transpose(perm: ["var_388_perm_0"], x: ["qkv_11_cast_fp16"])
0.03% N|C var_390_cast_fp16_0, var_390_cast_fp16_1, var_390_cast_fp16_2 = split(axis: ["var_390_axis_0"], x: ["transpose_87"], split_sizes: ["var_389"])
var_10_promoted_to_fp16_9 = const()
0.03% N|C var_396_cast_fp16 = ios17.pow(x: ["var_390_cast_fp16_0"], y: ["var_10_promoted_to_fp16_9"])
var_397 = const()
0.01% N|C var_398_cast_fp16 = ios16.reduce_mean(axes: ["var_397"], x: ["var_396_cast_fp16"], keep_dims: ["var_22"])
var_399_to_fp16 = const()
0.00% N|C var_400_cast_fp16 = ios17.add(y: ["var_399_to_fp16"], x: ["var_398_cast_fp16"])
var_401_epsilon_0 = const()
0.00% N|C var_401_cast_fp16 = ios17.rsqrt(x: ["var_400_cast_fp16"], epsilon: ["var_401_epsilon_0"])
0.02% N|C var_402_cast_fp16 = ios17.mul(y: ["var_401_cast_fp16"], x: ["var_390_cast_fp16_0"])
model_transformer_layers_2_attn_q_norm_weight_to_fp16 = const()
0.02% N|C query_5_cast_fp16 = ios17.mul(x: ["var_402_cast_fp16"], y: ["model_transformer_layers_2_attn_q_norm_weight_to_fp16"])
var_10_promoted_to_fp16_10 = const()
0.01% N|C var_407_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_10"], x: ["var_390_cast_fp16_1"])
var_408 = const()
0.00% N|C var_409_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_407_cast_fp16"], axes: ["var_408"])
var_410_to_fp16 = const()
0.00% N|C var_411_cast_fp16 = ios17.add(y: ["var_410_to_fp16"], x: ["var_409_cast_fp16"])
var_412_epsilon_0 = const()
0.00% N|C var_412_cast_fp16 = ios17.rsqrt(x: ["var_411_cast_fp16"], epsilon: ["var_412_epsilon_0"])
0.01% N|C var_413_cast_fp16 = ios17.mul(x: ["var_390_cast_fp16_1"], y: ["var_412_cast_fp16"])
model_transformer_layers_2_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_5_cast_fp16 = ios17.mul(y: ["model_transformer_layers_2_attn_k_norm_weight_to_fp16"], x: ["var_413_cast_fp16"])
0.02% N|C var_430_cast_fp16 = ios17.mul(x: ["query_5_cast_fp16"], y: ["var_156_to_fp16"])
var_431_split_sizes_0 = const()
var_431_axis_0 = const()
0.02% N|C var_431_cast_fp16_0, var_431_cast_fp16_1 = split(axis: ["var_431_axis_0"], split_sizes: ["var_431_split_sizes_0"], x: ["query_5_cast_fp16"])
const_20_promoted_to_fp16 = const()
0.01% N|C var_433_cast_fp16 = ios17.mul(y: ["const_20_promoted_to_fp16"], x: ["var_431_cast_fp16_1"])
var_435_interleave_0 = const()
0.01% N|C var_435_cast_fp16 = ios17.concat(axis: ["var_5"], interleave: ["var_435_interleave_0"], values: ["var_433_cast_fp16", "var_431_cast_fp16_0"])
0.02% N|C var_436_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_435_cast_fp16"])
0.06% N|C query_float_5_cast_fp16 = ios17.add(y: ["var_436_cast_fp16"], x: ["var_430_cast_fp16"])
0.01% N|C var_442_cast_fp16 = ios17.mul(x: ["key_5_cast_fp16"], y: ["var_156_to_fp16"])
var_443_split_sizes_0 = const()
var_443_axis_0 = const()
0.00% N|C var_443_cast_fp16_0, var_443_cast_fp16_1 = split(split_sizes: ["var_443_split_sizes_0"], x: ["key_5_cast_fp16"], axis: ["var_443_axis_0"])
const_21_promoted_to_fp16 = const()
0.00% N|C var_445_cast_fp16 = ios17.mul(x: ["var_443_cast_fp16_1"], y: ["const_21_promoted_to_fp16"])
var_447_interleave_0 = const()
0.00% N|C var_447_cast_fp16 = ios17.concat(values: ["var_445_cast_fp16", "var_443_cast_fp16_0"], interleave: ["var_447_interleave_0"], axis: ["var_5"])
0.01% N|C var_448_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_447_cast_fp16"])
0.01% N|C key_float_5_cast_fp16 = ios17.add(y: ["var_448_cast_fp16"], x: ["var_442_cast_fp16"])
reshape_8_shape_0 = const()
0.01% N|C reshape_8_cast_fp16 = ios17.reshape(x: ["key_float_5_cast_fp16"], shape: ["reshape_8_shape_0"])
tile_4_reps_0 = const()
0.02% N|C tile_4_cast_fp16 = tile(x: ["reshape_8_cast_fp16"], reps: ["tile_4_reps_0"])
reshape_9_shape_0 = const()
0.03% N|C reshape_9_cast_fp16 = ios17.reshape(x: ["tile_4_cast_fp16"], shape: ["reshape_9_shape_0"])
transpose_4_perm_0 = const()
keys_11_shape_0 = const()
0.05% N|C transpose_86 = ios17.transpose(perm: ["transpose_4_perm_0"], x: ["reshape_9_cast_fp16"])
0.03% N|C keys_11_cast_fp16 = ios17.reshape(x: ["transpose_86"], shape: ["keys_11_shape_0"])
reshape_10_shape_0 = const()
0.01% N|C reshape_10_cast_fp16 = ios17.reshape(x: ["var_390_cast_fp16_2"], shape: ["reshape_10_shape_0"])
tile_5_reps_0 = const()
0.02% N|C tile_5_cast_fp16 = tile(reps: ["tile_5_reps_0"], x: ["reshape_10_cast_fp16"])
reshape_11_shape_0 = const()
0.03% N|C reshape_11_cast_fp16 = ios17.reshape(x: ["tile_5_cast_fp16"], shape: ["reshape_11_shape_0"])
transpose_5_perm_0 = const()
values_11_shape_0 = const()
0.05% N|C transpose_85 = ios17.transpose(x: ["reshape_11_cast_fp16"], perm: ["transpose_5_perm_0"])
0.03% N|C values_11_cast_fp16 = ios17.reshape(shape: ["values_11_shape_0"], x: ["transpose_85"])
mul_2_y_0_to_fp16 = const()
0.02% N|C mul_2_cast_fp16 = ios17.mul(y: ["mul_2_y_0_to_fp16"], x: ["query_float_5_cast_fp16"])
matmul_2_transpose_y_0 = const()
matmul_2_transpose_x_0 = const()
0.05% N|C matmul_2_cast_fp16 = ios17.matmul(transpose_y: ["matmul_2_transpose_y_0"], transpose_x: ["matmul_2_transpose_x_0"], y: ["keys_11_cast_fp16"], x: ["mul_2_cast_fp16"])
0.06% N|C add_5_cast_fp16 = ios17.add(x: ["matmul_2_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_2_axis_0 = const()
0.05% N|C softmax_2_cast_fp16 = ios16.softmax(axis: ["softmax_2_axis_0"], x: ["add_5_cast_fp16"])
attn_output_9_transpose_x_0 = const()
attn_output_9_transpose_y_0 = const()
0.05% N|C attn_output_9_cast_fp16 = ios17.matmul(transpose_y: ["attn_output_9_transpose_y_0"], x: ["softmax_2_cast_fp16"], transpose_x: ["attn_output_9_transpose_x_0"], y: ["values_11_cast_fp16"])
var_464_perm_0 = const()
var_466 = const()
0.05% N|C transpose_84 = ios17.transpose(perm: ["var_464_perm_0"], x: ["attn_output_9_cast_fp16"])
0.03% N|C input_19_cast_fp16 = ios17.reshape(shape: ["var_466"], x: ["transpose_84"])
model_transformer_layers_2_attn_out_proj_weight_to_fp16 = const()
0.14% N|C linear_9_cast_fp16 = ios17.linear(x: ["input_19_cast_fp16"], weight: ["model_transformer_layers_2_attn_out_proj_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"])
0.09% N|C x_55_cast_fp16 = ios17.add(x: ["x_39_cast_fp16"], y: ["linear_9_cast_fp16"])
var_10_promoted_to_fp16_11 = const()
0.04% N|C var_473_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_11"], x: ["x_55_cast_fp16"])
var_474 = const()
0.02% N|C var_475_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_474"], x: ["var_473_cast_fp16"])
var_476_to_fp16 = const()
0.00% N|C var_477_cast_fp16 = ios17.add(x: ["var_475_cast_fp16"], y: ["var_476_to_fp16"])
var_478_epsilon_0 = const()
0.00% N|C var_478_cast_fp16 = ios17.rsqrt(x: ["var_477_cast_fp16"], epsilon: ["var_478_epsilon_0"])
0.04% N|C var_479_cast_fp16 = ios17.mul(y: ["var_478_cast_fp16"], x: ["x_55_cast_fp16"])
model_transformer_layers_2_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_21_cast_fp16 = ios17.mul(x: ["var_479_cast_fp16"], y: ["model_transformer_layers_2_ffn_norm_weight_to_fp16"])
model_transformer_layers_2_ffn_proj_1_weight_to_fp16 = const()
linear_10_bias_0_to_fp16 = const()
0.15% N|C linear_10_cast_fp16 = ios17.linear(bias: ["linear_10_bias_0_to_fp16"], x: ["input_21_cast_fp16"], weight: ["model_transformer_layers_2_ffn_proj_1_weight_to_fp16"])
var_489_split_sizes_0 = const()
var_489_axis_0 = const()
0.06% N|C var_489_cast_fp16_0, var_489_cast_fp16_1 = split(axis: ["var_489_axis_0"], x: ["linear_10_cast_fp16"], split_sizes: ["var_489_split_sizes_0"])
0.04% N|C var_491_cast_fp16 = ios16.silu(x: ["var_489_cast_fp16_0"])
0.08% N|C input_25_cast_fp16 = ios17.mul(y: ["var_489_cast_fp16_1"], x: ["var_491_cast_fp16"])
model_transformer_layers_2_ffn_proj_2_weight_to_fp16 = const()
0.38% N|C linear_11_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_2_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_25_cast_fp16"])
0.09% N|C x_59_cast_fp16 = ios17.add(x: ["x_55_cast_fp16"], y: ["linear_11_cast_fp16"])
var_10_promoted_to_fp16_12 = const()
0.04% N|C var_502_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_12"], x: ["x_59_cast_fp16"])
var_503 = const()
0.02% N|C var_504_cast_fp16 = ios16.reduce_mean(axes: ["var_503"], keep_dims: ["var_22"], x: ["var_502_cast_fp16"])
var_505_to_fp16 = const()
0.00% N|C var_506_cast_fp16 = ios17.add(y: ["var_505_to_fp16"], x: ["var_504_cast_fp16"])
var_507_epsilon_0 = const()
0.00% N|C var_507_cast_fp16 = ios17.rsqrt(x: ["var_506_cast_fp16"], epsilon: ["var_507_epsilon_0"])
0.04% N|C var_508_cast_fp16 = ios17.mul(y: ["var_507_cast_fp16"], x: ["x_59_cast_fp16"])
model_transformer_layers_3_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_19_cast_fp16 = ios17.mul(y: ["model_transformer_layers_3_attn_norm_weight_to_fp16"], x: ["var_508_cast_fp16"])
model_transformer_layers_3_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_12_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_3_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_19_cast_fp16"], bias: ["linear_0_bias_0_to_fp16"])
var_522 = const()
0.04% N|C qkv_15_cast_fp16 = ios17.reshape(shape: ["var_522"], x: ["linear_12_cast_fp16"])
var_524_perm_0 = const()
var_525 = const()
var_526_axis_0 = const()
0.08% N|C transpose_83 = ios17.transpose(x: ["qkv_15_cast_fp16"], perm: ["var_524_perm_0"])
0.03% N|C var_526_cast_fp16_0, var_526_cast_fp16_1, var_526_cast_fp16_2 = split(x: ["transpose_83"], axis: ["var_526_axis_0"], split_sizes: ["var_525"])
var_10_promoted_to_fp16_13 = const()
0.03% N|C var_532_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_13"], x: ["var_526_cast_fp16_0"])
var_533 = const()
0.01% N|C var_534_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_532_cast_fp16"], axes: ["var_533"])
var_535_to_fp16 = const()
0.00% N|C var_536_cast_fp16 = ios17.add(y: ["var_535_to_fp16"], x: ["var_534_cast_fp16"])
var_537_epsilon_0 = const()
0.00% N|C var_537_cast_fp16 = ios17.rsqrt(x: ["var_536_cast_fp16"], epsilon: ["var_537_epsilon_0"])
0.02% N|C var_538_cast_fp16 = ios17.mul(x: ["var_526_cast_fp16_0"], y: ["var_537_cast_fp16"])
model_transformer_layers_3_attn_q_norm_weight_to_fp16 = const()
0.02% N|C query_7_cast_fp16 = ios17.mul(y: ["model_transformer_layers_3_attn_q_norm_weight_to_fp16"], x: ["var_538_cast_fp16"])
var_10_promoted_to_fp16_14 = const()
0.01% N|C var_543_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_14"], x: ["var_526_cast_fp16_1"])
var_544 = const()
0.00% N|C var_545_cast_fp16 = ios16.reduce_mean(x: ["var_543_cast_fp16"], keep_dims: ["var_22"], axes: ["var_544"])
var_546_to_fp16 = const()
0.00% N|C var_547_cast_fp16 = ios17.add(x: ["var_545_cast_fp16"], y: ["var_546_to_fp16"])
var_548_epsilon_0 = const()
0.00% N|C var_548_cast_fp16 = ios17.rsqrt(x: ["var_547_cast_fp16"], epsilon: ["var_548_epsilon_0"])
0.01% N|C var_549_cast_fp16 = ios17.mul(y: ["var_548_cast_fp16"], x: ["var_526_cast_fp16_1"])
model_transformer_layers_3_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_7_cast_fp16 = ios17.mul(y: ["model_transformer_layers_3_attn_k_norm_weight_to_fp16"], x: ["var_549_cast_fp16"])
0.02% N|C var_566_cast_fp16 = ios17.mul(x: ["query_7_cast_fp16"], y: ["var_156_to_fp16"])
var_567_split_sizes_0 = const()
var_567_axis_0 = const()
0.02% N|C var_567_cast_fp16_0, var_567_cast_fp16_1 = split(x: ["query_7_cast_fp16"], axis: ["var_567_axis_0"], split_sizes: ["var_567_split_sizes_0"])
const_27_promoted_to_fp16 = const()
0.01% N|C var_569_cast_fp16 = ios17.mul(x: ["var_567_cast_fp16_1"], y: ["const_27_promoted_to_fp16"])
var_571_interleave_0 = const()
0.01% N|C var_571_cast_fp16 = ios17.concat(values: ["var_569_cast_fp16", "var_567_cast_fp16_0"], axis: ["var_5"], interleave: ["var_571_interleave_0"])
0.02% N|C var_572_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_571_cast_fp16"])
0.06% N|C query_float_7_cast_fp16 = ios17.add(y: ["var_572_cast_fp16"], x: ["var_566_cast_fp16"])
0.01% N|C var_578_cast_fp16 = ios17.mul(x: ["key_7_cast_fp16"], y: ["var_156_to_fp16"])
var_579_split_sizes_0 = const()
var_579_axis_0 = const()
0.00% N|C var_579_cast_fp16_0, var_579_cast_fp16_1 = split(axis: ["var_579_axis_0"], x: ["key_7_cast_fp16"], split_sizes: ["var_579_split_sizes_0"])
const_28_promoted_to_fp16 = const()
0.00% N|C var_581_cast_fp16 = ios17.mul(y: ["const_28_promoted_to_fp16"], x: ["var_579_cast_fp16_1"])
var_583_interleave_0 = const()
0.00% N|C var_583_cast_fp16 = ios17.concat(values: ["var_581_cast_fp16", "var_579_cast_fp16_0"], axis: ["var_5"], interleave: ["var_583_interleave_0"])
0.01% N|C var_584_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_583_cast_fp16"])
0.01% N|C key_float_7_cast_fp16 = ios17.add(x: ["var_578_cast_fp16"], y: ["var_584_cast_fp16"])
reshape_12_shape_0 = const()
0.01% N|C reshape_12_cast_fp16 = ios17.reshape(x: ["key_float_7_cast_fp16"], shape: ["reshape_12_shape_0"])
tile_6_reps_0 = const()
0.02% N|C tile_6_cast_fp16 = tile(x: ["reshape_12_cast_fp16"], reps: ["tile_6_reps_0"])
reshape_13_shape_0 = const()
0.03% N|C reshape_13_cast_fp16 = ios17.reshape(x: ["tile_6_cast_fp16"], shape: ["reshape_13_shape_0"])
transpose_6_perm_0 = const()
keys_15_shape_0 = const()
0.05% N|C transpose_82 = ios17.transpose(x: ["reshape_13_cast_fp16"], perm: ["transpose_6_perm_0"])
0.03% N|C keys_15_cast_fp16 = ios17.reshape(x: ["transpose_82"], shape: ["keys_15_shape_0"])
reshape_14_shape_0 = const()
0.01% N|C reshape_14_cast_fp16 = ios17.reshape(shape: ["reshape_14_shape_0"], x: ["var_526_cast_fp16_2"])
tile_7_reps_0 = const()
0.02% N|C tile_7_cast_fp16 = tile(reps: ["tile_7_reps_0"], x: ["reshape_14_cast_fp16"])
reshape_15_shape_0 = const()
0.03% N|C reshape_15_cast_fp16 = ios17.reshape(shape: ["reshape_15_shape_0"], x: ["tile_7_cast_fp16"])
transpose_7_perm_0 = const()
values_15_shape_0 = const()
0.05% N|C transpose_81 = ios17.transpose(perm: ["transpose_7_perm_0"], x: ["reshape_15_cast_fp16"])
0.03% N|C values_15_cast_fp16 = ios17.reshape(x: ["transpose_81"], shape: ["values_15_shape_0"])
mul_3_y_0_to_fp16 = const()
0.02% N|C mul_3_cast_fp16 = ios17.mul(y: ["mul_3_y_0_to_fp16"], x: ["query_float_7_cast_fp16"])
matmul_3_transpose_y_0 = const()
matmul_3_transpose_x_0 = const()
0.05% N|C matmul_3_cast_fp16 = ios17.matmul(y: ["keys_15_cast_fp16"], transpose_x: ["matmul_3_transpose_x_0"], transpose_y: ["matmul_3_transpose_y_0"], x: ["mul_3_cast_fp16"])
0.06% N|C add_7_cast_fp16 = ios17.add(x: ["matmul_3_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_3_axis_0 = const()
0.05% N|C softmax_3_cast_fp16 = ios16.softmax(axis: ["softmax_3_axis_0"], x: ["add_7_cast_fp16"])
attn_output_13_transpose_x_0 = const()
attn_output_13_transpose_y_0 = const()
0.05% N|C attn_output_13_cast_fp16 = ios17.matmul(y: ["values_15_cast_fp16"], x: ["softmax_3_cast_fp16"], transpose_y: ["attn_output_13_transpose_y_0"], transpose_x: ["attn_output_13_transpose_x_0"])
var_600_perm_0 = const()
var_602 = const()
0.05% N|C transpose_80 = ios17.transpose(x: ["attn_output_13_cast_fp16"], perm: ["var_600_perm_0"])
0.03% N|C input_27_cast_fp16 = ios17.reshape(x: ["transpose_80"], shape: ["var_602"])
model_transformer_layers_3_attn_out_proj_weight_to_fp16 = const()
0.14% N|C linear_13_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_3_attn_out_proj_weight_to_fp16"], x: ["input_27_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
0.09% N|C x_75_cast_fp16 = ios17.add(x: ["x_59_cast_fp16"], y: ["linear_13_cast_fp16"])
var_10_promoted_to_fp16_15 = const()
0.04% N|C var_609_cast_fp16 = ios17.pow(x: ["x_75_cast_fp16"], y: ["var_10_promoted_to_fp16_15"])
var_610 = const()
0.02% N|C var_611_cast_fp16 = ios16.reduce_mean(x: ["var_609_cast_fp16"], keep_dims: ["var_22"], axes: ["var_610"])
var_612_to_fp16 = const()
0.00% N|C var_613_cast_fp16 = ios17.add(x: ["var_611_cast_fp16"], y: ["var_612_to_fp16"])
var_614_epsilon_0 = const()
0.00% N|C var_614_cast_fp16 = ios17.rsqrt(epsilon: ["var_614_epsilon_0"], x: ["var_613_cast_fp16"])
0.04% N|C var_615_cast_fp16 = ios17.mul(x: ["x_75_cast_fp16"], y: ["var_614_cast_fp16"])
model_transformer_layers_3_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_29_cast_fp16 = ios17.mul(x: ["var_615_cast_fp16"], y: ["model_transformer_layers_3_ffn_norm_weight_to_fp16"])
model_transformer_layers_3_ffn_proj_1_weight_to_fp16 = const()
linear_14_bias_0_to_fp16 = const()
0.15% N|C linear_14_cast_fp16 = ios17.linear(bias: ["linear_14_bias_0_to_fp16"], weight: ["model_transformer_layers_3_ffn_proj_1_weight_to_fp16"], x: ["input_29_cast_fp16"])
var_625_split_sizes_0 = const()
var_625_axis_0 = const()
0.08% N|C var_625_cast_fp16_0, var_625_cast_fp16_1 = split(x: ["linear_14_cast_fp16"], axis: ["var_625_axis_0"], split_sizes: ["var_625_split_sizes_0"])
0.05% N|C var_627_cast_fp16 = ios16.silu(x: ["var_625_cast_fp16_0"])
0.09% N|C input_33_cast_fp16 = ios17.mul(x: ["var_627_cast_fp16"], y: ["var_625_cast_fp16_1"])
model_transformer_layers_3_ffn_proj_2_weight_to_fp16 = const()
0.54% N|C linear_15_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_3_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_33_cast_fp16"])
0.09% N|C x_79_cast_fp16 = ios17.add(y: ["linear_15_cast_fp16"], x: ["x_75_cast_fp16"])
var_10_promoted_to_fp16_16 = const()
0.04% N|C var_638_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_16"], x: ["x_79_cast_fp16"])
var_639 = const()
0.02% N|C var_640_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_638_cast_fp16"], axes: ["var_639"])
var_641_to_fp16 = const()
0.00% N|C var_642_cast_fp16 = ios17.add(y: ["var_641_to_fp16"], x: ["var_640_cast_fp16"])
var_643_epsilon_0 = const()
0.00% N|C var_643_cast_fp16 = ios17.rsqrt(x: ["var_642_cast_fp16"], epsilon: ["var_643_epsilon_0"])
0.04% N|C var_644_cast_fp16 = ios17.mul(y: ["var_643_cast_fp16"], x: ["x_79_cast_fp16"])
model_transformer_layers_4_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_25_cast_fp16 = ios17.mul(x: ["var_644_cast_fp16"], y: ["model_transformer_layers_4_attn_norm_weight_to_fp16"])
model_transformer_layers_4_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_16_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_4_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_25_cast_fp16"], bias: ["linear_0_bias_0_to_fp16"])
var_658 = const()
0.04% N|C qkv_19_cast_fp16 = ios17.reshape(x: ["linear_16_cast_fp16"], shape: ["var_658"])
var_660_perm_0 = const()
var_661 = const()
var_662_axis_0 = const()
0.08% N|C transpose_79 = ios17.transpose(perm: ["var_660_perm_0"], x: ["qkv_19_cast_fp16"])
0.03% N|C var_662_cast_fp16_0, var_662_cast_fp16_1, var_662_cast_fp16_2 = split(axis: ["var_662_axis_0"], x: ["transpose_79"], split_sizes: ["var_661"])
var_10_promoted_to_fp16_17 = const()
0.03% N|C var_668_cast_fp16 = ios17.pow(x: ["var_662_cast_fp16_0"], y: ["var_10_promoted_to_fp16_17"])
var_669 = const()
0.01% N|C var_670_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_669"], x: ["var_668_cast_fp16"])
var_671_to_fp16 = const()
0.00% N|C var_672_cast_fp16 = ios17.add(y: ["var_671_to_fp16"], x: ["var_670_cast_fp16"])
var_673_epsilon_0 = const()
0.00% N|C var_673_cast_fp16 = ios17.rsqrt(x: ["var_672_cast_fp16"], epsilon: ["var_673_epsilon_0"])
0.02% N|C var_674_cast_fp16 = ios17.mul(x: ["var_662_cast_fp16_0"], y: ["var_673_cast_fp16"])
model_transformer_layers_4_attn_q_norm_weight_to_fp16 = const()
0.02% N|C query_9_cast_fp16 = ios17.mul(y: ["model_transformer_layers_4_attn_q_norm_weight_to_fp16"], x: ["var_674_cast_fp16"])
var_10_promoted_to_fp16_18 = const()
0.01% N|C var_679_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_18"], x: ["var_662_cast_fp16_1"])
var_680 = const()
0.00% N|C var_681_cast_fp16 = ios16.reduce_mean(x: ["var_679_cast_fp16"], axes: ["var_680"], keep_dims: ["var_22"])
var_682_to_fp16 = const()
0.00% N|C var_683_cast_fp16 = ios17.add(y: ["var_682_to_fp16"], x: ["var_681_cast_fp16"])
var_684_epsilon_0 = const()
0.00% N|C var_684_cast_fp16 = ios17.rsqrt(x: ["var_683_cast_fp16"], epsilon: ["var_684_epsilon_0"])
0.01% N|C var_685_cast_fp16 = ios17.mul(x: ["var_662_cast_fp16_1"], y: ["var_684_cast_fp16"])
model_transformer_layers_4_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_9_cast_fp16 = ios17.mul(x: ["var_685_cast_fp16"], y: ["model_transformer_layers_4_attn_k_norm_weight_to_fp16"])
0.02% N|C var_702_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["query_9_cast_fp16"])
var_703_split_sizes_0 = const()
var_703_axis_0 = const()
0.02% N|C var_703_cast_fp16_0, var_703_cast_fp16_1 = split(split_sizes: ["var_703_split_sizes_0"], x: ["query_9_cast_fp16"], axis: ["var_703_axis_0"])
const_34_promoted_to_fp16 = const()
0.01% N|C var_705_cast_fp16 = ios17.mul(y: ["const_34_promoted_to_fp16"], x: ["var_703_cast_fp16_1"])
var_707_interleave_0 = const()
0.01% N|C var_707_cast_fp16 = ios17.concat(values: ["var_705_cast_fp16", "var_703_cast_fp16_0"], interleave: ["var_707_interleave_0"], axis: ["var_5"])
0.02% N|C var_708_cast_fp16 = ios17.mul(x: ["var_707_cast_fp16"], y: ["var_152_to_fp16"])
0.06% N|C query_float_9_cast_fp16 = ios17.add(y: ["var_708_cast_fp16"], x: ["var_702_cast_fp16"])
0.01% N|C var_714_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["key_9_cast_fp16"])
var_715_split_sizes_0 = const()
var_715_axis_0 = const()
0.00% N|C var_715_cast_fp16_0, var_715_cast_fp16_1 = split(axis: ["var_715_axis_0"], x: ["key_9_cast_fp16"], split_sizes: ["var_715_split_sizes_0"])
const_35_promoted_to_fp16 = const()
0.00% N|C var_717_cast_fp16 = ios17.mul(x: ["var_715_cast_fp16_1"], y: ["const_35_promoted_to_fp16"])
var_719_interleave_0 = const()
0.00% N|C var_719_cast_fp16 = ios17.concat(values: ["var_717_cast_fp16", "var_715_cast_fp16_0"], axis: ["var_5"], interleave: ["var_719_interleave_0"])
0.01% N|C var_720_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_719_cast_fp16"])
0.01% N|C key_float_9_cast_fp16 = ios17.add(x: ["var_714_cast_fp16"], y: ["var_720_cast_fp16"])
reshape_16_shape_0 = const()
0.01% N|C reshape_16_cast_fp16 = ios17.reshape(shape: ["reshape_16_shape_0"], x: ["key_float_9_cast_fp16"])
tile_8_reps_0 = const()
0.02% N|C tile_8_cast_fp16 = tile(x: ["reshape_16_cast_fp16"], reps: ["tile_8_reps_0"])
reshape_17_shape_0 = const()
0.03% N|C reshape_17_cast_fp16 = ios17.reshape(x: ["tile_8_cast_fp16"], shape: ["reshape_17_shape_0"])
transpose_8_perm_0 = const()
keys_19_shape_0 = const()
0.05% N|C transpose_78 = ios17.transpose(perm: ["transpose_8_perm_0"], x: ["reshape_17_cast_fp16"])
0.03% N|C keys_19_cast_fp16 = ios17.reshape(x: ["transpose_78"], shape: ["keys_19_shape_0"])
reshape_18_shape_0 = const()
0.01% N|C reshape_18_cast_fp16 = ios17.reshape(shape: ["reshape_18_shape_0"], x: ["var_662_cast_fp16_2"])
tile_9_reps_0 = const()
0.02% N|C tile_9_cast_fp16 = tile(x: ["reshape_18_cast_fp16"], reps: ["tile_9_reps_0"])
reshape_19_shape_0 = const()
0.03% N|C reshape_19_cast_fp16 = ios17.reshape(shape: ["reshape_19_shape_0"], x: ["tile_9_cast_fp16"])
transpose_9_perm_0 = const()
values_19_shape_0 = const()
0.05% N|C transpose_77 = ios17.transpose(x: ["reshape_19_cast_fp16"], perm: ["transpose_9_perm_0"])
0.03% N|C values_19_cast_fp16 = ios17.reshape(shape: ["values_19_shape_0"], x: ["transpose_77"])
mul_4_y_0_to_fp16 = const()
0.02% N|C mul_4_cast_fp16 = ios17.mul(y: ["mul_4_y_0_to_fp16"], x: ["query_float_9_cast_fp16"])
matmul_4_transpose_y_0 = const()
matmul_4_transpose_x_0 = const()
0.05% N|C matmul_4_cast_fp16 = ios17.matmul(x: ["mul_4_cast_fp16"], transpose_y: ["matmul_4_transpose_y_0"], transpose_x: ["matmul_4_transpose_x_0"], y: ["keys_19_cast_fp16"])
0.06% N|C add_9_cast_fp16 = ios17.add(x: ["matmul_4_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_4_axis_0 = const()
0.05% N|C softmax_4_cast_fp16 = ios16.softmax(axis: ["softmax_4_axis_0"], x: ["add_9_cast_fp16"])
attn_output_17_transpose_x_0 = const()
attn_output_17_transpose_y_0 = const()
0.05% N|C attn_output_17_cast_fp16 = ios17.matmul(x: ["softmax_4_cast_fp16"], y: ["values_19_cast_fp16"], transpose_x: ["attn_output_17_transpose_x_0"], transpose_y: ["attn_output_17_transpose_y_0"])
var_736_perm_0 = const()
var_738 = const()
0.05% N|C transpose_76 = ios17.transpose(x: ["attn_output_17_cast_fp16"], perm: ["var_736_perm_0"])
0.03% N|C input_35_cast_fp16 = ios17.reshape(shape: ["var_738"], x: ["transpose_76"])
model_transformer_layers_4_attn_out_proj_weight_to_fp16 = const()
0.14% N|C linear_17_cast_fp16 = ios17.linear(x: ["input_35_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_4_attn_out_proj_weight_to_fp16"])
0.09% N|C x_95_cast_fp16 = ios17.add(y: ["linear_17_cast_fp16"], x: ["x_79_cast_fp16"])
var_10_promoted_to_fp16_19 = const()
0.04% N|C var_745_cast_fp16 = ios17.pow(x: ["x_95_cast_fp16"], y: ["var_10_promoted_to_fp16_19"])
var_746 = const()
0.02% N|C var_747_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_745_cast_fp16"], axes: ["var_746"])
var_748_to_fp16 = const()
0.00% N|C var_749_cast_fp16 = ios17.add(x: ["var_747_cast_fp16"], y: ["var_748_to_fp16"])
var_750_epsilon_0 = const()
0.00% N|C var_750_cast_fp16 = ios17.rsqrt(x: ["var_749_cast_fp16"], epsilon: ["var_750_epsilon_0"])
0.04% N|C var_751_cast_fp16 = ios17.mul(y: ["var_750_cast_fp16"], x: ["x_95_cast_fp16"])
model_transformer_layers_4_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_37_cast_fp16 = ios17.mul(y: ["model_transformer_layers_4_ffn_norm_weight_to_fp16"], x: ["var_751_cast_fp16"])
model_transformer_layers_4_ffn_proj_1_weight_to_fp16 = const()
linear_18_bias_0_to_fp16 = const()
0.15% N|C linear_18_cast_fp16 = ios17.linear(bias: ["linear_18_bias_0_to_fp16"], x: ["input_37_cast_fp16"], weight: ["model_transformer_layers_4_ffn_proj_1_weight_to_fp16"])
var_761_split_sizes_0 = const()
var_761_axis_0 = const()
0.09% N|C var_761_cast_fp16_0, var_761_cast_fp16_1 = split(axis: ["var_761_axis_0"], x: ["linear_18_cast_fp16"], split_sizes: ["var_761_split_sizes_0"])
0.06% N|C var_763_cast_fp16 = ios16.silu(x: ["var_761_cast_fp16_0"])
0.11% N|C input_41_cast_fp16 = ios17.mul(x: ["var_763_cast_fp16"], y: ["var_761_cast_fp16_1"])
model_transformer_layers_4_ffn_proj_2_weight_to_fp16 = const()
0.74% N|C linear_19_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_4_ffn_proj_2_weight_to_fp16"], bias: ["linear_1_bias_0_to_fp16"], x: ["input_41_cast_fp16"])
0.09% N|C x_99_cast_fp16 = ios17.add(y: ["linear_19_cast_fp16"], x: ["x_95_cast_fp16"])
var_10_promoted_to_fp16_20 = const()
0.04% N|C var_774_cast_fp16 = ios17.pow(x: ["x_99_cast_fp16"], y: ["var_10_promoted_to_fp16_20"])
var_775 = const()
0.02% N|C var_776_cast_fp16 = ios16.reduce_mean(x: ["var_774_cast_fp16"], keep_dims: ["var_22"], axes: ["var_775"])
var_777_to_fp16 = const()
0.00% N|C var_778_cast_fp16 = ios17.add(y: ["var_777_to_fp16"], x: ["var_776_cast_fp16"])
var_779_epsilon_0 = const()
0.00% N|C var_779_cast_fp16 = ios17.rsqrt(x: ["var_778_cast_fp16"], epsilon: ["var_779_epsilon_0"])
0.04% N|C var_780_cast_fp16 = ios17.mul(x: ["x_99_cast_fp16"], y: ["var_779_cast_fp16"])
model_transformer_layers_5_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_31_cast_fp16 = ios17.mul(x: ["var_780_cast_fp16"], y: ["model_transformer_layers_5_attn_norm_weight_to_fp16"])
model_transformer_layers_5_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_20_cast_fp16 = ios17.linear(bias: ["linear_2_bias_0_to_fp16"], x: ["hidden_states_31_cast_fp16"], weight: ["model_transformer_layers_5_attn_qkv_proj_weight_to_fp16"])
var_794 = const()
0.05% N|C qkv_23_cast_fp16 = ios17.reshape(shape: ["var_794"], x: ["linear_20_cast_fp16"])
var_796_perm_0 = const()
var_797 = const()
var_798_axis_0 = const()
0.11% N|C transpose_75 = ios17.transpose(x: ["qkv_23_cast_fp16"], perm: ["var_796_perm_0"])
0.04% N|C var_798_cast_fp16_0, var_798_cast_fp16_1, var_798_cast_fp16_2 = split(split_sizes: ["var_797"], axis: ["var_798_axis_0"], x: ["transpose_75"])
var_10_promoted_to_fp16_21 = const()
0.03% N|C var_804_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_21"], x: ["var_798_cast_fp16_0"])
var_805 = const()
0.02% N|C var_806_cast_fp16 = ios16.reduce_mean(x: ["var_804_cast_fp16"], keep_dims: ["var_22"], axes: ["var_805"])
var_807_to_fp16 = const()
0.00% N|C var_808_cast_fp16 = ios17.add(y: ["var_807_to_fp16"], x: ["var_806_cast_fp16"])
var_809_epsilon_0 = const()
0.00% N|C var_809_cast_fp16 = ios17.rsqrt(x: ["var_808_cast_fp16"], epsilon: ["var_809_epsilon_0"])
0.03% N|C var_810_cast_fp16 = ios17.mul(y: ["var_809_cast_fp16"], x: ["var_798_cast_fp16_0"])
model_transformer_layers_5_attn_q_norm_weight_to_fp16 = const()
0.03% N|C query_11_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_attn_q_norm_weight_to_fp16"], x: ["var_810_cast_fp16"])
var_10_promoted_to_fp16_22 = const()
0.01% N|C var_815_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_22"], x: ["var_798_cast_fp16_1"])
var_816 = const()
0.00% N|C var_817_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_816"], x: ["var_815_cast_fp16"])
var_818_to_fp16 = const()
0.00% N|C var_819_cast_fp16 = ios17.add(y: ["var_818_to_fp16"], x: ["var_817_cast_fp16"])
var_820_epsilon_0 = const()
0.00% N|C var_820_cast_fp16 = ios17.rsqrt(x: ["var_819_cast_fp16"], epsilon: ["var_820_epsilon_0"])
0.01% N|C var_821_cast_fp16 = ios17.mul(x: ["var_798_cast_fp16_1"], y: ["var_820_cast_fp16"])
model_transformer_layers_5_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_11_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_attn_k_norm_weight_to_fp16"], x: ["var_821_cast_fp16"])
0.03% N|C var_838_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["query_11_cast_fp16"])
var_839_split_sizes_0 = const()
var_839_axis_0 = const()
0.03% N|C var_839_cast_fp16_0, var_839_cast_fp16_1 = split(x: ["query_11_cast_fp16"], split_sizes: ["var_839_split_sizes_0"], axis: ["var_839_axis_0"])
const_41_promoted_to_fp16 = const()
0.02% N|C var_841_cast_fp16 = ios17.mul(x: ["var_839_cast_fp16_1"], y: ["const_41_promoted_to_fp16"])
var_843_interleave_0 = const()
0.01% N|C var_843_cast_fp16 = ios17.concat(interleave: ["var_843_interleave_0"], axis: ["var_5"], values: ["var_841_cast_fp16", "var_839_cast_fp16_0"])
0.03% N|C var_844_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_843_cast_fp16"])
0.08% N|C query_float_11_cast_fp16 = ios17.add(x: ["var_838_cast_fp16"], y: ["var_844_cast_fp16"])
0.01% N|C var_850_cast_fp16 = ios17.mul(x: ["key_11_cast_fp16"], y: ["var_156_to_fp16"])
var_851_split_sizes_0 = const()
var_851_axis_0 = const()
0.01% N|C var_851_cast_fp16_0, var_851_cast_fp16_1 = split(x: ["key_11_cast_fp16"], axis: ["var_851_axis_0"], split_sizes: ["var_851_split_sizes_0"])
const_42_promoted_to_fp16 = const()
0.00% N|C var_853_cast_fp16 = ios17.mul(y: ["const_42_promoted_to_fp16"], x: ["var_851_cast_fp16_1"])
var_855_interleave_0 = const()
0.00% N|C var_855_cast_fp16 = ios17.concat(interleave: ["var_855_interleave_0"], values: ["var_853_cast_fp16", "var_851_cast_fp16_0"], axis: ["var_5"])
0.01% N|C var_856_cast_fp16 = ios17.mul(x: ["var_855_cast_fp16"], y: ["var_152_to_fp16"])
0.02% N|C key_float_11_cast_fp16 = ios17.add(x: ["var_850_cast_fp16"], y: ["var_856_cast_fp16"])
reshape_20_shape_0 = const()
0.01% N|C reshape_20_cast_fp16 = ios17.reshape(shape: ["reshape_20_shape_0"], x: ["key_float_11_cast_fp16"])
tile_10_reps_0 = const()
0.02% N|C tile_10_cast_fp16 = tile(x: ["reshape_20_cast_fp16"], reps: ["tile_10_reps_0"])
reshape_21_shape_0 = const()
0.03% N|C reshape_21_cast_fp16 = ios17.reshape(x: ["tile_10_cast_fp16"], shape: ["reshape_21_shape_0"])
transpose_10_perm_0 = const()
keys_23_shape_0 = const()
0.07% N|C transpose_74 = ios17.transpose(x: ["reshape_21_cast_fp16"], perm: ["transpose_10_perm_0"])
0.03% N|C keys_23_cast_fp16 = ios17.reshape(shape: ["keys_23_shape_0"], x: ["transpose_74"])
reshape_22_shape_0 = const()
0.01% N|C reshape_22_cast_fp16 = ios17.reshape(x: ["var_798_cast_fp16_2"], shape: ["reshape_22_shape_0"])
tile_11_reps_0 = const()
0.02% N|C tile_11_cast_fp16 = tile(x: ["reshape_22_cast_fp16"], reps: ["tile_11_reps_0"])
reshape_23_shape_0 = const()
0.03% N|C reshape_23_cast_fp16 = ios17.reshape(x: ["tile_11_cast_fp16"], shape: ["reshape_23_shape_0"])
transpose_11_perm_0 = const()
values_23_shape_0 = const()
0.07% N|C transpose_73 = ios17.transpose(x: ["reshape_23_cast_fp16"], perm: ["transpose_11_perm_0"])
0.03% N|C values_23_cast_fp16 = ios17.reshape(shape: ["values_23_shape_0"], x: ["transpose_73"])
mul_5_y_0_to_fp16 = const()
0.03% N|C mul_5_cast_fp16 = ios17.mul(y: ["mul_5_y_0_to_fp16"], x: ["query_float_11_cast_fp16"])
matmul_5_transpose_y_0 = const()
matmul_5_transpose_x_0 = const()
0.07% N|C matmul_5_cast_fp16 = ios17.matmul(transpose_x: ["matmul_5_transpose_x_0"], x: ["mul_5_cast_fp16"], transpose_y: ["matmul_5_transpose_y_0"], y: ["keys_23_cast_fp16"])
0.08% N|C add_11_cast_fp16 = ios17.add(x: ["matmul_5_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_5_axis_0 = const()
0.07% N|C softmax_5_cast_fp16 = ios16.softmax(x: ["add_11_cast_fp16"], axis: ["softmax_5_axis_0"])
attn_output_21_transpose_x_0 = const()
attn_output_21_transpose_y_0 = const()
0.07% N|C attn_output_21_cast_fp16 = ios17.matmul(transpose_x: ["attn_output_21_transpose_x_0"], y: ["values_23_cast_fp16"], x: ["softmax_5_cast_fp16"], transpose_y: ["attn_output_21_transpose_y_0"])
var_872_perm_0 = const()
var_874 = const()
0.07% N|C transpose_72 = ios17.transpose(x: ["attn_output_21_cast_fp16"], perm: ["var_872_perm_0"])
0.03% N|C input_43_cast_fp16 = ios17.reshape(shape: ["var_874"], x: ["transpose_72"])
model_transformer_layers_5_attn_out_proj_weight_to_fp16 = const()
0.24% N|C linear_21_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_5_attn_out_proj_weight_to_fp16"], x: ["input_43_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
0.09% N|C x_115_cast_fp16 = ios17.add(x: ["x_99_cast_fp16"], y: ["linear_21_cast_fp16"])
var_10_promoted_to_fp16_23 = const()
0.04% N|C var_881_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_23"], x: ["x_115_cast_fp16"])
var_882 = const()
0.02% N|C var_883_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], axes: ["var_882"], x: ["var_881_cast_fp16"])
var_884_to_fp16 = const()
0.00% N|C var_885_cast_fp16 = ios17.add(x: ["var_883_cast_fp16"], y: ["var_884_to_fp16"])
var_886_epsilon_0 = const()
0.00% N|C var_886_cast_fp16 = ios17.rsqrt(epsilon: ["var_886_epsilon_0"], x: ["var_885_cast_fp16"])
0.04% N|C var_887_cast_fp16 = ios17.mul(y: ["var_886_cast_fp16"], x: ["x_115_cast_fp16"])
model_transformer_layers_5_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_45_cast_fp16 = ios17.mul(y: ["model_transformer_layers_5_ffn_norm_weight_to_fp16"], x: ["var_887_cast_fp16"])
model_transformer_layers_5_ffn_proj_1_weight_to_fp16 = const()
linear_22_bias_0_to_fp16 = const()
0.15% N|C linear_22_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_5_ffn_proj_1_weight_to_fp16"], bias: ["linear_22_bias_0_to_fp16"], x: ["input_45_cast_fp16"])
var_897_split_sizes_0 = const()
var_897_axis_0 = const()
0.10% N|C var_897_cast_fp16_0, var_897_cast_fp16_1 = split(axis: ["var_897_axis_0"], split_sizes: ["var_897_split_sizes_0"], x: ["linear_22_cast_fp16"])
0.07% N|C var_899_cast_fp16 = ios16.silu(x: ["var_897_cast_fp16_0"])
0.12% N|C input_49_cast_fp16 = ios17.mul(x: ["var_899_cast_fp16"], y: ["var_897_cast_fp16_1"])
model_transformer_layers_5_ffn_proj_2_weight_to_fp16 = const()
0.96% N|C linear_23_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_5_ffn_proj_2_weight_to_fp16"], x: ["input_49_cast_fp16"])
0.09% N|C x_119_cast_fp16 = ios17.add(y: ["linear_23_cast_fp16"], x: ["x_115_cast_fp16"])
var_10_promoted_to_fp16_24 = const()
0.04% N|C var_910_cast_fp16 = ios17.pow(x: ["x_119_cast_fp16"], y: ["var_10_promoted_to_fp16_24"])
var_911 = const()
0.02% N|C var_912_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_910_cast_fp16"], axes: ["var_911"])
var_913_to_fp16 = const()
0.00% N|C var_914_cast_fp16 = ios17.add(y: ["var_913_to_fp16"], x: ["var_912_cast_fp16"])
var_915_epsilon_0 = const()
0.00% N|C var_915_cast_fp16 = ios17.rsqrt(x: ["var_914_cast_fp16"], epsilon: ["var_915_epsilon_0"])
0.04% N|C var_916_cast_fp16 = ios17.mul(x: ["x_119_cast_fp16"], y: ["var_915_cast_fp16"])
model_transformer_layers_6_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_37_cast_fp16 = ios17.mul(y: ["model_transformer_layers_6_attn_norm_weight_to_fp16"], x: ["var_916_cast_fp16"])
model_transformer_layers_6_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_24_cast_fp16 = ios17.linear(bias: ["linear_2_bias_0_to_fp16"], weight: ["model_transformer_layers_6_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_37_cast_fp16"])
var_930 = const()
0.05% N|C qkv_27_cast_fp16 = ios17.reshape(x: ["linear_24_cast_fp16"], shape: ["var_930"])
var_932_perm_0 = const()
var_933 = const()
var_934_axis_0 = const()
0.11% N|C transpose_71 = ios17.transpose(x: ["qkv_27_cast_fp16"], perm: ["var_932_perm_0"])
0.04% N|C var_934_cast_fp16_0, var_934_cast_fp16_1, var_934_cast_fp16_2 = split(x: ["transpose_71"], split_sizes: ["var_933"], axis: ["var_934_axis_0"])
var_10_promoted_to_fp16_25 = const()
0.03% N|C var_940_cast_fp16 = ios17.pow(x: ["var_934_cast_fp16_0"], y: ["var_10_promoted_to_fp16_25"])
var_941 = const()
0.02% N|C var_942_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_940_cast_fp16"], axes: ["var_941"])
var_943_to_fp16 = const()
0.00% N|C var_944_cast_fp16 = ios17.add(y: ["var_943_to_fp16"], x: ["var_942_cast_fp16"])
var_945_epsilon_0 = const()
0.00% N|C var_945_cast_fp16 = ios17.rsqrt(x: ["var_944_cast_fp16"], epsilon: ["var_945_epsilon_0"])
0.03% N|C var_946_cast_fp16 = ios17.mul(x: ["var_934_cast_fp16_0"], y: ["var_945_cast_fp16"])
model_transformer_layers_6_attn_q_norm_weight_to_fp16 = const()
0.03% N|C query_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_6_attn_q_norm_weight_to_fp16"], x: ["var_946_cast_fp16"])
var_10_promoted_to_fp16_26 = const()
0.01% N|C var_951_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_26"], x: ["var_934_cast_fp16_1"])
var_952 = const()
0.00% N|C var_953_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_951_cast_fp16"], axes: ["var_952"])
var_954_to_fp16 = const()
0.00% N|C var_955_cast_fp16 = ios17.add(x: ["var_953_cast_fp16"], y: ["var_954_to_fp16"])
var_956_epsilon_0 = const()
0.00% N|C var_956_cast_fp16 = ios17.rsqrt(x: ["var_955_cast_fp16"], epsilon: ["var_956_epsilon_0"])
0.01% N|C var_957_cast_fp16 = ios17.mul(x: ["var_934_cast_fp16_1"], y: ["var_956_cast_fp16"])
model_transformer_layers_6_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_13_cast_fp16 = ios17.mul(y: ["model_transformer_layers_6_attn_k_norm_weight_to_fp16"], x: ["var_957_cast_fp16"])
0.03% N|C var_974_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["query_13_cast_fp16"])
var_975_split_sizes_0 = const()
var_975_axis_0 = const()
0.03% N|C var_975_cast_fp16_0, var_975_cast_fp16_1 = split(x: ["query_13_cast_fp16"], split_sizes: ["var_975_split_sizes_0"], axis: ["var_975_axis_0"])
const_48_promoted_to_fp16 = const()
0.02% N|C var_977_cast_fp16 = ios17.mul(y: ["const_48_promoted_to_fp16"], x: ["var_975_cast_fp16_1"])
var_979_interleave_0 = const()
0.01% N|C var_979_cast_fp16 = ios17.concat(interleave: ["var_979_interleave_0"], axis: ["var_5"], values: ["var_977_cast_fp16", "var_975_cast_fp16_0"])
0.03% N|C var_980_cast_fp16 = ios17.mul(x: ["var_979_cast_fp16"], y: ["var_152_to_fp16"])
0.08% N|C query_float_13_cast_fp16 = ios17.add(y: ["var_980_cast_fp16"], x: ["var_974_cast_fp16"])
0.01% N|C var_986_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["key_13_cast_fp16"])
var_987_split_sizes_0 = const()
var_987_axis_0 = const()
0.01% N|C var_987_cast_fp16_0, var_987_cast_fp16_1 = split(axis: ["var_987_axis_0"], split_sizes: ["var_987_split_sizes_0"], x: ["key_13_cast_fp16"])
const_49_promoted_to_fp16 = const()
0.00% N|C var_989_cast_fp16 = ios17.mul(x: ["var_987_cast_fp16_1"], y: ["const_49_promoted_to_fp16"])
var_991_interleave_0 = const()
0.00% N|C var_991_cast_fp16 = ios17.concat(interleave: ["var_991_interleave_0"], values: ["var_989_cast_fp16", "var_987_cast_fp16_0"], axis: ["var_5"])
0.01% N|C var_992_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_991_cast_fp16"])
0.02% N|C key_float_13_cast_fp16 = ios17.add(x: ["var_986_cast_fp16"], y: ["var_992_cast_fp16"])
reshape_24_shape_0 = const()
0.01% N|C reshape_24_cast_fp16 = ios17.reshape(x: ["key_float_13_cast_fp16"], shape: ["reshape_24_shape_0"])
tile_12_reps_0 = const()
0.02% N|C tile_12_cast_fp16 = tile(reps: ["tile_12_reps_0"], x: ["reshape_24_cast_fp16"])
reshape_25_shape_0 = const()
0.03% N|C reshape_25_cast_fp16 = ios17.reshape(x: ["tile_12_cast_fp16"], shape: ["reshape_25_shape_0"])
transpose_12_perm_0 = const()
keys_27_shape_0 = const()
0.07% N|C transpose_70 = ios17.transpose(x: ["reshape_25_cast_fp16"], perm: ["transpose_12_perm_0"])
0.03% N|C keys_27_cast_fp16 = ios17.reshape(shape: ["keys_27_shape_0"], x: ["transpose_70"])
reshape_26_shape_0 = const()
0.01% N|C reshape_26_cast_fp16 = ios17.reshape(shape: ["reshape_26_shape_0"], x: ["var_934_cast_fp16_2"])
tile_13_reps_0 = const()
0.02% N|C tile_13_cast_fp16 = tile(x: ["reshape_26_cast_fp16"], reps: ["tile_13_reps_0"])
reshape_27_shape_0 = const()
0.03% N|C reshape_27_cast_fp16 = ios17.reshape(shape: ["reshape_27_shape_0"], x: ["tile_13_cast_fp16"])
transpose_13_perm_0 = const()
values_27_shape_0 = const()
0.07% N|C transpose_69 = ios17.transpose(perm: ["transpose_13_perm_0"], x: ["reshape_27_cast_fp16"])
0.03% N|C values_27_cast_fp16 = ios17.reshape(x: ["transpose_69"], shape: ["values_27_shape_0"])
mul_6_y_0_to_fp16 = const()
0.03% N|C mul_6_cast_fp16 = ios17.mul(x: ["query_float_13_cast_fp16"], y: ["mul_6_y_0_to_fp16"])
matmul_6_transpose_y_0 = const()
matmul_6_transpose_x_0 = const()
0.07% N|C matmul_6_cast_fp16 = ios17.matmul(transpose_y: ["matmul_6_transpose_y_0"], transpose_x: ["matmul_6_transpose_x_0"], y: ["keys_27_cast_fp16"], x: ["mul_6_cast_fp16"])
0.08% N|C add_13_cast_fp16 = ios17.add(x: ["matmul_6_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_6_axis_0 = const()
0.07% N|C softmax_6_cast_fp16 = ios16.softmax(axis: ["softmax_6_axis_0"], x: ["add_13_cast_fp16"])
attn_output_25_transpose_x_0 = const()
attn_output_25_transpose_y_0 = const()
0.07% N|C attn_output_25_cast_fp16 = ios17.matmul(transpose_y: ["attn_output_25_transpose_y_0"], transpose_x: ["attn_output_25_transpose_x_0"], x: ["softmax_6_cast_fp16"], y: ["values_27_cast_fp16"])
var_1008_perm_0 = const()
var_1010 = const()
0.07% N|C transpose_68 = ios17.transpose(perm: ["var_1008_perm_0"], x: ["attn_output_25_cast_fp16"])
0.03% N|C input_51_cast_fp16 = ios17.reshape(x: ["transpose_68"], shape: ["var_1010"])
model_transformer_layers_6_attn_out_proj_weight_to_fp16 = const()
0.24% N|C linear_25_cast_fp16 = ios17.linear(weight: ["model_transformer_layers_6_attn_out_proj_weight_to_fp16"], x: ["input_51_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"])
0.09% N|C x_135_cast_fp16 = ios17.add(x: ["x_119_cast_fp16"], y: ["linear_25_cast_fp16"])
var_10_promoted_to_fp16_27 = const()
0.04% N|C var_1017_cast_fp16 = ios17.pow(x: ["x_135_cast_fp16"], y: ["var_10_promoted_to_fp16_27"])
var_1018 = const()
0.02% N|C var_1019_cast_fp16 = ios16.reduce_mean(x: ["var_1017_cast_fp16"], keep_dims: ["var_22"], axes: ["var_1018"])
var_1020_to_fp16 = const()
0.00% N|C var_1021_cast_fp16 = ios17.add(y: ["var_1020_to_fp16"], x: ["var_1019_cast_fp16"])
var_1022_epsilon_0 = const()
0.00% N|C var_1022_cast_fp16 = ios17.rsqrt(x: ["var_1021_cast_fp16"], epsilon: ["var_1022_epsilon_0"])
0.04% N|C var_1023_cast_fp16 = ios17.mul(y: ["var_1022_cast_fp16"], x: ["x_135_cast_fp16"])
model_transformer_layers_6_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_53_cast_fp16 = ios17.mul(x: ["var_1023_cast_fp16"], y: ["model_transformer_layers_6_ffn_norm_weight_to_fp16"])
model_transformer_layers_6_ffn_proj_1_weight_to_fp16 = const()
linear_26_bias_0_to_fp16 = const()
0.15% N|C linear_26_cast_fp16 = ios17.linear(x: ["input_53_cast_fp16"], weight: ["model_transformer_layers_6_ffn_proj_1_weight_to_fp16"], bias: ["linear_26_bias_0_to_fp16"])
var_1033_split_sizes_0 = const()
var_1033_axis_0 = const()
0.13% N|C var_1033_cast_fp16_0, var_1033_cast_fp16_1 = split(axis: ["var_1033_axis_0"], x: ["linear_26_cast_fp16"], split_sizes: ["var_1033_split_sizes_0"])
0.09% N|C var_1035_cast_fp16 = ios16.silu(x: ["var_1033_cast_fp16_0"])
0.15% N|C input_57_cast_fp16 = ios17.mul(x: ["var_1035_cast_fp16"], y: ["var_1033_cast_fp16_1"])
model_transformer_layers_6_ffn_proj_2_weight_to_fp16 = const()
1.50% N|C linear_27_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], x: ["input_57_cast_fp16"], weight: ["model_transformer_layers_6_ffn_proj_2_weight_to_fp16"])
0.09% N|C x_139_cast_fp16 = ios17.add(y: ["linear_27_cast_fp16"], x: ["x_135_cast_fp16"])
var_10_promoted_to_fp16_28 = const()
0.04% N|C var_1046_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_28"], x: ["x_139_cast_fp16"])
var_1047 = const()
0.02% N|C var_1048_cast_fp16 = ios16.reduce_mean(keep_dims: ["var_22"], x: ["var_1046_cast_fp16"], axes: ["var_1047"])
var_1049_to_fp16 = const()
0.00% N|C var_1050_cast_fp16 = ios17.add(x: ["var_1048_cast_fp16"], y: ["var_1049_to_fp16"])
var_1051_epsilon_0 = const()
0.00% N|C var_1051_cast_fp16 = ios17.rsqrt(x: ["var_1050_cast_fp16"], epsilon: ["var_1051_epsilon_0"])
0.04% N|C var_1052_cast_fp16 = ios17.mul(x: ["x_139_cast_fp16"], y: ["var_1051_cast_fp16"])
model_transformer_layers_7_attn_norm_weight_to_fp16 = const()
0.04% N|C hidden_states_43_cast_fp16 = ios17.mul(y: ["model_transformer_layers_7_attn_norm_weight_to_fp16"], x: ["var_1052_cast_fp16"])
model_transformer_layers_7_attn_qkv_proj_weight_to_fp16 = const()
0.38% N|C linear_28_cast_fp16 = ios17.linear(bias: ["linear_2_bias_0_to_fp16"], weight: ["model_transformer_layers_7_attn_qkv_proj_weight_to_fp16"], x: ["hidden_states_43_cast_fp16"])
var_1066 = const()
0.05% N|C qkv_31_cast_fp16 = ios17.reshape(x: ["linear_28_cast_fp16"], shape: ["var_1066"])
var_1068_perm_0 = const()
var_1069 = const()
var_1070_axis_0 = const()
0.11% N|C transpose_67 = ios17.transpose(perm: ["var_1068_perm_0"], x: ["qkv_31_cast_fp16"])
0.04% N|C var_1070_cast_fp16_0, var_1070_cast_fp16_1, var_1070_cast_fp16_2 = split(axis: ["var_1070_axis_0"], x: ["transpose_67"], split_sizes: ["var_1069"])
var_10_promoted_to_fp16_29 = const()
0.03% N|C var_1076_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_29"], x: ["var_1070_cast_fp16_0"])
var_1077 = const()
0.02% N|C var_1078_cast_fp16 = ios16.reduce_mean(x: ["var_1076_cast_fp16"], keep_dims: ["var_22"], axes: ["var_1077"])
var_1079_to_fp16 = const()
0.00% N|C var_1080_cast_fp16 = ios17.add(y: ["var_1079_to_fp16"], x: ["var_1078_cast_fp16"])
var_1081_epsilon_0 = const()
0.00% N|C var_1081_cast_fp16 = ios17.rsqrt(epsilon: ["var_1081_epsilon_0"], x: ["var_1080_cast_fp16"])
0.03% N|C var_1082_cast_fp16 = ios17.mul(y: ["var_1081_cast_fp16"], x: ["var_1070_cast_fp16_0"])
model_transformer_layers_7_attn_q_norm_weight_to_fp16 = const()
0.03% N|C query_15_cast_fp16 = ios17.mul(y: ["model_transformer_layers_7_attn_q_norm_weight_to_fp16"], x: ["var_1082_cast_fp16"])
var_10_promoted_to_fp16_30 = const()
0.01% N|C var_1087_cast_fp16 = ios17.pow(y: ["var_10_promoted_to_fp16_30"], x: ["var_1070_cast_fp16_1"])
var_1088 = const()
0.00% N|C var_1089_cast_fp16 = ios16.reduce_mean(x: ["var_1087_cast_fp16"], keep_dims: ["var_22"], axes: ["var_1088"])
var_1090_to_fp16 = const()
0.00% N|C var_1091_cast_fp16 = ios17.add(y: ["var_1090_to_fp16"], x: ["var_1089_cast_fp16"])
var_1092_epsilon_0 = const()
0.00% N|C var_1092_cast_fp16 = ios17.rsqrt(x: ["var_1091_cast_fp16"], epsilon: ["var_1092_epsilon_0"])
0.01% N|C var_1093_cast_fp16 = ios17.mul(x: ["var_1070_cast_fp16_1"], y: ["var_1092_cast_fp16"])
model_transformer_layers_7_attn_k_norm_weight_to_fp16 = const()
0.01% N|C key_15_cast_fp16 = ios17.mul(y: ["model_transformer_layers_7_attn_k_norm_weight_to_fp16"], x: ["var_1093_cast_fp16"])
0.03% N|C var_1110_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["query_15_cast_fp16"])
var_1111_split_sizes_0 = const()
var_1111_axis_0 = const()
0.03% N|C var_1111_cast_fp16_0, var_1111_cast_fp16_1 = split(x: ["query_15_cast_fp16"], axis: ["var_1111_axis_0"], split_sizes: ["var_1111_split_sizes_0"])
const_55_promoted_to_fp16 = const()
0.02% N|C var_1113_cast_fp16 = ios17.mul(y: ["const_55_promoted_to_fp16"], x: ["var_1111_cast_fp16_1"])
var_1115_interleave_0 = const()
0.01% N|C var_1115_cast_fp16 = ios17.concat(axis: ["var_5"], values: ["var_1113_cast_fp16", "var_1111_cast_fp16_0"], interleave: ["var_1115_interleave_0"])
0.03% N|C var_1116_cast_fp16 = ios17.mul(x: ["var_1115_cast_fp16"], y: ["var_152_to_fp16"])
0.08% N|C query_float_15_cast_fp16 = ios17.add(x: ["var_1110_cast_fp16"], y: ["var_1116_cast_fp16"])
0.01% N|C var_1122_cast_fp16 = ios17.mul(y: ["var_156_to_fp16"], x: ["key_15_cast_fp16"])
var_1123_split_sizes_0 = const()
var_1123_axis_0 = const()
0.01% N|C var_1123_cast_fp16_0, var_1123_cast_fp16_1 = split(x: ["key_15_cast_fp16"], axis: ["var_1123_axis_0"], split_sizes: ["var_1123_split_sizes_0"])
const_56_promoted_to_fp16 = const()
0.00% N|C var_1125_cast_fp16 = ios17.mul(y: ["const_56_promoted_to_fp16"], x: ["var_1123_cast_fp16_1"])
var_1127_interleave_0 = const()
0.00% N|C var_1127_cast_fp16 = ios17.concat(values: ["var_1125_cast_fp16", "var_1123_cast_fp16_0"], axis: ["var_5"], interleave: ["var_1127_interleave_0"])
0.01% N|C var_1128_cast_fp16 = ios17.mul(y: ["var_152_to_fp16"], x: ["var_1127_cast_fp16"])
0.02% N|C key_float_15_cast_fp16 = ios17.add(x: ["var_1122_cast_fp16"], y: ["var_1128_cast_fp16"])
reshape_28_shape_0 = const()
0.01% N|C reshape_28_cast_fp16 = ios17.reshape(x: ["key_float_15_cast_fp16"], shape: ["reshape_28_shape_0"])
tile_14_reps_0 = const()
0.02% N|C tile_14_cast_fp16 = tile(x: ["reshape_28_cast_fp16"], reps: ["tile_14_reps_0"])
reshape_29_shape_0 = const()
0.03% N|C reshape_29_cast_fp16 = ios17.reshape(shape: ["reshape_29_shape_0"], x: ["tile_14_cast_fp16"])
transpose_14_perm_0 = const()
keys_31_shape_0 = const()
0.07% N|C transpose_66 = ios17.transpose(perm: ["transpose_14_perm_0"], x: ["reshape_29_cast_fp16"])
0.03% N|C keys_31_cast_fp16 = ios17.reshape(shape: ["keys_31_shape_0"], x: ["transpose_66"])
reshape_30_shape_0 = const()
0.01% N|C reshape_30_cast_fp16 = ios17.reshape(shape: ["reshape_30_shape_0"], x: ["var_1070_cast_fp16_2"])
tile_15_reps_0 = const()
0.02% N|C tile_15_cast_fp16 = tile(x: ["reshape_30_cast_fp16"], reps: ["tile_15_reps_0"])
reshape_31_shape_0 = const()
0.03% N|C reshape_31_cast_fp16 = ios17.reshape(shape: ["reshape_31_shape_0"], x: ["tile_15_cast_fp16"])
transpose_15_perm_0 = const()
values_31_shape_0 = const()
0.07% N|C transpose_65 = ios17.transpose(x: ["reshape_31_cast_fp16"], perm: ["transpose_15_perm_0"])
0.03% N|C values_31_cast_fp16 = ios17.reshape(shape: ["values_31_shape_0"], x: ["transpose_65"])
mul_7_y_0_to_fp16 = const()
0.03% N|C mul_7_cast_fp16 = ios17.mul(x: ["query_float_15_cast_fp16"], y: ["mul_7_y_0_to_fp16"])
matmul_7_transpose_y_0 = const()
matmul_7_transpose_x_0 = const()
0.07% N|C matmul_7_cast_fp16 = ios17.matmul(x: ["mul_7_cast_fp16"], transpose_x: ["matmul_7_transpose_x_0"], transpose_y: ["matmul_7_transpose_y_0"], y: ["keys_31_cast_fp16"])
0.08% N|C add_15_cast_fp16 = ios17.add(x: ["matmul_7_cast_fp16"], y: ["causal_mask_3_to_fp16"])
softmax_7_axis_0 = const()
0.07% N|C softmax_7_cast_fp16 = ios16.softmax(x: ["add_15_cast_fp16"], axis: ["softmax_7_axis_0"])
attn_output_29_transpose_x_0 = const()
attn_output_29_transpose_y_0 = const()
0.07% N|C attn_output_29_cast_fp16 = ios17.matmul(x: ["softmax_7_cast_fp16"], y: ["values_31_cast_fp16"], transpose_y: ["attn_output_29_transpose_y_0"], transpose_x: ["attn_output_29_transpose_x_0"])
var_1144_perm_0 = const()
var_1146 = const()
0.07% N|C transpose_64 = ios17.transpose(perm: ["var_1144_perm_0"], x: ["attn_output_29_cast_fp16"])
0.03% N|C input_59_cast_fp16 = ios17.reshape(x: ["transpose_64"], shape: ["var_1146"])
model_transformer_layers_7_attn_out_proj_weight_to_fp16 = const()
0.24% N|C linear_29_cast_fp16 = ios17.linear(x: ["input_59_cast_fp16"], bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_7_attn_out_proj_weight_to_fp16"])
0.09% N|C x_155_cast_fp16 = ios17.add(x: ["x_139_cast_fp16"], y: ["linear_29_cast_fp16"])
var_10_promoted_to_fp16_31 = const()
0.04% N|C var_1153_cast_fp16 = ios17.pow(x: ["x_155_cast_fp16"], y: ["var_10_promoted_to_fp16_31"])
var_1154 = const()
0.02% N|C var_1155_cast_fp16 = ios16.reduce_mean(x: ["var_1153_cast_fp16"], axes: ["var_1154"], keep_dims: ["var_22"])
var_1156_to_fp16 = const()
0.00% N|C var_1157_cast_fp16 = ios17.add(y: ["var_1156_to_fp16"], x: ["var_1155_cast_fp16"])
var_1158_epsilon_0 = const()
0.00% N|C var_1158_cast_fp16 = ios17.rsqrt(x: ["var_1157_cast_fp16"], epsilon: ["var_1158_epsilon_0"])
0.04% N|C var_1159_cast_fp16 = ios17.mul(y: ["var_1158_cast_fp16"], x: ["x_155_cast_fp16"])
model_transformer_layers_7_ffn_norm_weight_to_fp16 = const()
0.04% N|C input_61_cast_fp16 = ios17.mul(x: ["var_1159_cast_fp16"], y: ["model_transformer_layers_7_ffn_norm_weight_to_fp16"])
model_transformer_layers_7_ffn_proj_1_weight_to_fp16 = const()
linear_30_bias_0_to_fp16 = const()
0.15% N|C linear_30_cast_fp16 = ios17.linear(bias: ["linear_30_bias_0_to_fp16"], weight: ["model_transformer_layers_7_ffn_proj_1_weight_to_fp16"], x: ["input_61_cast_fp16"])
var_1169_split_sizes_0 = const()
var_1169_axis_0 = const()
0.14% N|C var_1169_cast_fp16_0, var_1169_cast_fp16_1 = split(axis: ["var_1169_axis_0"], split_sizes: ["var_1169_split_sizes_0"], x: ["linear_30_cast_fp16"])
0.09% N|C var_1171_cast_fp16 = ios16.silu(x: ["var_1169_cast_fp16_0"])
0.17% N|C input_65_cast_fp16 = ios17.mul(y: ["var_1169_cast_fp16_1"], x: ["var_1171_cast_fp16"])
model_transformer_layers_7_ffn_proj_2_weight_to_fp16 = const()
1.82% N|C linear_31_cast_fp16 = ios17.linear(bias: ["linear_1_bias_0_to_fp16"], weight: ["model_transformer_layers_7_ffn_proj_2_weight_to_fp16"], x: ["input_65_cast_fp16"])
0.09% N|C x_159_cast_fp16 = ios17.add(y: ["linear_31_cast_fp16"], x: ["x_155_cast_fp16"])
var_10_promoted_to_fp16_32 = const()
0.04% N|C var_1182_cast_fp16 = ios17.pow(x: ["x_159_cast_fp16"], y: ["var_10_promoted_to_fp16_32"])
var_1183 = const()
0.02% N|C var_1184_cast_fp16 = ios16.reduce_mean(axes: ["var_1183"], keep_dims: ["var_22"], x: ["var_1182_cast_fp16"])
var_1185_to_fp16 = const()