From 32bd8eafeab5504f5cc5703f0b0efeddb674927d Mon Sep 17 00:00:00 2001 From: KyleShao1016 Date: Sun, 8 Jun 2025 21:12:07 -0700 Subject: [PATCH] Implement 1p1d --- .vscode/settings.json | 3 + .../__pycache__/cluster.cpython-312.pyc | Bin 0 -> 9100 bytes .../__pycache__/device.cpython-312.pyc | Bin 0 -> 746 bytes .../cluster/__pycache__/gpu.cpython-312.pyc | Bin 0 -> 3700 bytes .../__pycache__/plan.cpython-312.pyc | Bin 0 -> 1243 bytes .../ir/__pycache__/block.cpython-312.pyc | Bin 0 -> 620 bytes apex_plus/ir/__pycache__/cell.cpython-312.pyc | Bin 0 -> 1707 bytes apex_plus/ir/__pycache__/task.cpython-312.pyc | Bin 0 -> 1413 bytes .../__pycache__/transformer.cpython-312.pyc | Bin 0 -> 3451 bytes .../__pycache__/attention.cpython-312.pyc | Bin 0 -> 9541 bytes .../__pycache__/embedding.cpython-312.pyc | Bin 0 -> 656 bytes .../ir/cells/__pycache__/ffn.cpython-312.pyc | Bin 0 -> 7610 bytes .../cells/__pycache__/sampler.cpython-312.pyc | Bin 0 -> 650 bytes .../__pycache__/attention.cpython-312.pyc | Bin 0 -> 5586 bytes .../ir/tasks/__pycache__/ffn.cpython-312.pyc | Bin 0 -> 5538 bytes .../__pycache__/CLIP_vision.cpython-312.pyc | Bin 0 -> 3299 bytes .../models/__pycache__/bloom.cpython-312.pyc | Bin 0 -> 2926 bytes .../models/__pycache__/gpt2.cpython-312.pyc | Bin 0 -> 2882 bytes .../__pycache__/gpt_bigcode.cpython-312.pyc | Bin 0 -> 3299 bytes .../__pycache__/gpt_neox.cpython-312.pyc | Bin 0 -> 3321 bytes .../models/__pycache__/gptj.cpython-312.pyc | Bin 0 -> 2771 bytes .../models/__pycache__/llama.cpython-312.pyc | Bin 0 -> 2879 bytes .../models/__pycache__/llama3.cpython-312.pyc | Bin 0 -> 3201 bytes .../__pycache__/mistral.cpython-312.pyc | Bin 0 -> 3316 bytes .../__pycache__/mixtral.cpython-312.pyc | Bin 0 -> 4100 bytes .../models/__pycache__/model.cpython-312.pyc | Bin 0 -> 838 bytes .../models/__pycache__/moe.cpython-312.pyc | Bin 0 -> 3929 bytes .../models/__pycache__/opt.cpython-312.pyc | Bin 0 -> 2910 bytes .../__pycache__/registry.cpython-312.pyc | Bin 0 -> 2581 
bytes .../models/__pycache__/t5.cpython-312.pyc | Bin 0 -> 4218 bytes .../__pycache__/whisper.cpython-312.pyc | Bin 0 -> 4262 bytes .../parallel/__pycache__/comm.cpython-312.pyc | Bin 0 -> 2161 bytes .../__pycache__/reshard.cpython-312.pyc | Bin 0 -> 5575 bytes .../__pycache__/schedule.cpython-312.pyc | Bin 0 -> 6061 bytes .../__pycache__/task_parallel.cpython-312.pyc | Bin 0 -> 3343 bytes .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 963 bytes .../__pycache__/attention.cpython-312.pyc | Bin 0 -> 3378 bytes .../__pycache__/default.cpython-312.pyc | Bin 0 -> 2298 bytes .../templates/__pycache__/ffn.cpython-312.pyc | Bin 0 -> 2246 bytes .../search/__pycache__/engine.cpython-312.pyc | Bin 0 -> 14193 bytes apex_plus/search/engine.py | 15 +- .../__pycache__/comm_profile.cpython-312.pyc | Bin 0 -> 5911 bytes .../__pycache__/comp_profile.cpython-312.pyc | Bin 0 -> 11518 bytes .../__pycache__/simulator.cpython-312.pyc | Bin 0 -> 34613 bytes .../simulator_origin.cpython-312.pyc | Bin 0 -> 32236 bytes .../__pycache__/trace.cpython-312.pyc | Bin 0 -> 3661 bytes apex_plus/simulator/simulator.py | 799 +++++++-------- apex_plus/simulator/simulator_origin.py | 914 ++++++++++++++++++ .../utils/__pycache__/dtype.cpython-312.pyc | Bin 0 -> 1753 bytes main.py | 21 +- 50 files changed, 1286 insertions(+), 466 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 apex_plus/cluster/__pycache__/cluster.cpython-312.pyc create mode 100644 apex_plus/cluster/__pycache__/device.cpython-312.pyc create mode 100644 apex_plus/cluster/__pycache__/gpu.cpython-312.pyc create mode 100644 apex_plus/execution/__pycache__/plan.cpython-312.pyc create mode 100644 apex_plus/ir/__pycache__/block.cpython-312.pyc create mode 100644 apex_plus/ir/__pycache__/cell.cpython-312.pyc create mode 100644 apex_plus/ir/__pycache__/task.cpython-312.pyc create mode 100644 apex_plus/ir/__pycache__/transformer.cpython-312.pyc create mode 100644 
apex_plus/ir/cells/__pycache__/attention.cpython-312.pyc create mode 100644 apex_plus/ir/cells/__pycache__/embedding.cpython-312.pyc create mode 100644 apex_plus/ir/cells/__pycache__/ffn.cpython-312.pyc create mode 100644 apex_plus/ir/cells/__pycache__/sampler.cpython-312.pyc create mode 100644 apex_plus/ir/tasks/__pycache__/attention.cpython-312.pyc create mode 100644 apex_plus/ir/tasks/__pycache__/ffn.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/CLIP_vision.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/bloom.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/gpt2.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/gpt_bigcode.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/gpt_neox.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/gptj.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/llama.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/llama3.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/mistral.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/mixtral.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/model.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/moe.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/opt.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/registry.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/t5.cpython-312.pyc create mode 100644 apex_plus/models/__pycache__/whisper.cpython-312.pyc create mode 100644 apex_plus/parallel/__pycache__/comm.cpython-312.pyc create mode 100644 apex_plus/parallel/__pycache__/reshard.cpython-312.pyc create mode 100644 apex_plus/parallel/__pycache__/schedule.cpython-312.pyc create mode 100644 apex_plus/parallel/__pycache__/task_parallel.cpython-312.pyc create mode 100644 apex_plus/parallel/templates/__pycache__/__init__.cpython-312.pyc create mode 100644 
apex_plus/parallel/templates/__pycache__/attention.cpython-312.pyc create mode 100644 apex_plus/parallel/templates/__pycache__/default.cpython-312.pyc create mode 100644 apex_plus/parallel/templates/__pycache__/ffn.cpython-312.pyc create mode 100644 apex_plus/search/__pycache__/engine.cpython-312.pyc create mode 100644 apex_plus/simulator/__pycache__/comm_profile.cpython-312.pyc create mode 100644 apex_plus/simulator/__pycache__/comp_profile.cpython-312.pyc create mode 100644 apex_plus/simulator/__pycache__/simulator.cpython-312.pyc create mode 100644 apex_plus/simulator/__pycache__/simulator_origin.cpython-312.pyc create mode 100644 apex_plus/simulator/__pycache__/trace.cpython-312.pyc create mode 100644 apex_plus/simulator/simulator_origin.py create mode 100644 apex_plus/utils/__pycache__/dtype.cpython-312.pyc diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e8aab8b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "workbench.colorTheme": "Default Dark Modern" +} \ No newline at end of file diff --git a/apex_plus/cluster/__pycache__/cluster.cpython-312.pyc b/apex_plus/cluster/__pycache__/cluster.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfda0e3c43d9aa2275e256f4d333642165988cda GIT binary patch literal 9100 zcmbU`TWlLwc6T_#hxial4Mj@SYb05bDBH4S%XZ>gvbB<}#J1$PcCwC&EJ2eaSvEx} zGn5mtRAOZPAt%9_+Ah{KwhL?91tKFZqM#qu_NVzyWW13J=2Tg&x=m#CSXgBCb z&$%;04rw~rb^us)O%@2Amm@LqdlAoY##&Q7KxAu6D70E zBr|K6G%(mVMA=zxlA|yiHO`tQO$;%RF%sdflZf#PgR;V;dC?RwKgCYKkmpX%$nm`8 z%zS)iE*6Rgn7nCJygCyW0bD$SaDdM@#eW7k+l2>Pu zw|W)B67lLVJXJA#I*j_N7!5iM@1iBpm^Y6^7v#7oCF)Pr zl;n+3@v0cjTVOI8k;GWu9HGLH^EI)B*Y6-|;! 
zl3;NK0=mENz)HeCyiHt1Bv;}hdgj(k-%!Y zw`gF*fOrb0|A*bdX(%*{B*0Eccytn;Wxyn%O-r~h$&QT!nmii?;X0?qco5~P3OQiT zbFvtns!HKTM5-4WdfrI8vKbZ>l;`{7d)|^oN#1j1F)GTJLUVgYMfpm6Zhp_3i}6cy zu|1)A@q^$z=$sBxRdA1{it~$kOE5SSn~4X5374w2ecA}_M*b3-Yot(1yiLp8XVzSU z0Dq30uND66xyBCotNqqiyBxo{bYp4L*0Rdnx3zEa-c)X<)TqW+7NXgG}p+J+UE56JCnC3)2G*L>!W`& z{?+*U(E?#c7+=oUghEKgR)-5Lia{3o1Vyd?h^{KLEF0Vl3y#U1lP{P@R?# zQO~JEQ*Y|F67`xUl?`_x5!%Hyl5_GmKfLi_#?ta4H)v0k8wr4yaU-=d<@+}Iy&3!7Coq0w<;W_tf@=K(Co%-9(G$_@HjLq;?eFpr z?pl)lIV^PBq7iu`*(~j>4`I+eVAWrBRz0`0r&r{Ez+3On{SI04g@X$_umAl^>Z*_+G+7 zKTOJppt(l2sziva_vFUt-;Mw6_@?*dCVw(xKl!aLY-H>^sA}YySXeJX3#>=A<3C^& zjDdmt1fxB;=V_)wih4RA!$*_ik|AkO;=lCMBvYnicmx%XMny9rW|93v@smsfcvymz zd5Mc-5TZ|#(bj`}<`~Rlwo~P;Nn?^tfHo(M5hekYO3#n^4|lnky(w#A8%KvlA6O3^ zZN-x`-Q%>1+%tZmI!oPYjFe8J;tb$? zs5Gs(1K4R5Efy>tXRl2>by{JUX+3* zVrvnU^URF$X1|`e0^f&oG^Z$^P~1`j&X!iBC`!JC4m0mkQs@$bjC)bCXSJnymb4p& z6K>6k74gw?$$t%=8*(h0J;i)-Fz0DV9b6v&*5^+T+!?$*xakWl+jBNwy8phd`H9o@ zi~Yr{_r7xnWM5l7JJ&|PJbm}{X3uLIXR|#+8GBc*v1$3_R=q#n|H}(0Cg*Rxv+MS* zoTon1(4Y16ubP)4m3Ue)i+0qecHMWh=A2FGP}bSD+OIH?3mn*RX9I^b_E*%r3vzU4d#<dV7@x8bO-tiMy|OoWdRN} z4!1l_xrXLD*4x%xdmz{G+H*5k@BL5KchM&Jw$*k>yXhoUi7)71b55s!WqJBQmLJF*7~kY4 zGWH22C9zD;F9hR@^J3nn^L1f0m;lFwU=m8z?J5Ab;Kpo8pEMXW0YSikQVx=XA}w6f zFj|m?#DKkAOlNUffhX<9cW&CLd*Yt{mo~bJL3n zzb42R=RN{*8KOIgvS24oEz8`N+nf5+uic%?!&`OrsZhFMv#w)#BxiTsJayw#ntf<* zE*MGOj%VN}__TX% z2Imw_;6R5ArQ`-Q!IT&pk;G736hg(U;r*HTC1GkI8r?k$_ea9S+oxw@SJ3pO&~7|V zAv_n0iDAq(v~&VX8|nd`qL zs&3LzfMir#MctrPG|oe{Dt0vrT$XgMxlHFOX3=-ZKZ6Fe&PBSqmn~bq=JY#X`?{CM za;?JhC`92*)1I&E_okRFZ)3VUv*XpQ|J9BDtpCWyrA_Zxip{w^w`?o6^vFY(01D~t z2gl*4zj@}yncE|)p>@ySyB{=#}wYsv9V>EV?>%Gg`K z;l1gzYYiLkJmAMN_Aw6O(9JW^+of^&t~1crQ-k!ni{3D;8WugI^9YWp!mv*?o3 zD9(Vw7Eqye3aeA*o2IZ~nuHCsE;W@ZPKUjTw8UH zqjAzk6`M_(K^@h|x@3v{tSW_xQgl)ZPNRbl37nslsEgz9SwKHNZMbLAz2V)eZ>XVf z7^iDk|KRtSUiO}f6-*JE9<#_PeB98F>p9iKa4yQ7dDYr3sdppay~G_Txe2-hJLU$E zZ>Z2^yvnV3mTHj0WKD#nGDnx;ClIeXCs7>sv?mXTcc|&}e!+`jYMl6j$ 
z^AvUcDSG5Y`x~*Vq3BFR00(|il!Un{0lqxR0+a~?`hvqki3|!BWSb@!((3?CXd~3) zyn&0N1w5s347E!dg$9C|_3cn}K|C%=bCS*@(fbSxcXU}OfnMg8q~nN$!H@c-yg95? zRry-&HaAEwcx7a-=%!RZr9{U0@QhgUQst4cZ0-&$A>X4#2KXquu$s);_HWfS=Un?U zhfZYvh!TXfEvF9e@Z_Bjpf6zFvZDc*Q-!u-ShM%*8U} zXc#4m4!S~#qTqHS19iwip@+CzQVTh|2fcNA?4f-}VVprPJ;79W(@$EuRztV{rV?#`*Qe7Y!)ebd#D@+~1j7Uqw~$(Iy5YW~o#O4x z@;edF-&4n85 z!3BY@fY?xoSZlW(2=#JCYq}Fo%V;gw8Dnc&-X`!@FmcAt^a&jA#PLIGBRGBt$GcW} z9PcXF4*>rD0)fBPSJy_@8}FXUb{^Trej&y4alxAAp`5+DE+unjW>Eu^iZP(wj0Qri$z$?51T zl>U(M9fckNm7anMJp~nd1XX%EDlv3{CgTu&Km4oY2r1MEDZ~I#+GhrU6d9uACIXhc z2}% zpW?=in?_LZ2ZB8y%*uPaJK#dR(C3_6k9*Ip{@m~P7{*t4y#9{-lagaNt*9QL*fEQ} zV3KW_2Dm_qU1L-2P+a^RDKgz2C!j2ilCO+^zU+j4~T4d&B zlE*4hsZsJdKn63rbhL_BKqG>SpsB`WGP#@kZ?ppKUly^_>mc*zN?)2R_g@ueouz(| zt1FSm8~v9GF*OSO#_UC2lpP_WG%`Y551XMbjtkUB)0^M0+a4QEzjV$k%veZ+9OFMj zK?e^(7h%TX6iL(J37jF8qqoY4L?sy%A`J2%j7)L;sCDp5Rk{WP#`tkKGLuzVKnFY>Tmm5<}@>sXH>pX3|J%jtWR`(gA RIM|%?-&T)Le=&sm!XMtstu6on literal 0 HcmV?d00001 diff --git a/apex_plus/cluster/__pycache__/gpu.cpython-312.pyc b/apex_plus/cluster/__pycache__/gpu.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3713a9980effe308d79ea3961777f276f889f752 GIT binary patch literal 3700 zcmb_eT}&I<6~6Q1v9UdNg28}+#Qbcs4j2bE+UBREg+Ku9@}od@7jLyw*E1moW4qVm zP+LTzRVtBct0dY7K)b7@+J_CwLw{b{K6PLERH?#BE37G%klH@vZMLK@dFnZ1j|Wm` z_oY{wbH97ex#ym9@4090pG46~pk287o!cJ*`Ueio=Bj|h->`&i5ShqKl0>m*lgu(3 zWf{a=l3V7ZJYYU*W62niZTE;QC}h!X8nXH)Tp360ihc2f#WxXf>rHI+z-_QbCX(%+ za8XxnJF-Ku6>NmmwkOwBrM2uj%hCiFb-%(ou3E!S8TD&fbb~Fiwk?HUY%?RWdRYYi zDQj#Z*^^*vul));-dET;0sNq8)#~Mw39fbw|3CY%7bk1)g~=6R?8{CR2S+Qg{T|K`}RCXVBH_Pt5z;&~W)#s@0H{tNwM z!#m^<-G4C@xtUdHHnNmYD%snybYw)yE~)8EWG1iPPNyQVjPfHXlgwq0pg0Uwg+>-K zxxP%E3Sh?|bpWzGK<*LULHws5&TY&;n*ZqiwW(4=`@@lqiANJ3y|p%3YCL61emq67 z02s4FT)_w-j3I^e+KZS^ZO4@w_U4%UI2vr@u&TZsB=)*m(~%GzWF=l-hDrt3`j6EE>NS zQ&R3L`gW76R(}Ftt6%j-t(MoX)pwcp^CZMh6~vpVrBr$)6@)Fm91OWBx}n)}nT$ed z0I?{6NV+jPHZePQV_tK<6HDflQA*QPb3m?3#v%9(B`hG)+_W?~Iyrq~UV{7OR?})M 
zDJ?6@X`0u(hStpJ&|A{=x2I=jHD^YNElIbM=}cB@vV<^S8XlS&d3R!DZd|K3l)!}a zgIG#lNyzH$5U253C3(y6)^Ib6CmIX}=}Gh%szf4{P$jA02Iu?C(HO|Gc_3>-=9F%?B?3C(A`w&whQ|=H18dKc3uezx=fR%C7s$ ze)DVFOU32^m@DqL4gBiDZwG%pSZo`@X)XJK(Btl3hIhinz(r8-?ss1LUH2ctzYiBX z$JhD4I!+rGJ6_2baD))VMyBFsL#C2N!2Um??&5KLY@d9JoWZeES$4Js_#x z7f`M=kZ>TuyZBNA2@jI$y|Tn0vAxoo@TqH-gzQrV55)F*wH^!>JiWmqC1J4O?zIwY zu+UH*9m}A>g4la>$_u79#9Qe^yTK)T21yT)qni!6MU@P-a;rRwqT2U>tdRpx+veP7 zA8dWF+t#ZSCfr(TJ-yf3Q*72V^vv|z(_`}?mYzqIAdoDczj^)Kn0D~HOefO|`N9b^5lv1U z_0jGc`KK?q?fatT7cHN+7JcD$$G*3F`@N#KZ`aXh#2Ru^EQk~z8Wc;N#wAj!#${DX zUq@aqk_ZaiSv966;>!yBp=J68^1Vo~j2cyeHX}kZ3STw`1b)f9$2P$n`24yJfi4h# zi*7fdgE&2hbp{MFwvO%DLjs*H*lo_uu0sNyUQdL2on>v2%`Tllx83z`><&5Le}(+= z>bPd8fjXhnnyYVzzRHWCubhVzPfBaF#5pyQ%=XDxUn!oE#$iR}N8xc(eqG=Xg^Eb< zI+u^D1JM8w%?>4YXzq=H3g~}R6AaEp+J!o=Au)F3_%6M0e7pRtx=OEt3Ran{1XSl3 zhWQ(D{fqScot!-o#5L}@ATS;4@`J?v#B%~r50Iu)d;Z>{zqi!bw%2&B*m$nwZ!dGc z67471H4$`d?ma$G|Mqz^;SkR`9gxxu=7Pte^Q0fzC*j{{lcf?s@7A&D8WSK{Gt$TKDrj=1 zk?Je@;Xjj{ZzQHu{llDZ3{I!|<~e^wG#10%HNxU(Y~DzU-A2v%VYJHr$4@(chyU;# z)ec<5)1RGFeX*%89;cT)Aj<@qnmA3)G?O#O>JyC2s%Pr`Q&n%O`tkVVuf&su*sLc? 
zktj>x9$JJSSk9HGVYpGlC{_4#Ks;a=?`t`$onbOAWt?5c>0yJYT+wzI8$xsh{#JhZ zjE})r9N0^!cEOVrdFo*YJ+N_QS!yEDIN{)&5z nTq$#HE}k(J6MR7S2r5q`gq*|hPk3|=PcK6W68#OR{6GH!*CH)R literal 0 HcmV?d00001 diff --git a/apex_plus/ir/__pycache__/block.cpython-312.pyc b/apex_plus/ir/__pycache__/block.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab05365b9657108dcdc3fbbebdbe33ae05aabd3e GIT binary patch literal 620 zcmYjNziZn-6n-b26ywGzY4d|(3L%?1Sp5S^hBlBTm@Zkg9Io$D3!f$RPKGFC=-{Cp z3f=rSluY?MGItRa3L!%_Q5OQ4`koY<9^7}|ckkZ&-o5KirwzD%npdMMJa0%W7k?y+ z*GT3dK{5>!7BNQL(ayw;Tm}vtfplje`K8ljM7;0<|Bgh!N`4&MoX&Sj>tXYv!Ii;2 z>NzNgn1slYOu34`z&q(k_moRMn;s3hv8)|7$!6vH(0bE!dSJFNz;N4b Wgvw=%{esf?~SyI?3$c9FmK-Hy!Yn4H&1i9 zSpw@vakcT5LCCL2q{|v3!)t)_2_uY#q($qLB5s66%dDG}80163%w58)L%0D~-RfqF z_8*8Asj@z4$^_5Pe7O}MGhGu03^;wlNu4rMH`pv(XVpz^G0HRR2D5gp!?8`>2Hp&J z-T)dT8PLlDXBKc9@HsZa=Yh_xTg(A^LE{F?0-o<0#W`i&pnf8wF9b_YH{*Ws~z$dj}p#45F!@9wj>YNC0ef@-za@9xsau;ZpdZB zk4v|>+)Cnh>CA>Yf1UuxX zh090IBE;2e=u8aumP7)RnWr6D=J_k&<&D zl=A>~$S?WAH`@=kkDNEfA~4R5s} zy4+K*ahxr0@QB}Si%Rd3eu=SxTMpyE4`s!jDBuqu5&Ta6-XTYY>NB^p&mLaymmih; z6(IhR_`lr0xI)35=|^3$4D{)~6PJM5o1b>}I&dTw3WhCTcvh%BW>2Dn=m)p*yik3f zuO2y7-HA}s8PKW~&xs{C)vV{WV%7;^sMNgYecAED(TEA`U^N$nzLYHv{{T~_OoY&t z#YYwxsSZz>%_tF=ma^kJn>H>%nLYboU4wxd?De55O)NS~PQIU=w?B zV9?CU-qH&K$Iwg}0~_Rf*Y~aO-W!4hQ~~VYoWkE7ApbF&u-53_`9FcTMs?!aW38A` zHq1^l+7Ot7TAciCZd15e+EH3M#i Nm*^UOK@e)ge*qWhX50V( literal 0 HcmV?d00001 diff --git a/apex_plus/ir/__pycache__/task.cpython-312.pyc b/apex_plus/ir/__pycache__/task.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf59a7824f649d44d297ea50ffc01de65a69f448 GIT binary patch literal 1413 zcmb7DJ8Tm{5Z%3BE{Pp09~1GD5|9!(g-rt$P$eKFWFY|sZdQkPLk^$s%-Q3LV+nKjBvv2bGF#_woTw2?w zgnU8cU|E^cKLbjaaKdRonzTkK>PBER&6-JxK`s++ZW35>lF zq17D_v86;sI*~Z8f0zvZ5ht zxz&kX#{AINjIHO9Ncq5YA-J+ZJR%?6TptuN#0`@t+{&PI$wP7%A%Cp9S!)K}*NGej zyJW@jEaR+!P>yUt#*$#?y@tg@t#8 zg}3g4EZ{*KZ>VCMSW_A!h(5 
ziy#!no1051P9Ue2C%q<9{<>J7P6|2f#8iE(=_B%WV*af=pOE}7T5=YSOPs`^?30+N zai8g^A;OfvkwXGQeyo-}jsgob)J?F{Zf284OGfpQ#77EG$gBryuq7kDVxHFFdM?S6 zSw{ez4b11lOp>BxNPKJ6QbA(wc#N@T#M=S%InFV5uk8g{*RedYskD^SsO6OQv|n!u zm|!k(%p?+vO+VD~7@8;198bn4Ma9^)H)&dR)6{NDLFz{$fhceapcW1m)HvRk#?5+d-a1ud zxgte5ZN%JXmL&CHuO zZ{Ezy&(Uaz!1pVgSr(#%{EkHL^ID+u0e~$c6PZ&;h0AjsVvpjf@Oj?EyyC6+@;(!L z6@Mj=4{*do-Y2qegUJ3zp01_5uojSnKar3)UHGt8FIUx~0@(MCQZ20j2wtizQdusm z*MJCIE>>y^Wu>lqcT8ABrvYG#P?G0llK03QAAau_(sb=MJOvJ{ge8{S9Oa`})f(D0H+j)Hz+9{xICK%loslWUR!^3AX50gAIJ9w_TL2x)qOLQV5)`he;kI%)cW^4W6%p!_yP zuG=otnDUwkKFe>vV8g)ymgZSYsbObG|bvQ_3RRI`(ocJuqQW} zeXF?GS?~82qH!QQb{zKnaoIaW^SX=VKCfJ4ti2xv3;pM8EqA-$XE-G{8Rsf%#9hj< zdpO~PSdxSVhKJucxj)Ut%Bofu7pbUL)srf{R;-tA&{^?XwJtU;IBtL~xlmQ9SY8rc zjLpr7r!3`p7iAT!!cwm>FmLOdzB=xgW_#e~t3%Y5>|e?up^5#zI;OhHF1|XXG|~1| z5?Bzz%djr9Fb)wMpg0p2!vSxX9{RMXtkO%2RhcB{UX3bC2T;>6n?&n-`EKqLjWR8_ zvZhdNxmeA;OSP4HwU+y6t-f4UbHy6HS*R(iS|2L2Tz#+VWov7?P$-nua=lP!4EEK- zY>$iE&^F<&uM=aCj2+qZeiLg?o!#{QA{=gKX7G1xADun~Vv+XvF`ORR4aaxFQ?2mS zgO4AFk3CIJ7#=Q_Zl}k0(%DuzyFHvUcy8cyhv!ozDMUyl z@&CHWlSI}?{8v9gYo-GS0{L^JS}HD@J*5;tj`d;cnXP02z!s_RH4d)n>Gknjf%;z4 z=;sR_C^XGLlZVFQg9ajij%EEawEP9h!?2?;EO}B;=Szyl(#XJ^cFlP6Dqz}chtUO7c*$n&^ zPqrgN?Z}~aWVAP?+L3f8E+j$*5rQE$3?@o;IDkN(%~1Ua0G>zK#{j+h-GiqR%((PR znEA)fC1Wpg=?tKQeZ{3Kzzp^R^Ev1A0cNnaV;OnngBjs|9F8Wq&=~HE-*NjfGuIfJ zb*asBjl`_zsL8gVv#yzf4%S>FG%H$fo4H18R_uG;%)R6!DR6*7CUGnUd(rSbtYm;X z#AuD}uR~q#XDEIPhIO(Vn|^TR`>Wqw-JU*c5bn*X_SCVRsdKHVbK8^WfpHs))b7R)hZ+%qZzeyJ=<1 zENec#&B*B!j)@Xg6fJ9Kc{kQ$E+ST6mvId}L&Bobsu*VOLu%gv1aoeEZxOs4vCFc(>dWrQ4?5JrRv z&xl8$vM1-A5JyDLdvnr+JR(o{Mtl?g5kHrSxxhqlBq%TsJI*rFTP!0h;s;)%-;ofP z`XKc?q+u=%KpJ#NBU~DSH0+Q@xikW4)FF*c$CB~SX^^C+ES}72g|hTQK0BFD3T5%A zlFKE%W#3aLj+{`^88FWpOkgl;%1|xv)X8U#Wpf2Z9W|6SsB9Lh!(c8lg^dUqHsZ+$ ziZ|oIpEo0p$#_r9Nye*4irk8NW1fr*i4PJlcprE_-Z+4Aa7@euP!3It$za)w@ymhy z*@@IRG}FqV@oXleS`KNgU~@&hXGS>F#hB)KlKlX^-a%W8f^qG!3O=h__pv@OH5_8^ct<^_*{J6` zI~aSzTL{?gyl5#}rCRQ;m9@Lx^_<@+qPI8DI%Vx-HVuiurLVE8o?(^LOv-8)nHm8p z2WaYz8JE;_{duBSPXyfV0-XTRh 
zQ<$9UeP+5aKAG=LPbsgbrgCRB2bWcQN3kOzDioA_0h_dEYP#%CrLy^KA(bkIu_t=0 z5*=0ThahjVN|0^daZWrRU2N+-CocF~uE!cnv8{S+>s-hCJ@57CvAz$Fd>Y$7@83Tk z+rQZQK&dsUwH(bAEUpIE)1TkdOgj-KM$Vz=-KnfFC4j0 zxcL2dzdu(f?HbT`4O|O9G(ULi2Guwx{v>#FiGi!qynMX9Yp_uqwKo|=#Pcn=84R|h z>jry{T@{9}3Th+jAZtRVQNx6~3A7wItrSv)w01@-M(F+RVhf&DX|dj9i?POcifbzBrl%CG*lZ12FKv(}Za{Uesnqni1TjbSPgJ)K-CJ z)-p$qmT2W7rB;}8&+xINSPo{jY+fs*^P`FyLetuAzUtviq3(iIBLx%Yf<(sCT1rb# zC@F1987(%@{M3oIP>Y8_-eiA?wa)JRS>r-%=VD{a^>F;1;kSopmCKWtCiU=xpN3C; z*iqUysP7y6jZ!*xO2=>CsfyPV2=e(%hH3BTX{1y+m`Y7dX3pk_52sSEo=xY>C*Ewn zpwebgX`=ZNRY?VPCrDYIeEAh+v``jbo}A37dr00vBte8WnDN^5?qoog) zUTG6;U63}<_Rsk)Jz8a8E3zOp%pE1Zp(0CCaPB~bA-j@U zrpy?Q1{uYlovhWlKM*2u5UZ`I8pXi>#|(S+?-n!EIJ$BIhS~sux`7DoC2rBwjo@rp zFm@bm)?$>ej&06yD4BK+aI%O=HTC&x-SUJUKVM-ke=Om zdCR3OS2mWC2leE^YvG3%o4RjMCDW#ZQ^{Sav!v^^<|>&+wVlWc`^t}HF=o8DZN~?w z=2jvJkh&33uD9Llc1Vl^ys~dP?ZuW_-}UC2%#qqjBte9X+W&@~R+CUQDXQJ1Hs4jY zf1>V&%xRjrc`G&=b5LiJAEla3lia!x+x?YHvicw?(nF+|NFR}!_2nAc)>qw2PxldV zv%LNEfCw#&ZFwW6amP3Lk-4oHbm zqGpiG4S+61olr8_bOCS**WR?zP8Wbw&d^h-3oX61}QpRneKs?Pdf7C z@YY?&a(MZo!^;;7gpWMUh^sg>uCd20UOrltnF5dImFkj=mVUAB!Sw_P#-~W4mhE=G2nzEp|Wp`hof3jGJLd}N#N}eyEBb^ zQ;I|kuZnYXoJsjYM_Gve8MYYiN$UED5lu*pNc8Y2bY5!yGm`l^|sv$O?!Y| zEkLh54)ihunEul4Bl_+m*TRE~O?!Y}Ereg;n@c{#BsZ62lr@@~kCRXC6$b@E>Oq`#!Z>LBUh>MU> zT!hT<#%Sa^bf&uvO4S!ZRzqhXw3Ly@g8UKMWsnKXWL2$1Q_QW$_?-eX0hA4w?tr|C z)d3K!xQw&3apbGCg`L2-v_*Jw!0lQvEtxP4ckT_TK-0m~jD&y8?VuI;SZ?r$`ZhFM z{Q!hbt#DEIHGWZtep5c{xqVS5Sy1U$39{4^M0n7}@9W~VwVYB^ETR#MQ9s;5BUU3@ z>(*A~m`R#(%yfcg7~8}~$_doaehC7kjIu;8L1X)3dl%)ci>=#lxrN)I$9BwRN}U5b zezAd%_I?^WH19t&A3L;|=qM%n^+f+iV(FnLbo>%eP&W@O$4&g})!TX(n)<3Se{=9g zlr`@K0!CP5<6^u4zYEWon!0uT;@z+(7o+b+ZL{)4K#D|xnH0lq7@>G5rIFO+r;==jA4mb`cXtfO5!xAXnp_j>jC-X#e_AWcKlE#O!Q zNEQT!*MLBN+SeO_^4TDAhTdungn+LZz}MPdk&r{3O`9t|;{0smrb>V~_@}Lv5OHCG zzzA`Dx4RGY8@Pvza1WWWU1{Jt+#^4PQZ)l|JKO{I$r;~Rm`9x=KBT!tr-Xa~JR=mO zXf&7!Av%?LbQ(rEGVM!79MS2sJUVqTtVT3q+#bqYL4();h)-`&_qJc0>Lb&703>5? 
z0sujjTYS0-q(PgzghLz99kAxAfg?_o9dIZYYH^}GBSTM@b9mQqh<;PHt*Fz~xgO@y zxM;cqz%3UVt+nHNy0nD$_9#|efV1hU*k92|8~ZJ7aF*?e7Mu~$hFmQTk5QjQ|102G z7*NI?S$)EjLY}4joh=St>No5A$NQY8EqVE7y^XZ5z;gW+@7Cl~K*rq#U% z+vl*L$!dZetz4u|pTlSP98aV2>XyLW@u)@H#_HAiAiqY3+655H2dPEdE5A=hH~!r}X;Y10iRn-E>xvo6ZVx zhOB-A literal 0 HcmV?d00001 diff --git a/apex_plus/ir/cells/__pycache__/embedding.cpython-312.pyc b/apex_plus/ir/cells/__pycache__/embedding.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53807437d8514b98b5df5b49497c959083868d51 GIT binary patch literal 656 zcmYjOzi-n(6nhmd9zyd>6?T zNRTXr5sMfD3toe?W*}|FzSzyX$SQf@{2~#siXW;p9W-B!%INUlIZ#3V$PWXhHn zezxQ%u5@NR;+V*-)?bZ|m6S>LzVbijgZNmO|85MW>UO(#Z^Cy*X%n87sWQVj508{N zE%I^rwk(Eu7RF=sNsQBp`Hv+!9N->IsFt(z#%1LRkz`3B#M#zr#@_OdS}_k1&Y|{U z=fws8eE-|d?ghW`p3LzY0*4Z0@kj|#`9h3xIZ26c3h`kQr>)0MvO;g-`)kl0G8NJiB#0ws*<@0GdVAUznem6#xJL literal 0 HcmV?d00001 diff --git a/apex_plus/ir/cells/__pycache__/ffn.cpython-312.pyc b/apex_plus/ir/cells/__pycache__/ffn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6271b6a9e1bc634fbea69affc5db3f8b0a72bfcb GIT binary patch literal 7610 zcmeHMO>7&-6`tk(@JCCLl=V+Fk|oPFWkrf*yFYGHSF&uksG}Nn475yLg5jE5)U(H}1{);=Zgu?&pYyoFj^Gn<%2n-{Y23QvJ>I7tW?Lx=Ky| zx$~E5tbFl$x{>oY%5+$joUc>XBrGb`GSxX?)`&{toI>Irg;ROO1Akr-<|ReEEyM+d zS4CCogL3m8MFNQzBs}n5;C+fu^+P@|FDQPZ&D@sH$J1xw%wLyKF>0pqQ`z8ORzbaV3&qFOZZxkmKyEZm%*akt-fs+DPx! 
zO3soye9E&xQl85rgxutHuU)3ZLrpv5NOx$jHqQ4vE29Fmb`xqR1iM-RBpj1oC!5?{ zOr$crqCp_0NRzn*mEzn??>t*XZBsA>P0h?Zig9TTn;U}X)0xRjno70FtA&iJEhh7m zv#NGg&o510DCmp%++=b|y^&bTT+$ zf*Kn4yi3lDs~Tl=RleP!E6x})pI%h97o^yq`GsjA5L2L9ePwaRVtB-p*0 zT<4kRZ<%w;vc- z+?L9}NN5;dZ*sK5N;)CYu<8))25R~jR6U_5wX0e&i1Vsv2jFcLwSJ3ih4$Sk{kC-9 z7p+2`xJH2jHVRbs8Yys_p1F^){Y^r#31?p&*m&+*mi3p=3rnh2?6x~~%LkzY3z zrkFLwyuPT?lyh@pr>yrCVAhCU*H4Ml>*em%IE^d70l0#7nw}EZN_zD$AvZYM$DwZR zBkrgPwOUBA)OGVp00?zKfv@FlW}k7u*ZZMNF=g(0WRSB>ai4!AtPa_|xNRk-1`^^f0WJB4; z#Oe+^Nx{GV*sJuC%rm%*#=6pW==Oim3Ntg}w8Jz46Q!tHW?Cpj4Ww>yttH3ZQj|eG zb!+)57VkxZ9?)e-_F~Rx6q{%=FKjm!gVz~Kn_I>$ zUhKA8c3Y}9u+}INw%74Ie>F@L1w%z*0*&F=PavuLSULt-r&VGnx)?zXxNOn)vD!Ei zoP12CY=I|GFp1WBPCP(%CgLZZI)$ogz~VB8oO-%!dEv&O{iL;1{SL!y^vKEk!R)Gf$Q zar&Q=1?h7%?lvNCV=rHMBl2CG#q*3vXH5w%JMs(MGR~N$`$6dN3&k|gu|#ZF@75Br zc_H~B4)Y?AJ%lc@5q)*{u?zO%GHw_x;ZsEwHK3&*Aa4U+0>l*HFRFqfDc*Tm@!bX( zu1Kn+dTUS<;iL~F2sH!1`vFJ?Am0X1Q--`;@W$E!YUa*;5iAhc&I7=tP7I5QHb{ z7*)5`vIFdCpw-Ji1C%Pc%C`Ujj`F9VyjMq9XN^KCd0Qjc)==VJt@WEec+c)s?3S$b z3WiNhPswNZ&ykgBfvmhOkb(u=Vt$IpqOlD0G-hXknC;dYC2APh3dm4Jrj%S-Qgh0t z-@+I>wLrVDFn7fax?A#t7;!UuoS_`-j=GgkLm}<=KmhoL>%cd9&;@|EI(wIe75~=Y z1d0b9pZFH0(MOT)RlVGE$mlurQRMLQ^rNnR8zp~)==jmlbvU)%Kc+T|JcTC zd3?qgpQ-Ym{T-`8Knjtra(KiDkCek>MtE%FjfdgyJn@t6V^4U}5&LYgx0m1&u<}`$ z)(+t%s-W&1eg);K7!ELuTn-Hzq2c?!;TP_oEm-Ry)b8KyKYrabkU=9fc;7c@1x!1K zq;~X| zPVCIIfCRhIehCCTR+tQrJ_&Y;CmG^~6wXn1U&V`@53#o&xc~`uRN9b}N&jF4%U9YF zkB5*O=fn}f;~^vZ8alNR1mr^&ARnqoUJH;zf`A+nYXjmyzC@>n5j?DQ^RB2H(*L_K zSRcpp#knxc(6WmJ^f~k#k{A|a6flA>*zU(KDxdmZw{FnCM=wJb51@wXukakl y{f+eelMMgMBXOQj2$1cLC>K~huuUM@e!ZE0t0o_{%0RMxfy2yOpAbk`pZ@{lMJG7` literal 0 HcmV?d00001 diff --git a/apex_plus/ir/cells/__pycache__/sampler.cpython-312.pyc b/apex_plus/ir/cells/__pycache__/sampler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10eb88d03c10e9a3d2616d111fff47b706df3093 GIT binary patch literal 650 zcmYjOzi-n(6n~F(k48qQe`G_9==xQq|A%( zT~$u 
z^BiFT4R@V`pSzD62W{iRqo+-empRHWcnTfiZtmX>IZX7BM$@#OHuVEyx8-iU gmu+`PdKVqEazmuC8Dl@-*|qDjgQfffXx34G0ZyKlWB>pF literal 0 HcmV?d00001 diff --git a/apex_plus/ir/tasks/__pycache__/attention.cpython-312.pyc b/apex_plus/ir/tasks/__pycache__/attention.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..865d9231a99210a2b47769bb65e3bcbdb7d1077b GIT binary patch literal 5586 zcmc&&Uu;v?89(=*eeF0lNlpF&At4YPLefH6Nds(YX^XZtf)O5OX}DhQ4KZ=-aPCcq zJ*SN(9*kB^VUrfq4mCW1g|@M$wQqgjODt9qU9}R@I%$3LY9?(wO#8lb|M*_Vz|j63 z#osySJAdvu-}(OhzWlzmHAtX*Mu+DnLWKMgJ9d$*M(rXr)`&tBE=?A=ILBd|PxA{x zTwrY>EiOoLi6cCDgDBz^qDZQ6M=;mNeT?e^uHVJ=Gp-EWfQu{Vg0YsbaAAxy#5Yr# zj`eg>n}=FDIsLuUug#d-npCzrwVy&`ji@BfDJ0G-oGK_h`~>)kiZm-LzAIu}R76#p z;hYp}HT-X# znmnZ@6~jLVG?7w_0BdNeylS+}r4&WYBv{8sRegzOre;Lz+gS1NLm}2ifvk}NS0D*! znWex(k8^qg^l&~aIa21P$^u2{&;SyA<+5mqnwp+%U~Blv-B2E#8at<{R2!SmrB!V% znH@W+YV&$_aqM(XpUY;(l8fr)#A13$bG1@*Oo!J2maeNAJ(bOjF6In5kw|4ydLohE zW{F_b=HPK^JwR4SHAK4lisJjB&F%rH!kfMOisEN-R}DtVjB_{WpcI%S2A~0|2$Ddh zY%CvwTHX-OlCm)6e0aYl`s0tdSeavYhiV%v)$j|uX&X>HFjW_+_rNCSIT){P2eLvo z+xpiBZ;jj>S>JX0(njQk4fzEc#pM!{fgve+Zc&|a-T{W3ljorWZwO~cgBTTr> z#rZaD?G~r*s7qZe%isen(L7e51@0n>q@kEXiwRSv&`@$Qfet`rn$sR+xWq;G0>cPo zmKG8yH?2WtZ77@hcM^Jb-ZJ=nw_9wUA$$Yvc_1#yeID&BNk9K~F|ZlgwmN=wxuk#e z?)7&ok%M=o-vxggtVCYkkY6^X<;Vecz{1Q1uee5XT$!JmE`xJ`hfQm4{r9J|1Xyq; zAFNJel*%S<|{Y32k&{6RLGriXBF3<=sA(?aUU783kv zzXo!JR7F`Dde9cFN>EpQB-&Z^V@oC--PHiLf+QHOwO}jCNry^%Y6L2n>1(g@f^@K? ztdC#MRtYrks;v4{_!@gP!JYzJn3E^gqwA@geK>34_cqL_NM?(mq zEpUVXSnQ&$up12{i2(8164R)ByK@pJuEcOY`s%G_fO^Kv0G0Xa(SxGzqFqQ(m@YG6 zO6>i(86ytka`T1V&_dGrk1Br$L1<>Fl+uF9iF^qY?Oc6Ah zZUddE`xVilvs~RIE?_Uo9`QwIJ?kP&peJmC68hh}9bRg)-3~O-?LdWY2P(_$ zKnon+lid#9!sB)j9b&2d49Me9G$3h7@y!MqdIn6*khE+e>41Zz(e(iO2PEC-dYa8? 
zbUhM7(q7l&i-iG6pD~ei>!l@?=2(f8j=vwXwBa2;7+n5VLO?nn6T@V3S z<4RU?ggGY&{jWoxzJbKS*D;B~%4S0vo&_xNH)$N-*E9t{P+vv5N?3%F(qK%IHC{T^u_&$o{3t_%D) z?Sw7BsOz$5>MvdQG`;0CEYl7E0Zb7dT%W$b_od3-mo^5ERtYx_7vA;J*!8iE?%~`0 zRe@_gSQUgo(B)XS_*6u_?CSr3#u{k~1^GHxcb-$>t^kTYN$iBr#?yAa3|W=J0J1{9?AcjTKO8PbzU=Jz z<>3n6mH1_I6>b&;M$!kj--Lw7cK&K_#ORhZ#G=4ZM6ElFVthP7`3 z0hRh!GzoISW0*0=Dv<-X)!$q!U#vuqZpcTs1_#*l+0aqMKBR?>varEZu+eKc1?}*DOpe&deGTZm7oUXw!N$B$CixASpZu>OwL-c6~g4K6)mtQIa5yo5|U@nVgMVlY#%#4NTeH-Rs_O7AB whRDZpUy#l}k%2GB&_mx&E?QI`5~v=2i`&6PSH*_}s)r{I;J{xARP5RR37wQ#{{R30 literal 0 HcmV?d00001 diff --git a/apex_plus/ir/tasks/__pycache__/ffn.cpython-312.pyc b/apex_plus/ir/tasks/__pycache__/ffn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8f84bc62562f6b429e8806a178b2e36dc51131cc GIT binary patch literal 5538 zcmd5=OKcNY6n*2FvB#g-9LOnyLq*lj}iC8a7~wM|{B1XYbx1}ifm2FD3+CMtGA z#UhJH?SgJfstPq(1&ak;vh1=_H;z=)XjCMmsta~*oo=$~y>INvWMWd$2FS>B-<|v3 z%)9T-z2_az&lMFu0>@+8IsRb6+k2c&777 zVL}`cIpQK0h$74qMO66*ym|kKn_1mp_1LT)W|hF|wOJ)*^?}uIvwE2|0M-hd)t3%- zgr4Cf9h@NyCNv$};kb4kTJh}gCl}9+nl6}9Dqa+ZLAgs*GQufj#HDa5uejjk;S>~c zOj6vl!ib;r+yO>nH8j$YDKsztA- zld3izpNyVWwd?xiRPJ`<5TKQc`7-r*|Y?W>JV=&IyRPyOr;G;mJ_LjF3XuJ zD-e;A4P7ZTg1Alc0kWrcPWURg(s*!A_(iH;Y3`X59!uc@bdynQVQ_&VU}0zh1+oyr za1Au)@f~F3J${IE@UcvdHFvx94&GYhImmFr{AWn8b%aq zLEI)Ql?N7-rPN|7ccAZwk8+`7Iq4X!!6%3;4u+`c=_z&8dKb`TE#L}VzeKumsK2TM>8Vf*7e&aT2SH>BdpcVU z^`I36o`tI*W=UR<#NMaXwRsWRyqnZEvz7m&4IK0sWE4njc&L!m9!gWB$Z3rVlvlD4f>uN`UCX;3X4?90gDP;K#Nh z4TVjE&>)Hsh>dB)URb8e^29=d{Wu=WiUsI@m%BBAt=uVQCaptJ55iXLEO|D1W!8BI z4&H+TD|)?9c0e_=q&7W@L2N?typ(RV2gZ@rr}4(Ev-$)ML4n(o_McF{-$ecX4XA%=IoxBS zexHf@LuFAPXWmdejA(rh_xt zR=O3(1onw>aU8%t`FkR;Pa&4s<8ZOv3i(@Wf$pYtM#{FRtjo6cvO86oC7Y%GF*u>& zrUl$9xL)dc!kuVXY1sFD_}g%- zlYvdM_v-N7kdP=M8P054K&j_bTmbG$NUWO|+Lj`VkzDhexlm6|>e)F+6$THtMyjwQ zC=Rf?f)B93DjT4TX>_8s3kBPvk=)Y5D3ILJ_fX(JKL@dO?%k(w2#R;%)V=_L+}2m* zw$K80)zsxZs7c6jy{P$+<@!+zAj_>ltzHm)h#+7dXH2F 
zen0VL!4DmJ?ci5sz7_i}VyiVTxXAt(7YXspLq7TrPIVeYSx9T#?52fk3B2L#HszYG(prr|CNsk-i;eHhEea$Ws^R0A&$sj4@x zNv2pOoBYRG%arrK1XQ1+|0SSmTNqxtym&d+b|e?-%SnAZ2dMs?QZ;Xmi36(;LDsQ~ z4&tTb??Mc1D3DWaC*&M8A?N5u$T_wgK5i1N-6rJpmW3R2CZ zt4M-I@DmLcV4Sgg-9D`+l3Ij)wlO~+cwm6}mxwV)gfg~TM;O+m0rOkMDT*60excDY ykzlvwxF@9U57P3499nf(bBFH;s|1?WZl`{vXm5lu$|`|obpXVC&EKfA#QFyeR4G>g literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/CLIP_vision.cpython-312.pyc b/apex_plus/models/__pycache__/CLIP_vision.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b480855738524274adca6394bf45af2ab8694ec GIT binary patch literal 3299 zcma)8%WvC88XrFNqF%O~wI#)o<;1bAL@RBA6mhfh0!dT!l^6l~0ICZ?i&U(HlDtDY zs8t0C7CHFPMbncV1dI5Xh4UxuvA15#J|KdKffVf?dL!29rRbsGH>70Qt6g+}nfZO+ zJUR25-^Z`wafv|sj-FZhDMH8(X!KIR2m8waHi=49PA3hn!f}Xsoo@&gfnh-x8-YrI zVNnk@LX{B10X^J^R3Z!qb*T}pL^;Bfw}={gNYtOa%ENfVk8}+v4jMkf`20K>kim7=nskzpW zb!AFLijZxV|NT#rG`{EdNdj=>)(M`)xt6dYmAg5LmblOR$322217cTqJ~! zn#o>^%6GUYL=`&X6Y>vn$6xnv92)`a!YuuruaknfL~%3i*ua_P+xdKB%`o%JT3)T+ zg+pD|wfypWzJFHvLWD+P7>!{O$0C6R?t&r#=qMD9^pT>kX>U^6qD}-4nH^yv?1+Y@ z*M7$g25vR)r(fPKe`vs+mG7+Unz5p^%GWgGj@eo*-&{9WTFtToe3V!9HDdrZke@~w z2`=N8w3?;Wbtf##^=93a<@Ol1c)Tg~ocK}3T__$9_ZXR++YrAEr|s$USm*5Q%!c?` z($1WODq&B~qc_v@dW|om<69XkdU9L&Iy(0vHEE}ETc2(_Rwhoc%HCtOPZe;2V0bk95&+WdHq=McJ)Vb4?(nw< zNBW(>+o#vTIPms~b;#Q%${{ZkumcYwm;n#%NCGdi;z)YV?Swqn`=r1x6+*v*2SAy) zcYCsTEC*wpQN~K|#ruzX3|)$8YE2U^+YVFhjNqpZdbTB@K;sJR%mg$boSz~h>;qQ|vJDu;Qi&na5=VrUP1uM5;PfT?u z&RG-Z?D1T8{Jb@O-p)*SGbJliVohhPi8FohDj6P;ToR5*Gy}6&`?>)JUI$Z|Ujb|q zv)|SsLob^QyX+mG7WX*QzXsHVVLib+gaG~Vyon0H5@-v5i-(K#cXUrMQ4?4iMgh>M z|FvQ-dh~`v-GR8FlIOyXPg0@k6%PMb!>>Ib?&*p#ppA#uN`X;mIDpB#LL~L>Vc(HJBO}L^_TQLSsdNLSLnf zFE|3l%%fMpY{_-n9vL7P^IU*lMgxim84IvsgvlxWX)XqCh_MD;m7oe?f0LZ+J5)jba^BGBAM+bXRYL{J(0H4lil>Zm7ce! 
zPCdQ<_`aQ;v#0WQHuowXLb`>9(GA}&quaj#!0!kIwPAV#!3M8$h}y6)H1wjO1p?nN z)qxfWg2RXhEf5fI4^bazf#5hyfgl7XfG**scKYCy=aCq_IKSAA7xM?9o|y;Ho|)L) z6PCqxtauOv7u$(qejscvwvWM7NB=e!LEsLD#esO)kD&#Q5yrp=kwovW#ML@mLHdQGiSg&`~S_1ojLI|_c*tko`=qb zPG7X9FYe|S+;}j4$rXC?RES|HZ7K~-mK{l!A=<9# zh-0$+*EL1=TLd^2nug6f(Wms+h!QHq@Op~iuQJ!u*k>-`Fq zSVd9Uy<>e15yl#J)W3u;dzbhZdKrAkN8>aU@W;Y&d*sx=$;=)p?UC6%@~1sgd=ofoW literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/bloom.cpython-312.pyc b/apex_plus/models/__pycache__/bloom.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7de8d55d8cc051dcb8f390814b9845a202ea043 GIT binary patch literal 2926 zcmZ`*TW=Fr5I*Z&ukBr55(BZFi{o4f*%0iIQc(e^1f}W?A|!f|T&b4hT_;XE5Ze)`opw{6Q+dsk z_E6rfdNp6#M|oZqG%+nw-lO`pKsvx67y1av-n&TlDS<84IWHY73zGPn2ohs)R|>iT zy?9A2m9!5^#e88NL^p`JC1BXAA77;1m1|_y=Qvx`^Che)7)0O2W#!weC0S8(&fX67 zH-*MGz^o$$r5PEeT{5GvvI{;IJ`O%Ne7x+*`{9aQnp50xO&&BT&&k4FZmYRX+9UId zH}8@~nB@ar0NxM0SoTN(dexF<2{%i*>|91KR1`}nu4oxGTUM}c`3S$DWM$nF7Yeeh z6rDNUg`%OrJLE#vP#hJH6=MY#XLEe>WYQ6ySgvso$U3SrRg?kFX;l|UOiN-xa?|`&XA#2PhPXE(yQNYmddK4FJw!p%Zk2al$KN1%Em&e zn92fdGRx|U-o|MJMtW+Fz%aR7w!E24p;$07nM#mCGr1>|>+17B?jXAh^&Ma1ehitt zqqK_+Ll-pr#@4tO-l#b=P5SsNv1cPz6Nfjmzl$echWpKMeB;)JW+P^Dz#JIc_{?UR zZmGetM}jtD13}x1dXGQx)ncdDxSxUzI0{*Oj)q3AU{Bg$eJ6jQMRMYy)>`k{4_!zNu6frp zW-n-Ovj&MT+7=0F4>dTC1l*z4`4+NE26Qck4^0NK%06Sanur1abF+|_-8n@CZ;>}j zyaNOh-&UCTS1INo)NlqOEw`jwfdjHLMOF?@IUq{{Cdaj<4Prr>J21f^Cg!q4iUjA3 zeJBFv{s@JCWLEeJmV17#ZuLZ_IA zBo4))6C!DP?}#-RfuhkV{$T_PNGBx12>(a~&wFMo-82Zt_bcH{B``_Tz7yax6~80W zdN@-FPA1yw#!ThtWa58Sapnyp;eL3k#8BVqB;J~yf*g&NW!#>l9r+y0(bGWgpjW}c zP41V_&!O$XDag2|d%@6;Qx8(xu?Y}Q#?08jsn6^itoNO)^_|@Aodm_H zA#><>edug$=|SD!{G?74vzn60SVATPMPx-Kyg!q-xIINQ^-xG+;&Vz!)zohRxr=P>h{w&)IVqk2p2>`un{8M1me4Gm&Yw^d<{r*3s1iK zhja~gPj81sw{Y)P13_o^l8Ga`WQKDua-~@*WbsCMxlo+P1d^8D*#Cc~)Aw27RDa>GUPAsv=CHYIV1Ew4CQ*pOX(Z1jIS#Q)bLIIY&oHkEd3Vyyu%LPJ-lUgd 
zx8}?HlYWLhnwXc85=UI*Dp9=aMDeN84)2_i46OKN@ij6sXNgy@-<fjeCt9|P~D1O^?-zA1w~vJc52%sy^2TmWnGE{v;2TXzyZM0 zidPO=0$$t_?-VoXxs;wOsg_t+&Zo5Wib{3Mk9a{%E4n2uYM;vP1A&w-M|SAV|)pA zR=C9rgwQraIvi14W#JJ~_%i>9{4DI$_Wq753uV3pd&4gE+pQ2;n5B4NDbVn=SSc3E zFY88ZPK_zKJ2^c!r>U{Il~`R9G1*J;M2d!@K_nq0EkG>s>$JA4enM%HT7FaG6ws=jCxmlD@kjD=z$kp{0xEosYo1InWp z>52IzV=BJ1V);_3Tp?$qQl%i8H-5c$134?gA?-vii$04sxY+yZ>EfwXa5Z5X6=cQ{%B z(z!t%(YWX88?W`&%VY@pSs^_CRt!ZRvU8$Cmw2%atcLh!%z!Gu2^tl zhWUl;viOXq(+F&><5OZWdImbno25kvu52m5Y*gd=aoP-!HS$MG*zD-7c8pd!Mj1#{ zIud4kPqlrr(mrWMqSeS)B{F74`?g1_1LrI7i=ID_JguTlJbsaOz%H43d;wEedMijb ziBTUblcq{0&6K=TGsiv7fb8ouSl`SXCj^Y;bf@|LOj600jd%S^h#o8?l zt`iN;Yds+Xw)rg-mx3eVCiD4c{7%hb!2H52x(SHXN678^u(PmRK>0GdzxyZUb2(FGFqjhc@F6;@d;jfvL*C)br4}4W3D>;ekqc zVEamS=u&0q((}+|kVksAZ#)^?{`^HGu@QXP(p7C4skDrk9o?I&4_3`cPc<@HiHw?k zF|#Xb_6eYk3NojnM8(fz#5+z*o`i}@b_Kyij*&@AF1fkR9u+5zo&O7~VTuYQ+#7Jt~6JrQih1_)c_BmjVvUO5t=VG!bhk8PlcH6S4ml z#OXJjOuONwGRG{ZW3<)m1o@e&OSBN?Bqm1?6K|LmWt@ZZD^`+6WJiqU~@UOUh_zUC*mffE0>0q!@(%MyihtA?L7?9SvQBK_lzZyc=?>csz3m}qBDNQikATdNNF?o@ zwuC4^div}jq7V^7`z?sZh^NhVdEMex68y|9KKb+??DqLXxgD0=;{Ch(1UkD5Ck`Iq z4Ch{WlUexR6EKeci|Oo1R=8eS<2f2<0m#DA>Nz&RGDN(d>RCuz?MlO?F?l*FJckI=IaBK!-v^GtKBSjGhJK|F0)bM@aeHL&FEX@8Cick4 U9+?Di;CYV|H{So3K*u)u4`l_1pa1{> literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/gpt_bigcode.cpython-312.pyc b/apex_plus/models/__pycache__/gpt_bigcode.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..afcf9aef6bcb0bcf70d58642d0e91fdd04ec7519 GIT binary patch literal 3299 zcmaJ@OKcm*8J>OcwR}mmEs+$(heRt{TOxy~wu8h9;3P(hAQ2oy&Vh$4DDG0CwIt=) zl@T&DYy?K3LkqYk+Xzytr@D|2>8U*xx#vRW0d5vHV4&zBw?*V!e9Hgt@+F1v05kt< z-v2lMKi>v}0)g@)J+meG2>Blh{o?8|ha14$BQlX0l~kDm!ywP9Y?Ui;HqWVi)m3oW zJg>T|o`T2bU8=Y0EBI{QtqN6t!Oswuyhmiu9U^-b{{iQ$Q3y1AqVO{cB4bXxcV+$U z%H}(DS$U^kD_1r_=XzVMm$rf7KK$sCO+L5+#J^69HLYByRfU4+yR@Tx_92W@OU}ej z8|n$YlPILX$fUr^jKaw*R1PW+)dkfJ)gyb$A=!6_FYt;>78EyVNST-Ycld+eR0WUh 
zQM_eV4nQv-@B;8b;QbA+7&3+75lrDmy;R(gv`SMkh1zaaQi~0RYGxSu>$?hVNLxx# z*31AZwr}(#|5inom6|hdvQpC(IHX)D>WZUsQ$^pUwY8FGER zm|HAJOkZqqAh8392dU=`LRv16_<_w0QXdUyGT3W_i2NGG<(q+#gQlAc>FTbgr#Fyw6rD`N*i1lU3ZE3!UF9;Rb!z(Xe(hTmJtnUyYWd}ZHQZ*}DKGw*zp8<| z^4kqn(YA{9{JV;_t=D(*R~q_Oy_PQm+~M zT~}&)rCxL1qzy0AQv)~FAl|ShS5h?^gH<&=cREd{fS8^#t;2NXW@N<8J>5KZ{s5ZW zpY|Cx=p>8@~&gl>xBV z&|k+&i+jwCz=~gkyy7Y;DtH?A*vA*{(u^QTlo~#8Ds5nf(#+ry+36xHN4p%6#Q??N zIugb36iz5Zc-!%e;{8&LOQzq3lC#mpB2p_Qv%FG8d4&{sgza2N(M2bnItE>&oA zB*ErvO&+`c37XsFNodjtOmqY3P9VLX?ar-q=2pI5|M#c=`t+Oi?&YhU%U2IMHZ1IM z&)p;(xtr}q<~otN{dc?BH#*rjz7M^*$32P8?tlDfe*gU+qWQhxlW@EnUg(4ujOm$s zxBhy|h$gzx#ZGk5NTrQ<(n!rey!GIg5r5@nc*fhd1JQqhfxigkwQ->cvJd3AaR|5b z(pZ#(1#;gwq=N&8cBvSRDQiR-3^kJJC5I zXtfz}#F0P3YBTKU`rm}rW?(5jNN=mn5JULlERJs_f9;VKw_|5OJevZ) zcT+2!)Ji+~2T+`Yz66Tp zjFCCr&0OeYF0|9H|A(=H?%cRj3 literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/gpt_neox.cpython-312.pyc b/apex_plus/models/__pycache__/gpt_neox.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c5feb3c1fbf596f4c66593c5609a94c878f0b33d GIT binary patch literal 3321 zcmaJ^-EZ606~7dzFY3!yY+05ig-RU7ay-d(0A~g26j>KEz-lAFx?&J85VCj`tEMRT zk}fJ$L4qLzJYWNwm)Tj-B?Agnf&0+^V=rc#LIjau8QMPVjhM2RJndYP`fw)X0=nmS z&b_>!=lssqKOzwxLHV96?{tF*{XmsL@w%X218|5WBr!5-FlC0J*du!yY?-w&D{~ER z+1v1yeYVcY{zjl2u(4MTHuy4cW1k#qgv((DdCY^ zBk$gPe+xH%`%Y7-)o+2p`wO{Q-7R=bcJszHGkUW^Dzc1aP&VJY3GJ*SLhqA`qSl(E zfeBcH*Y@xyn@tJJRc98bj=D;J8^9sLsLV*H?2#DEN*?&J@Z;d;g`W>|l3xncqEhf4 zSN39`#A82LP>qv9_qbzssB%E^<6zArg`pP@I0QHXINS;pqNd;7kI8@BtX8%~wcf@i zuk1HOxzfT!HG>rI;EJT0p`E%UVa4e)T~{;=yOHV@4LhdDp1XOmqG^ihBUsxf%2w6y zE`M$K70&+^n0~#~bSkOE}jRVcS5`;Ql(70in543?>pbpnz+c1yB z>|Q~LOly%rKgrYKK1UMUVLwM-aL4XUzh~?(U?v!4xYiaba9e~<-wu!MTWAYHV_(&T zZ7fLjkLzlETgJk6OBf!LPzaDPd`9TBgx)BL(Q1lT(@>fG2NiiAUnitV%pg5l`}hie zlT)!=d;P~MU7+BUm)A?bQQ?kCyDb^3JC$bXmss7^ntP?2Ep4Z%lq%q4;-0*(j-dv1 zI<<6bPZJf~{G_yU2>8g$oDVpJGO8(H8UPKtzp& z`~mkQkTh~@RA!CLGL@<4p~TU=9y<4^@@?qi(|Fp5XOBKTYFLO_$rzdZ(XT9)nJ)BM 
z_Ds}5Y$R$0Q0hWAsL#J~!2LPehbfWitJ+ak9XbIR1MssQ=q~^cA!0k=L>~fvwSaZ} zJGU9EL$?`y0aogmuN1J-*(dtdb3B8dAnX~&U7KedGuv(Y)D>CJus;rU}tV*TZ76dW}!t~cj6XLhljt#EX zHJqnCuS0bg{VNtX<}$sxC4Fwm1|@y2WF*qP#HyZHHImt0a#>F<8`;HPc3sb|8!4fe zTGvzSFCzXa-a`H$Pv&8y>M(Rc8Q#-h!3u$nCfyNvQyYaens_s4;?1B*H#1=D+l&^1 z71*W2++yHv+?Cz-C|e^sfHh6>g^4pByS{Xr)o3(3V^8b}qfYRe;rHCw+Sm?v%QL<@ zjq-zCK8z30NW#c7BW-}dI>C1(k3`QrG3Ygt9naV7n6GkMG>KKQ4BkWc=z6B@ab}$Q zB37!v8ARNsiB2^mr*x-_gim)lr5D0B?Gd`lSNV{hKcUP`D6bKEaAwH%P-nJL6cxOL zZpQT5VL~oa%r$LqaXQ>>HrAXyh}wFrIG$57oxMkv+`Wxfq%p3a;z zre+SK_oI)vp0KJ5tKTg4)^6x)H~Oq+HhOUVg%4%Ub!WT&C+XEcZvGgb{Y*Mk?kkU^ zUVdHAuYVW6a`4VmYV9TRdLrNDdP`UJrK{h?-v)a+*FDp%cKIji%LkiJW2s&&ugCJn z+`{3V`*)0Fx|dwilS{@~!ANC|v*#b)d2q)_U3^_s(VSwl-Sqdl`C{NHn9qdm1E42h7wI5u7~1(Jq?DKNILs5I(b9)ui6@E$px z;~dL1=%wyLbr(I4<{mA6Rr-7BB)0~c>T(J$=3(hU>12Kx)JyZmeCA>HLG~oM1ZrWy zSUB5TxTG&!I!Ud7;b%D`ccGWNqUWxh2tNnIn~>>py-Z2ZlupvC-!N9l8@X(;1HLRo zXzEoe4J?W#FN%s@<3GiP~0T0S(JwEPqW5ZHng@hCBG zg(wQ6^jRxHQ4}T9R*a%4#7Fzn6cv$g+VTXv{L$|$1ftve4;6iOV7F$bz5HjN_7Mmx zPdlD}L3=nWq>I~v=V(|PJ(E>?b7p+_iYeAf(WV7LX~RsPYOoosILw<4sZh!>6BGK? zYKqM`X7W_y*z67$jeYApoQk#}@SnsvOaf2Lu~U8-c+0uPtK=#)PzO|LriI52!#qRz dpU~nz(OUqXq19(-<%REUhCitP7lE)3;(r-%=_&vK literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/gptj.cpython-312.pyc b/apex_plus/models/__pycache__/gptj.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be151e018b66bc60717d6a1acd4c893a6e7e6fca GIT binary patch literal 2771 zcmZ`*%TF6e7@ze%Y_A_Y1IB<^@*pgsm_$)&)zXN*Qnd|2Y95sjT^a8h3~QUtE>Z0` z0ad6%RV$?@h@uFNiO3(2WADA-HkDT^kW#fh%lZ?326@m`qzY?D^=ked*7chZ3qINZk_a+mi&tl^fBHc_m(I+A#J{iSQ;Pt? 
zGfAA(bxpta$<(#aXFwCUj*~efoyS=XgXo`H(!RNtS2aE5x^?GZO{jhaU;}9=!Kf(V zQ5lU@J@BybaPaW(2&y+7RDEk)g41}_uL+<*X-*Z_xSg7tgjW?bU)rMvz{(F;1RMk$ zD0t25)Tr0WRER^ zXO&r;KnS;)f%=Z>DRB>x%9hxN=n=P5^ZhMT;!120e1n(9=?X~ZrZIV8F?i(Dq@pBc zmkm>z)g(1@J7Z*KbxoQrNR3EHvKN!e!{oJ?WDAGL6o$zbZzlC+?IVoy*!Gja(cs8} z%^8}We(nDm*ZP%{?I{8D_TV9w`r@g!7@vZOB?M^Khj zkrAI;G8e{{3bs#CGP#VYD8&#-?^sOHs4{m^1X&(fvmG%R7OcW9k^ z?u%H1m&h7>5$M|NDF;q(Cw~o`-)rx)+GCq{HnR?5M*FS)q0P@6mT8l#EZY%w5E}|R zJ`_Fo*kA6sxX%3;uEML3Eu?5~q#DTur4Jrj7~cWdfF>(JaV!8nIY4{#|7zQyZIr{| ze}J};-b1a*G}jk_B%XS9YEkq_R5^XQ41cl92LYi~bdcZ|aTj<=HTnyLM$rEP zVFQ_sy%K5~WYjdss9BKf%y^eE{otIA5;MmAnA0^I80&AgJqMS8c4 zQ9MFy2z#!gz1G;KxpnWsy~k!Hex)40@^fcp>Q;H`)^Dv}uJe15{_PvvSD!~l*F$?P z(Mrouxn;=e?%i1VX~l~4RU*UX$gnjaS<#p^aAs@e!HN|1x9aHNtBbDW)gOQ>6=v)>;UU8keWR)U`Kcqfb4pwi>;)0h7WJA$zo`f zQnlNblf|Gb(mQOj7#@|5aQ$Slb5#1@O*Z+8k#Q8>DKj)j25FO8R)AQswuFxmt1G?< zCPo6LRrDfEasxr#jm3c`$DgJUFo{z4eT7{9NcAhpo4>93N2COu3i0*pH_ts#f& z;4i)C>~?rq9RZz+I9`H$2y}jeMAX&e1PBVEz5yphP#B5fY70RlNN96BUS8ZxK%Md8 ztbopzBI@878+SeytkJaa%S+;fqWOv?c~XdeEEGUzAS?TssOEQ7~rtI-Cu z`;Ki;8W?MEH0@hEa?Iq2Z?}894!7Mqc8o3HTel1W6Yrzd6FX*Bc()}PIB|(!mgx&Tw`GDrvgC(CP!>kZs32+#& zyc$pTA|g2qQ8AIc8F192e*K1kp|Zw1wdYF_<-X&91onY!wJB(bxlY^1WxE0 zuA`GIIvHVRm`d2^I?FYS}6j>QQFk>>-bLHm?F%C+-O{baqqtI%X#`tQ#DME@}@= zYzp^-343f7`_x0Ze|xYgkMF3z$Ghe5!R6Z^-FmKH!BxJ3vmNNVP1zYL#nMFI0}) zLGd|!(J*NO&NT56=`=kJof9b08U$9U9`Vf6X2)>e%Rtu2AF;SSFwz>BYz|B^kZTU) z?Ec|a|G8%WIXjVRC8nB*DLa*ErCw;JUU($?Pe?BDha@@x8x%Wn1q?o(Md1mMZV{_9 z)*wAqOM0nxw{3(kI15e38m#YS1ahs1x;yKANAU{zz<1RLhG^gD3*N$bV(l6R)e{ZQ zJMn}F*xt9$d>W3#d(7v5=6Bm}0;U%hAQ_8A-2g-38x_Cj2zb5|NBip4BE%OhEMi(W zoyalXo1*E*ryS!IiK3@<dN07|>S2#%MR2Yjok@2Ui@Ex#ne_ik;oM_R zp-FhE!ZF*K=-q8gL1L!*GVM;x9{(cDG4nvy$;0UAj__mbhuGfeIY_hTlc4B3x!bwD z!6{&;2kpU;JE_~Ly~HH2%8)%Y+8UZ}4o&YRXF%}mm_2s3HFlvnc4065;xC*V5+ave z{xC5pVJr?+wW1daj#MZ>V()K6dX`*!Z`6|%Q&h8Mnq@J_BGVZ@Ho%fZwv+BzR6G4Wrj$xmiZSjajtzDl z?8SW7TCap>Sw}H{dMcbngc+Tw^>OfJ&tR|6mq3P&Wu5^7HJIb}N#-w-*(WpmWMZG3 S1MtZIJST0w{V#!zZSo(5Du*Kg literal 0 HcmV?d00001 diff 
--git a/apex_plus/models/__pycache__/llama3.cpython-312.pyc b/apex_plus/models/__pycache__/llama3.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f44fa9d31ce88e894de58c09cb212034c07bb145 GIT binary patch literal 3201 zcmaJ@O>7&-6`tiT|NksmKl~H5vL#t-%cNtWae;&Z+{6fw+Cl=`LBawdDDE<%wIr3< zm0L12Y!oh_qAluNDj7j)6$nrV%Aq|a$J}$#HZHhW*no?+hvu}1+>1|rZ+1yhD&Yb4 z&HI_z`FY=aZ@!I2Lj>C2>FL$)1BCn&gD{6TSO)@r;<8XR1o+O@KNByt$-A>e3vhmuUuTRLN}Y0@=8gowG}JW*r=D(a!a9_ z6-0hjDa)D_L3Zt?D}`5UvaB?m86&lZuE5dcT3J^dm5(a=25l@?`~wH{0gv~%|3@HO zq{DScsc-IhK;n90hX={imjsYp4-nGvf+X~9K9G6{KpWqNb!2Ys0wHunkM_pNo{slZ zBJ&;oQ}UUx>#p=mj?I9T;4l5XwumGwQ(V3s891oe7RCC8ri&|zDA#V*wAzZQh$}6z ze@mhipkbIsBS>&l(Nyzt&O}ZMEFYzoBSrdUHMhc1vGvHu7bFSZQ6|&<40VLQc!8h}xyxddmuwO0`B! zFO}MH2MM{J0uP`yf!rbXC`r$43crn;sae(~Pe2zlGYgx-qd?M3&tso3XU=1veHxzF zP8#78JLSKHPdy&Zn4{V4kGJbK;TEUNsrl`n+B`QR?eqNcm`(U-%np#$+{2)ed}&ko zU2GqgB$ltjVyeQUS64oX11|x&L;evTH7BRKlMBY=0%P*V zWZs;}bSKUk6X(q2bT_$VB$v!YwwpL@Bu<;z*>3h_Bm45RsQ*~VCjMZECg4w%e$ax@ zcTyF!TSV{MI%JqrdSS>B{UC(NP?wOQE+NBQ0&5|BkF;Ia{kJ$B!;!V&BSDL?dZg{T zpt|MJkM~TF!aN^(00QngZ}Iwg&vpYHaiqbY@yd?0;k%GMGC=%B>d0UaJN%dY0O*8e zC@J2Gq5=?bk1oU*N~}=?mC~p{csOw`KA5kIF#Qbvh5ChSM;~IY{>E%~FlF zBQB1al*3`a0dj{t4n#NQ-`9U#e-z04D>ia(b}RQuZfCZ8V#zqM^p|U2eel;0zPZ-D zbltdg{kzz^oBZS0*u87r@i}9BZb$E)Dj25<-^E@7ePU|o>cf*eZ#_!nH=~c^scwAU zh|imo>8;zJ+%^-LZeqbmESS@xnaY~eCm-Ctf7?u*dQmK3`e)_tb70_E0_AR4x`Ltv zrEgetuyRF)r7f^PNgNg@V1e>DELOk*rE*xjfW`k=6w+53OkNF0NJhVkP#oGt)6P>vk9 z6+o%20l7n-#>AbmKTrQ@dQW@_ipu#EIOIY8ets``8pJb6GdcAjdq2CESO8JVm>IE~ zIcsFj?xk~}$fwQpbT@s*NT1nDErR0gj5#ycow;DlT-XzT{55BXz0p@}zL(vk2x7ib zt}CUI6)KgWgl?$FM@prSHp;4N;o*pM682|>D{5KO>IxKLnWFothy=rcE&{Q5O{eUT zUl5i*8O$kG7DkZKeg)(%v4wH(`KM!(wikNaN5&JjAGrVlT@bktnM~PX4 zWR!uq&Z{QJI*J9zxy&CShuM~0@bkc{j_Y2duYwNoreQIJ#|XzgA+!G`vrow46Cyn! 
U=YV4ky&epjKM|n=?>w!vu z^1L3bhbke;2lQ|~Qi)JLs0;OICCVTc-9t*~2~xsp;F$B~sKibCle`%OjFLE!HHWEHC5`p>m{KmHK7zz_6hb*t(P z?9qfT_&ZTig;7w2RT!01Sok^kdH4hH2jLGX;aWn8JmD+68c+l^2pUx5mFN@x*dMAA zQbKCD#wsyr6#-rVJ`Q}e6&4dt$lrn^JZx6w4av~jsv|UZ>XI(ERBSlWO-)hMhG$4= z4O4~9DVl7mo+?7xZmP0kI5EO*J@lm@R?QvUSgVGHb{GVn9Odbsfb63V(?L?-++jgt zdSZtIiS0`~NWK#g>I6XI`?erRJrE%17ihyg3bS<%Q;XXwQ*gr*|@yGs5 zzhr0+m7!XJJsjTm)Kg76+oj5r;dXQ56V_6J2yrJr~Zy&%Qi!6BM&E3+62DquT z)zVdCQ*M^-s>YVt+%CP}GB=xzk_-rxw)Gujh^rGs8l}1i-r{!42}_dJ&`e2ckNJ2g z_B3Q0#x{`0$i0AO7WVkxChh4{~a z^53FMFUE8Bc>ds%gSv~Dm05ds@!&m|V^ZQN$4w+$#KjY?fb!zeyVmU6d;B-y=~I{! zIl(HuTh%@gKpBRgg6NOH?880n0FWL4f4+crxSwwZZ67*=9-t-QJXb(VW;<6vOQX?Q z^%u;sKf*c^*)=gZkYqRK6`B|RkY@svZO zu?|<&4V;BF8D!Ii0-l1(3Ds~DE@-Wt?0ME#ERdG>fILS34x+IMJ3ZS?FI(wl%9N~h z$)3t}r>c?_Hb_n}ocI~pYoe%Uo1Brw7$)EygegpR;o*zK8!w2Flf>kZohU_- zH`@T`B9AADm4jdgMBl~~Wuym*R~{k*W>eB|yO%tRRQwJ!F@6o?F?x|m*|D+Tt^H>0 z;BGf})yiExGP|W!tF-#p?{@F3TX)t^IW{Tmaj%0YIr;R1?&N|sxo~K9m&(>s`K!b` zd)$l6?BV*+#ls(cohj|b|B;;TCKs*bqMe@E-~Iiroym1G%T{LDo-5eXd3)~S^WA5= z_Vm)Xftfg+1|1t1cw|848WnONzd#ln1#g;pMnW7ckljYX94wIOMgbTsko87|A+UtN zG6LRU34^8g$bfzH3Rohar66nxYwZa?PAm@|1m$*og=RP}K9$=sPo!zT+)jE@FY}k% ziIu`o&MdbttQ7w5;ZSZ1D>V5jTAd6?K)fwuxo(u-FyfgcIwE*F;a*?wvEua<845px zTvZ=zklC=hjn8H_PqPZGj6EQa(W}J#;ibQn{#-hlzYh87#x%g)0!A}V(_}|y{8SjIVaP9(G)dY z9I|_HX=-5ho;gKYVe&k5aswmurQx*LPqQ@sIYa&XO=`6UCNa<%KhvKEKTJa+J4zBgl~JUdk5bJ1A(MLf+0M580IBf{3p8h60Oire2Ff-4z4ncu&4bOk(v(kKYhRM AApigX literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/mixtral.cpython-312.pyc b/apex_plus/models/__pycache__/mixtral.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3aae166e831524f3637cf5aa30a9311e8ec341ca GIT binary patch literal 4100 zcmb7H&2JmW6`$qubNNll;)f`5CCjpC$)sQ>cAHc!96Kn4)}DM-k_km)s-4GtOrwuwX}P9b%!$Z?1rilfdKd4_q# zS$7p(3_BHf-Ba{1>{7gSU(v^~8?aCm81^XsdY~BKh=cr+NZyA;^2x4U-kPr%+!S*D z{~(j&%;5FfU5%ENtIbBWb_)dl-_UYHtu|?0roj5Htjl+=HzipChQI#d71sUw1L)j0 zKCWGFz7NRtp3#_g2xSy2Ocjx 
zKG`V=lD|4C1s=k(Ww#WRJp)=&6{OHZ=dL|X(JKkEuj-J(51mB;upe*)aA4D)iUfs;Q<^Yp$=E;YxYET&ZcBrE0mNHL2;QvbI4R%N6De_7{7g#}hgDACPU* z=GvrWs|aa3fa7{>n+ML(=bXUVX9<=r;GBJ(8@Qfdk+ugocVFiP4rgVp?E}u+=lHg( z1(p)Gc9{?w)kb<6$tXAOI?u#u^tZ%5AcvBXo+MSwOyQ#?H%}ud?0%D6DfxYQsE7iAG z%Jh~@MQx?r5b-F)*IYj5rvaFQLi7lOR$^|iupRdBe%+NL6_E9GY4nyjwDw-!Fw)K;2}LK(uX zw61KZ$50)^Pc77~NX@Trn%+{W)~IQvQY&J6cD^UUb*g^{@_=*`Wcz2Gq0%Ce@(4XxH}*cplm*W}I89dvA4$cBNWT~_+Zi`m&hRf2q0(wb{Im&sQY zm1f|>2LeUK17dzn#{rq1Ds4iFthT~EkF*7O(5fE+c|g98kjRJ;pXkJA_4q6U1wCFc z5}8iooSrymWTrcrOM2##F_!L(E$CwlMl#z;UeuEp_t9oSPb}<5}r#TP^6r-Br;hTQu z^41p|MR(3cahpsY?hqi#y6ILwuGO0|JrCl+IC5#tQjNBHby=RV*U<00_*wnLYo#}V< z>39AadUuQeHX4T^I)StvNS_2#dSD7iU;1>3DMURW9{AlcVat6OBFW6N8_!Qazq2=e z>FdPht>AazR3|*6hi8oV`1bwJ?i-18Co!uhW{s?9q%ubK^wax~?;EMJhd~dZxIo z3mT}chGl)wK%KQZB!)pFJQ)Gc_Aj?a?2I{kRGcohg7d86v2y%kD>N@2E0Pvlqx0gA zs?^0+zOthiSAB>CeASOAK!lMWf`AVA-kaTXV=m#*CkF(&?tUi9zrfAcX#SYq%AjLMYOCtavyS4;WTual@GiRy^8IRuWpDG?`+bvYwLw`1 z$K-9sP7K0wwUxoyxh#Uo(OJFr{>*ZQpHs}h7y{}n5U6-L?iHE&H(7W^=GkxN70JCK S(}(VlI8NBA{g+_RM*2Si(3vv; literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/model.cpython-312.pyc b/apex_plus/models/__pycache__/model.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65d1a213b979010f90cb16c41d6823b2d11435fe GIT binary patch literal 838 zcmaJ<&1(}u6o0c{iA_IJickwlXWXLz{sG&w<+$;LfSFtO%X4>wCY+@tJNPN>RypPze5{EYiUn zjJ00LxKL>+b0L#u(GuO-S=>is3IamLA+&_enQ*wpA$5?;IjxU{F1Li2THKj9VS`xp zjYmWA_GQUMroF`QX%7XAWRcYJOf5DGHhzmugeg=eNbrHpSZ~JkPOq$9cpSV|LaN|soC(#B%iyU{M^!lt4#!o$EP@#2M8j;Pu30(B zQ-RsK!?A9pvdp7?da`{fzFUWyL`Co(es1oaHTM>1LqhUb>p!hX`n$4)fJ1kb$Wle8 zJCeHAZdFDDd9rmyY69z+t0})4O4-Ip&u{8x6cuqUqDZer5vm_$WUoij@hHyfOe@J^ zrE-DIb4e4HTLko&ICXkO|j W!rW$zeFOg&9Q^efZ2ue(OxZuyAI47r literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/moe.cpython-312.pyc b/apex_plus/models/__pycache__/moe.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e7d34f030e169333800bb9cd26cfc777a9217123 GIT binary patch literal 3929 zcmbtXOK%(36~4on;bZs`DOsXkOfxjCuC`$8!}13!N5ig6b0%XQ>h!Tdd?k^q8J89(F@MK z=Qa0r&iUr>Z+^c-p#7bmUpKvk`~#Ur6YIqBZ6NlELKIFTIWEC*NINt~j!*E6<~1QF zCPYRHnltB0xN`1qhvBW8&=uaWobUWG3nQq!t2C9@GX z6e$x{yt{B()v5SYSDV(Kk(9u$a8zF+;Z{7VC*x3pyFx+&+6#0S(7tUc9=4otKs8!Dx%%svrfm%+g8e25KlJ?ZMhl2|qy#3>ULAOszT!|!svRlak z9M|M{;2cd(0Iu#gLP{cVLR04it`R?^SmXS#XIQg&>TMEttx!Bj z(bsaVa#5CZTe=~ysj`xNFRN$QG*w>PmYZIXi@ug?mWu;Tdo4w8sZ=)BQ+XLLUw+Na z#U<*8HE004Aa-5Y;Q?tFI}AkH1Dz%PKBaA`Z%_(e@}R?+Q{x^>&{Zw-eW%g!gmR<3 z{@MKRbcmw)jcrZU*HeZ08>+qmo14G6ZLAma^C`%V@s!AkmL=a~U%4aZ$R!-3O}-}r zYL6M4L-Ei{Z|^~W#XEYK`ocT)JUnEEqX!=xwWfax$C#byaO#AVCf_S6OTQW{)>CUp99CRS7JHS3{EJ9 zU%DBez#s~;Ech6ReZZy?WCG$|djn}R15R}yZN~l?X*23ir3weu9ppvW59AwP5}r7Y z+_wEYB+jqA!l^%Vf8QvG3(V3Av9PsKqmC!tC`uWInm4kAyp1d@DC45Vr!}2MVWqa% z)bW~FOwmE0ELVmW0F*MtKs_Yt{0u1dx1hUAzUd-e-Dcl#wQsu8H_eFoO5eQMJ5=qx zQ0cv3Mxxco`AX!x8I2uIR!0{r@QW^*kufuL&K#OK33@(~j^$-(VlU3N20O$X zv4q@u>Vn1Rv`t1{1hSygSy1UXw~gj@>WM}nnWaTvJ@FWK4Gq7B?k+Kd-Jk4yv~w7$ z1QAYyQ)Ty5IXLw^7{0f-f9?LY!^P^@LS<~>%i!Xk@H`N@H(m{oR>Gr)H>%@TD&tqa z3|!sgzth_Ebya;s72nWVVzlBL1<5DNA1yP5T=B`rUT09+<4*#lKYFS&Op8t7ppccsCt zBye>?0{o6LBrt)1@)Qb}PFMg10TeNvK${g89nkweF5Jb1MQViSvo<1!Smx}&bR+ZSezEt$tEGre4ih)_V zrL0*h_RPxPDj1nnOEI%lbj`BLWvLjRmD?)ErPo}Xj=<*Q91B_<5m~l_5^4mhZqhR~ zf-P7DBmGa%-6byr@?pT?KV424Pf#lxIRnF zXoZwPhu}rao7Pggo>QTYQYZ!?oxlz=onAr%UNWnjrh8bt^s=X~Cc+4X%ehF+g_N6s%!8Cfdi!f$q_@)Ob3E%W0$MBD#qBV3z!9aVi zS(;EpS*y2$NhXu0409RWVjfzNGiEJYu#vZA7Q4EgV*^DW*dAFz3M&ofrPuDC*kN#u w-T)bfx(;;&fkKqyUXam$lJOU0_G_|GAqy|a1)xuys~jio{oy|ZM>f-c0l;--M*si- literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/opt.cpython-312.pyc b/apex_plus/models/__pycache__/opt.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fea18c3f905ce7a770b9eda6f20d9e9a84418f13 GIT binary patch literal 2910 zcmZ`*OK%fb6u$HD*q-qtF_1WKCxnm*!KNvQ3P@F2s#YzeNYG04qL$+s6O*wW?o3c^ 
zj6fBtqN_!dUumliAuTX07-G?v_Ax9PQqiCGGwd@0 z#b7$fuw=+ZC9N>*H$ug5I?NHCd`7guJ)#Bm@VelgmyT3qRe6n!$~of2f@wh?{Oa=6 zPs*iy;TkZ$kBxF}9)NJ^^Yg5`co{n7D$SP6e3=$?3T*KFg8u!bvZfn3Z)cB&n#|UB z0G5eP(ws)pyvFH*#=|GTC&K50PtyGP5L{49i@FakDS?FKMNPgZt~a+y`!z`qPG&Z>rA{>g0;6FrM@;n_o?}cp__Bra_WL^&RgY$)a8mbS1zTp;53;9 zW6|7&is(dUs=Qz&7b;F5lPQ!6Rwh#ofpL@D9A4GD2IMYrJ4oM=CGqE|-8;&Ih$H1#P~=*s1tA2o=N3N#ScV|0foog`yuX2T_qygV zNL$XZvjU_oS-%-b^7^h$_sy#!a92-f1LNWJ69Ta|JINnsIUfMOn+z zOID#=@*;+Lg~~fZ&M;{YY;EFG5((M`o#W5bGK5yXs-TIQ%we2$9>`ttS2SjK4>Y>R z>)qoFr0U%%yQ{y^b)w#N!j2C%;uH1wggrR4Hr5z9S%=@?$v2ABA-hBh$}|qUX@|C@p{u5w>irXd$I=W+gXEL#}oIp)_Q&?Xdx-^*Z8O0cDUXa zEQ~DHuA|THYj7Th+P>EQ78O_P&kZoL zofukIFXbS}Xl52OyXl1Y@ZJkMddNOo3Lh4kZ~pj&p7Rxl3L|I`5AzCm$r&MjInj^^vJ( zk@uGbCT+w<>amfv3ysk;_0coWB4o^{r>l|4 z#I9;FT|GFN_+KrYe#@z}A0Dc5%x)&eTGL99o2kA)cXG4Goq-u<8pvJpGBU8Dt(G5_ zHwKcBX;ZzRG)1jmQ%;UkUV z)Aixg8;K8ogB<1*9W~T zkwbCUk0?Mu7DObIt{zuG6e9gYZWvL7$dRomq6s2(y1d^fuY3z$=93?v`Ukr$VO#FT z6rX(m))s-z?Zt_Q-{1`IUU;P$HR?!KWuZ{IM$sLekhSxV%w+F#Vyyz3EYKtiMHZk= z|DFMsE0V38&w|_OYBMFDFHsD2C%$KJ*TG)s?^^4X^CatN5Y~G7Jc$SsJJa#oz&X!) 
uAJTImLmx9SEkI4?xJ}aiij2G>qnl)MlZwr#N{@{g*(;_W2LmP>&)2 literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/registry.cpython-312.pyc b/apex_plus/models/__pycache__/registry.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..07df70e1378f4dc99544edbbb4949c39c377d733 GIT binary patch literal 2581 zcma)7&2Jk;6rZ)dUdQWiH;ElLwe!`~6>(^zz@dDoP0}`%orE?N*o(FGj+0GhcdglV zi7Q76sE1Y@5E2rV04WC!LFK>^@h@<38lhUNTB?8^dNZV_o|qZi#A#BAkv;o+AM@tD zH}B2-=J$I68NcJRtA7Xp{7H$<2(-a|j04~spa6wKK;tBiqu7BQnp1Kz>_je&mw1L< z$gK&Iz%Y+IT94Gjup4|DT15ki|*aL%5 zgkjhV`(Yd=;2=!GAtmyiUm8}TcY!pboPeWBFXZ4@oTNhOgJViRJgvkaU-T;RyDn+G zGLTL@r)f%awr8&nvdC3R)S zP#~cMLM=e!V**_So^OAzkuRUk%DIaKxEGg}bH*G&PJ+ls_-Iu%OU$fyCOf}$Q#FYX zRFG3m3o~a3+-O6`@-(%=%uS{Jc8Ixh*}yY$*_2T>X9wxl+1ynqD>>p9vU?Ald{ysS zHzU$^znd4zB3YBWIRWB!H=S(zPkDx}sI3fMfjY!8#dZ&ks^v+f9a8st!eg;@^9Giu zUu!PRJeulKFSM(ieMq4LC;9Kw+W!Vvj7g^(Vo;);_O^De|_%C{Eg-7pZo~+ zsrcpGL333$CTF3!ZW*P?rHZv`=#z2@Zsbd-Y<3}y=GUCWaD^UKB~z-{ zkrim=89(1)|os~!jsrp$7wWMB&_m>$BNXpWZ3VeLDC zlx|oDnrSB{VFR3|(^)d5J(zlE2aDLy@+ETgSXOnY*iHm>+uyylX?t(Vs0`;YHZUHh zs|2=cwP^E}kymkAz~j{L;Vo=;f!%Soi(0e!Rs`f70jbu&W7L-KpsH1j-zF^TpZP19 zw!jM?2*e)y$9MhXJKjD2)Yj!Eg7@BAkHzG!nA{Um+vgq$qfY{{TKwu>V7}&=e=3Gs z!fkF>9NwC(3*y)B?+U|ICz08)c8WhQ)Z#OHf!Ug8_Nfr52?Gz-cRqM@D)Uz;x$W8w zjXe%c?1m@b2#)mF?$|xn2MuE zytbxsgi894NLa8_I7+0fA=ou+JIO_DJ5}AX`64o8ixo2-p-Q84qUHY5^A3rwjNp4X zOGw)E<_~0QIysJOfW$K}@(jG$0B0ItssSb%;57RYa=ZaX8{iGbCo}mCH^AAZ8^ll5 z`xEuSaXJqS)nkM8zIc5+Qx7weaE3?g@#No$K-94n{AZZs5=~ExOVvZsrjvZ1hT{)L fz8Y-u1QTmwpeaz~0ey*P4@F+!4!;lyVp04H8!XFD literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/t5.cpython-312.pyc b/apex_plus/models/__pycache__/t5.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5b0a65b73b9ac0d3b013f193d96addadc51bc71b GIT binary patch literal 4218 zcmb_fO>7&-6`tiT|CSvQMnhN`ra&eMba!>w;gC_ z-h1=Sn;Fjg)1Sj(kwE*Beq-ys2qAw(#%bO*aa082Hj#M8R%p3%H2 zl)X7`*_ZP%UQqqzKrX;&uNo|iIg!ymHB=tR4RFLm-XOC77LfzWz#(t%lMC+$)8gNe 
zNpogkWBF>eQY>8u+IvN<7IxAelfU-n8gL~TK+Ioz`yCiU8#G_hid9-xD6qk`UFE0O zs!pFrlgf>iW#fy zl_jN8fMZfg&F?EzGY30mTS{Km%pnJ|Rgz_;Vy_){gr$nEz@_Cs*~k?I}X)9^gQ3w>l4;$K?dz+HVd> zFK~i8=L62MJ5u)phx@Qr4*=(Pxgc->mlNuI4UjUovqA_R)Q6lI*;5xjA~Ij+KO!Fs zhwU@{f~(^SI1^}bZv{K}|BV)*9sn)gy1z#YuISOSPtv}1ir1|TJ5SZ|eu>(AqP(Z+ ziA^OTmu{4_(x$2;Hun?HJb$7#?7Yc;y57a3`6SXIIsnI@VeG)W35{ZR0=q%%V%XtN zMbSM>@dtTzPkD{fDm8;>D$J_VqA6&KT6`g08vdvWw>`bQ_qb@AJZ zwxd^f7vI^}x2l!JJUFeitL|wXR2iLEThx~`yZdH9l1i15E=je}#&X7C@Oqj7-A!VR zk;Νg`|Y%*zMDgFw8MT*7g>b^08RiB|dwj&skOe;k@_B$tfP(!Kl_p_PZRsa9;R zapslA+^ZJhF3h%O&oxpj7S9c?9`XE8#3Fn+Vg<;=OO5QBF}?;zi2Tngeib=_OA*so zU@@@ZIu8g#@G-}OI0}yE0TZrs+X(MzAnjb&o&~AvZpRBi>N;ANv=6oh4hneJ`MU6~ zGj&P(X!-x__&IkNw0HU*Nqg4Q!56a9w2#3}5q+;6hJ#4kaZ-?_of{_f#Uk?;%s>TI zqo-g;4S(81f=<9-`iry*i;6W7Y_m;g5roO+CiyBFYmJ_2j?NpS^R4)7GoCf#+17Nb zIh`@4Gp)qjr=d?mcZ}pzPWsvh>1Q8@ZA*He(~m=NUnFSlV;SyxkB;`s8l()*bq~B?`$X^a zIz}pcj%;JV>S>@GgI7;;bqig0d-3(&uDkWw3HHa3+0)$peI0ibAhG$b^BuSI>nELs z+anHjJc91AW6q&K+Rt~$HricJ!!)~%&)?JP{?9#!ZSMvz;n%UGDk!R|Irbmw$S>Ws z$tG4_ni+Ny8RKn;lWQDE7oUz&+_59;pxM}|2QNf1$@kBGtwFt+Vbmpr zeF@!7^7Zieom6uoYfNPCrJ74`8B1^7AO6lk@L}xahjMcyV~k|(3C--fkzKzZ`|g4N z;n=C){rH~0kvjiiZ1o`gwGCS^Mi%bM&CKgY=JorrHvslusQuyH(jP_}skH}VS3q%e zq8XhtqI0d$$=e_N`h(WkRC6qAjAdK#)6MufBYv(mai)`g682+w@b?Q3SQ4=C@JAq^ z%7;3xUowH}2Wq;00fSXnea#eRpw{b`SXeF9H&DO~)PUQ4(g@5zJ-Cf2zo)@W;NvKm zc5uBG*;t-GF0?P!PA+s6R(6oTSR3vZvv}`<9ZT*3JK{DtvXJO-8>aG^()r?3E}iyw zm@%uuj%zQs9!qn~v^5Nc?W6(~4^?)lTk+V;djQaW4&6=iI5Kr-Yck%PJZnszZA`ojOO|aNmyP7* zM&et4;;gVQe3i5KK--kk16Y|=@?}MmOi_}cX4_Md9+0G;?B&(=3=h8yorF`Hp@N#% zw6X$KpG?v2XbL;@OS*&rUehUE2I@1yavAedib;cYXc*e>p}R#aVa9ufdpta9d6BSu zRXg SJn_BDapJ-FKM9UZ&A$M*rn77S literal 0 HcmV?d00001 diff --git a/apex_plus/models/__pycache__/whisper.cpython-312.pyc b/apex_plus/models/__pycache__/whisper.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..640db5159833e3a8fc9d6ac6795f0dcba82b0ce8 GIT binary patch literal 4262 
zcmbssOK%&;d6v86l1q`4C|QyvSzO8zEw&0J$f^`Masat`A+6Q)8I(Y=rMOFr@{&|% zskCAlL?{eYhZaar3J?THPj#UH^)aXXfn2z?0}=}xb{QHjdwq{L-84q>0}EAd&L!MrY% z{8@h~kPR?g(8W?P8)UFw50#{>#NdD)E)8S{IN~F(6IHxJ)SxzS!rS{~BL|_h^e;rx zoE3R{r)cbH^xKtkp?C`j{u_EFe>?57_?vHR0#<|x$o$Rkz6DeGM>JP93Kd$?D4?Ou zJ?&RFE2^gFZRPej;vlDG0FH@9vYbk?K9$pW)d!u2EK|fkq8$sFSv%mMp>0^ulxLGVr6yUVHk=;*Sk5^2OT+x@PR;DvPgZ z#_eilZ}F{z>Q1G+m;=vM_Vj(D11g~>8;ifRLqcZnzzQl#v0SVwN^Q{bs*Fp)iy9^j zhon7DCg+ZX-^E%pD@VeEV4{^=#(BDRc@<~5mA-*<>g)KA!_$rAvKd~!m-}mY?O}YX z6;Cy;Tx+CWY7=f_t~IyXm|bi0+|c?d&ksl2gpWkqK{D||V`T!S9&)eG*z-BA}hn?d*Z8RkMj?dQB?7tO7wVgB94=kIBC@e|*P@i zbij6@a7W_lE?Ty+E;Fo%>pBd#MO>e7MY`~GjN*F;TP|E6hB` zR@xB{Fp;H$^dgFc;55p1v9y!3L9hw%&VtIg4jIhwgA zG?%u_rLFt%?;VK`$1i>S(|clL_QeO|>qn67+8$>3;_5WfGV$Ff`QryD!zVEgVkPrZ523B3HD1itkUWmEr0_R;%=Yx0vxCo zzk(C|C+4UswBv3ah1ywphA)ujn0*@<51oVpr4ZHjXm=@OQ+@za#vu%c;s- zI@ZnPdPBbUX|f#&M6S2_pkq$y0W90fxss+RmZT_9)$QvD4=Bnn_H(+!;o-y3NjS6> z&g(hDC}~g^suXRHrf@(9r7I}F8&yi7%qM>)EU7VfrI+m= z^mL4YZ*U|q)b@q_(g#0l6PWHE{2QlJ-lexE;(qDhD{XfH2d3QXgqt5r@;;TrC+A60s}4An1cbzWsZA9QvW2geYUvEGc^@ zQ@MI@iyvHCXhI4$q=7y<7}7rWf9Q+UBv@SY5Zb4_T`EJtPn|vK^kG~K9k}1j&d%)4 z&dl!p76|we&@=MJ%AY(!f6;+ULQ|;!3c@bZkjCgJg;;bQY3w%AIE+?V*SO6OxQ3i! 
zF}vz%>s<-7qARyKXq`yf8Y+Rdr&~iu8vi4o@@fJWH4kPpyyo5JQ$8my5#sV4G^0xS zON{J)NnMg@iY~cP*M9)bF2X3qXeh<92y>^!cF^=o31?b(&b08JX(2+?tR>Fn*VmUz z1)P+9mZ&H>bsZ~;3GZt-f-%v-*Xbp00AifQbK*Hv=` z6H5f`avs16G$xnQs%c{K2-WEckLMDf8kiV~wUUmF6*Zr@hK)5dUr5|6nJf8RLM`Ag zl!9I~I;4V1R9(k8bqQ%+kpfCEFaxbF(E^ihhVcva%l$iF4662 zC)Pg%*hOVz0&ma;LxN_rOQD;XGJ1d>vIkA?ubH-G8AZWm3CV17vwtDCq3T&J-rDza z9O@KSmX>cR%Quv{8;gs|^6i`Rmj5$VFXDMZ@yRPP z_fs<&U<=i}p{EhK>Xn}dBbC_nerbQL621H+I8za4o=fNVZdav=3P0gwsH?Ai;7+Oj z4nVuFeMl2E26GyVdAPNL#%n?b?#{N5@@PEfGOTlhyd?ozEjX8ldZx|n2A+ciJKr41Z7mBU_d=pF2cuSL zN=RmsP=1;pClEr*L52Zj-r^0cXHKXM8c7_ek!!d&*<99Cl+AP9$C`F0VqkB?0k+Ul z4;r4@fB#AJ-5vhBU@dlWhkq(Y9R5s;3r9A)+Qn35e<%c z+H3g$-cgw8Oki^fnBgA^OYqkI4^QFrXq+{@(9fRaOL~;eXm?sBTRl0u8N3wlsQT7Dwg(MGZ7-EXG!(Pxob5;cU|pipfFuO>#*YxN#?o|ESt9hG zY4P(=lExEz%9BV2z=pg=!MQO`Jl!!Ki&q-IkE`S&*wG#^(g2W5hWP_YM;@MuH(ul; z1fm^6eZz<0@oIRy7LGQBP~cWfuYAHFTR~wv#4(dU_1g&8exy#> OKRxG|;NJ+qInBSz8N5IM literal 0 HcmV?d00001 diff --git a/apex_plus/parallel/__pycache__/reshard.cpython-312.pyc b/apex_plus/parallel/__pycache__/reshard.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d64c17b68253110411b97914e485e62fc54e966b GIT binary patch literal 5575 zcmcH-O-vivdB!uI8GCHTIQ+#SW+0)|B(Y<+P-(RaV)M6Y(xe+UDrH+)jy(Z`?Qz}= zVL^_PN$uK_8Rq1{I!E1zQvPjV=IH*Nfh!I+qWUa)(`7l4kfLh*k#t#3CUp+?AJ3*GfN@-w5{Z%MgcO@jNV+8&gnvk9 zkfctdRMdh`sx&;N;J^C_x@Sa&z-%3E8Pe?S976!}Q$Hq%Pf4YdB+Q}APps`7A$*p( zzPkoxia0YNsS^?sk)%vSP%NGr7vqYUmK7y_KOy-MCKvOUnofq1lupE>5yfXgSSg5u zPQd`8`2DrOJw-xFU}`pjPm%*yBxOpK(}7>ksuOZ55J^j)hSP~@rEZjtpa@7b5vU;d zr)Tx1xB_?#S|UIIa>DT#nKB67JW+BHHhxK16tX=lf#rb4`Enm=oIlTvuJ!ym@Tb7n zAN>8!-|lFkk@a7VYN62*WwA25u#-5T1AL&NgTL6Y4pb6D;FbZIzN~0VvHYC)9SPE` z%*znvev+=AmB)`Q8OtPS=x}>Xseq;2GQ`UYuBRmG0R0(eUtKLfCo#*1+RMruwEr_K z^9p5VyPFPeh3R7&xG;ltnZQ~zbjCVog%e}y{B0x~+V`#{0_RovKJ;xJ zEfH*!ZzYj8_@p;^G@GHlZyT``V~3|+b1}QQMq}36`~Slie9*Yo-ikV^MqNET*e;fO zJ?#|uvEzB;!G`u37FSQpQIf6t+!_ald>f$A&|*7K2RIKmv-mnr9^04ywsHL(+Yg;S 
zAiXz#SM?e`%Fi?)(Yok`8rS*{&an}r588{`UDdof_G*+PO?@!-I6i)EtD4J3_w#cx z3aR;Y3X)YdE~ganfn1;A`t5cR+Atbd-ZMsc77e~94vSGp<3**C$BXJjL>1FWib^p_ z5u+i04LWoL^aG$lo&n%s1xtmv8lqYi@xQ3VWa-o|Ps&nQm6GX1M3pY; zG?t{!jZ11c6-i1u4Yj}&Y>%WP(YQKWArV2gM(9*Bo610D0ko`-OFvOU@Rn|${=-M!9t7n@v5@}gX5x}Y^(n7>}+gaUU;<4(;F z@30+N_iMJZC^Rq4EY1{!E=}m#5W1h=%M1PMT>rYzUs+npEM^wyqNj7EZ@I7F32L6; zf>3PpWW!q9`2~KPcP~mcpm!W(CU!p3M~xpcur@h za={JHg@v0tE^krn&W(Nj$sT2CcJDDna~HgLKD=rzdVP3wb5+=5>1KDyW@~9G5jJO2 zsf7Su!i(W7T6wbkM05Jry7SJT91qb)tIW}e+6d>#rvqCyPd4=0=6$C^C<<)_LDU5C z%MS}@2eq?b;(vy4m>dt1%t*w@x;@RFDxq5YN>>pTE zt!*W%wZmN^tRS=|qP>5k?cBQST+u1KjbPq{eE7wq4ey`{^5)fR#h&xUvjI>CM$cmO zETDJqqu+LPJRR6(ZJ*D4HdA2RHMV_&J((TNv%MvTa5NdlaeqZ#mh=3DJq}cz3%=qv zU01eT-pZ(CrLdMM&@%V7THfDWKE}T3x@?Tr$QOgJKG}5LD6up?l!im?$2&Cl@WsHZ zo13l=>*%u+oc;4BpFJrT@_E)*F~EW=pt%C;!o_v&Vi9WnuEnlwXk}=5NVE0lB6(Y& z2$Hz7c&B2$4*vpKyEy6 ziOGapz~pp4?JKy?XznwcjUUW`Q89|AuHHls}0_`(}Hgt5IIZ1UXxo7(;E+59bvC~sHyvR5R=#q zA8I%sgu5yFHp0cI5s@JDUGuT%G|Z6~ceo5hr*MazbvaVts{@E-u$zFW12L&`C1tpJ zgv&*vTY-V{Yv|^QZLYPzc{I*b;Ld2=nGLQt*P7@2MaRj4!>c*G1xKIe=-Y7ka-(_2 zz_!p*lwg{4c2m;NwXI33X7#|PHwfT70QK3-=a6hp8?SRs1* z{>_)adf(+YA}1E6!GV6t1f*&Q2;V5_^nF=Q81We4NM$hef-n#oY<|zZpOo>{<0TY? zb!|POaNmX0}1BG4oPW)6OuNNCS6)WN4mCjk>%Mp#K8u7ujz_q z)M)AhlcKGYCY5T+N?#UGAI7v7`qu4f(q8VZ)7rXfq)6KyCT)^aDpl*#zVBT7at%(T z_F+5V?|kPwKlhyP|D3}rEEZT9HQER>V$C|%yR(lTIIt|)rota zP2LnzD;YuMlF(9}i!0fvZS;9{Sp?rHO`idlo zQgmT8BS<$BOVKGoTF5OeM`u=ZHsaUR*vkHab?&~kgVvv0JNBTPB=rMAgNGkw$5}u? zZ-E`R06SQI13C;nIy3hBp@+T8&jKT+^PR?x4HzXek@pM-jl!AumI;SV`+VGH#AaAj z!o3B>7JztQ5UkkXz#7?y<3|A8CJ)^MTdk#VObN&2fio4tob{Le!IFPO@sG$|k*!39 zWn7~bmbKZ%R$QA;~vvDb%7bMDuZ4+gIh?XJY&Vm{vL=u?^ls(>( zC#ZOWCC>@Pb7H5h=y_(huN1qYz`y5;Y`sD`@P9jc3vPYkt$1!}L0B}LUZiI*9y@@? 
z?m2Yq`nf#~Pku`AoZ7iq^u%_rl_sVY`1ef9)@kkJM)k+65dKDt6~Q$K+RX%4IS}iLDapPktabI*+3NxQ$sL93IhOD2pubhPAQ>N zrO>1jnk-6T5rQfmkcY+c$dm2vij(!@tOEz5)gh^;T|Ya(i`;;uSN) zUuCvNybF0Rv8VTx} zD_r9ZBiS0Yz%LA-bNd1L8Dr?@8uhPMU8Y$agmuLs1j7i><*RH)SX6mrf)qAuezVaq zFS3k|q9&y=&kPG-Y_9oN)Z)Caj^%n(MN5)i2LMskO?+JtRqf@@KBzq9z%T~au5$Mv z)}4P39Np}{H}ug^F%XfhU0-@y-aA+Hgydu6pLxb*>$vP0N7_Wub5!m>{h8;qY(1@b zPHRet>Qm>F(Y~m>TEJgD&NHPLoYyEQ>HZ1QvU#RpLT|JN`OGaCf3QrE>!+ZXD{!}r z8dDv%AKpNwxGP!;rkc!PHiYXnbAc_G-!{E%!W|X(0yU64M7t!LG+}aar_d0mKHA}$)`>G|C+VT|Qo_}S!J zA_zG3Iw~(F7Uu=3FJbF`L{%LIC#iZh4;;@5*(Gr`z9NCSsjfPkQhkO1O|?gxqcUlg zrxPUHGu5M-Dybdyf!Q!Ef4pA79y@Rx7L|Sr035ilv*ZgazVOyj#W!5?omG5ii@tN~ zj&e_MBe{7+2}D8B%H2mx-6!sMpICpX+<&sve?jTL@ab5w{{`6@`m(Ka@$7KCLd6Kq^V4oyxF{nCjO|_(@cp{fu z)FSOuKsu!M;usEW$}Qy*8G|;8&mg&@K9@4~qZY!=29wjnTJArT!x^UL2M;TH-FFOQ zr{F8n9|1f`*Ztiar#8Eku3_1F#Hj1uV9DFBc>7D<5yd-l-y1DA9Vritl?E;=1DF5O zRvfr0JNu0W5Zr7oHucKK(EyHX2GFY+fDH}6O`c_Dm}1wAa{lU0Zewcmse9)?I$!jk zl+Ry|E25UuM# zY2+enPPkTsGd>L_fp@hA!MQ-4GYta`6tLRM6_^xTFu}c!E3g3hyF9vaNOYN$xk~4Z z^gA(7a|UjJmg#&)o$ToqP>Z<}c~jV)ZwdzMeC!)}_IfO+GPI{IgLwnAeFzvJQmu|6 zM&tVl#Pl=(yg$vI*{n1#qRLXerWt~S+@#J~OFJ<^Jsjk0`yf{88dAE3itWQ+G>@!5N3WPs#UI^a6#q%^zIf$~ zDE`RSn~HxF+D?z$*1wt9oZVu#UfMdp8figRLp@^L*spRCwOO9ZmrChxj-RSmmE zOzFj51bqlN1pNq5b+mjM)rDr`S`K~&o6jLQhJYsL#}R8qfVXQA6B`PUF5L!zX}Fso zf9Pqg@K8e<-gcy7LCi`zx;K8T92uzW_in9J z2vobRyU9=B(E-qk&%psYYDS%$gOFuC)d?v z8rfben0X-czsDmIeFxZP#Y-?IifpcP-!;`00G#!EICDP%ngu zylXg!veD!)k#8Oj){A07#77_T3W9b3hUX%xdzyJL9!9no*UBb8P&-hRI(P-*spYfXne zsu>=%;Ki2u1hqwfRg9#?2=&0!ZK%z49q5a-D}DPu9kX71e2m~tQZ&QzYOp~seax$k zrLO^M+y1bXTsk8~=p&ovkuVx*K8JDuozA{9`qp|@dv7==&O!%1&`3`MfLA+)c|h9# sLHZt$&;xSv0XhAE9RJ4L$((sa0DKc0WxS6FfNyRxEsXsu0)VdjUqM7&-6`uX&@<$RSF|{R2vZzQA*K)|nwrV$s)1p#ZyN(qHw$YYou^_l(o7OJL z&aPaOpbDeVK?fgfpnwbn1$5{^f_qR-?WypwKrdt!SHvPg4YWP(Bapxxv>rd0-JotVTFEMf~;!4|WkEoCJE zWuhh9N><^#WT|#28{)idX?8dp=DcD>>}WPB5Rv?lnCdDqL(j!v_iT*IG?0ZmvYtvr zkNz7c=#nQd6!NYoe^_u_Py49s7D~mOr3;=sPc7cx$~j9g?wv1L7R|eb2Xww<+aOUE 
zZvFJe2l?Pb0af<~ttv3C79s3y96PPe5|_^6Wr@`(5mlAK+WKhs8=r_*cl zOKqeDa+2>j0Rzk+cll4itP{7RtC9PAm;yzoJ>S!>KxmHBn*!!*MRb++5*N-v&}!oS zaGT%npYO5lo!kA-;Oi^Wid>VrY?9mBg0P~PVomvkm{Lvng#1>%NpyLUp~=7v>o-@zeyYTPaho|6a*1 z-K9lZT6PTn4ZfL!@r0a1V*|JaWQ)-^E=ZE4Q+BQTpyflj!gZ|sn5K( zGkv*!>ixQQ@>PGjp80M){k<3Bi}Nq0>U~%0+7*VdIFNnJJkS1FV7__wZxR-V1pxnV zLB}1}1s*a7@v-N6-(Bh&5W&4O=u}U2j$BI2_m{@Ls6k7&+H1di@BBr3Z#vTB1dc3-%UH?gqul1NtMl=sBR~&|o_3;T%E6 zz>$wR)v?28_rPvW3OD*C5Wq+Jow_#OjgOJhwd?;IA16+&*PhmX^Q&fJq!}OGNbDvr zHj)?Xm#+SKZYTM1BmVKz9zTp&iIASD-PlYcHnVyCW$azQ53v#_@l;)#=2+4Du-q1N zHZ=?{Vi)U%c{M!t%-xvZOl+E4!y6wpPG4vY&-n^y0Pn*oKZKk{ zBE79Ja&c9;Dm;6$MX>kdf^ueKutlKrRZW@Nxa||@Hr?&{tx5}7KM_$1lHz60~vyFeT~`R!sAwsYm+nyn5WHYnZEUBqSR@FVii-eg@%@|Y~@VKxgwb`HsT zAYROIb8aDTQ+R)w4DW0 s;QPRr1VQ*anQoBjf02nV@OSPDGW}ZBh18b>$m^d7vM})#0m5DVFQsGzRR910 literal 0 HcmV?d00001 diff --git a/apex_plus/parallel/templates/__pycache__/__init__.cpython-312.pyc b/apex_plus/parallel/templates/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43614ce20c5b77083e85be856d6345430ae1415e GIT binary patch literal 963 zcmZuv&ubGw6rM>o$!;V~)Y4k?unH|i>uydJ+A5Tq)4N(6oki);c zasojVn_yfOp)9@?O38+lP5$9Uvd|Y+OpkKAh%jb0Q#6QyF~S-puvhPM8k#mAPGE{piWHskc;1kpONw;`_>iok4?ozs1$TA&GD zT#SZEhoOA~n~<_c`Y$g5k{MhL&hN<;hscJ^XHj;qZ`w86d1VJswhar-%2~k|cM#P%B5_uQ@KSzq z%fNM%C&VGy2od>}#7TlYkIy5^XV2744-rqTH8Aq3hNC`4UX41gT5Qm&W2=UXUh6Kd zdlx)6+>knKQPp+RHmR;FZo^OUUWOr(W`!hnyeCZUfghRCR_a~qeR{8OoX8xF=DLa8 zN%~&9&`r;D#F=C1+R@l_SDHRaPac@vbgm=jevRDd+*I0)cC9lp`)y>dBhH;NF0h)t z;>T&jHSHb3m+4;}cB_~~;jj27MEIBYr!GqyY(X(7MK(1ZJ9y&D zIfu!WiXAK=nEneO8OBdWm^u^ZEvH<^=n)xbkpBShK2!ZDgs=z3e}g+c@UREgdSJ01 Y7vcN|p%0ij80}2WbZrs1Zfh!%7O?Rz*QATk>z@3iD7JPCDDLP80m?%S$F~LB#_|_! 
zkn5Ky^KGzd$@P0SDz3Xj!uLcZ#9($Jnb0vio78md{3@d-SJQDNL}BLE{3ujxB(AMM zT|d66C~`tiK9t8-Q>hS*#oKW;4r5A0PGyw1E@Oen-il{3$@CJaG)%}}$JZ3SI-jtD zvcAqr=cpeV&ykFxl!T(R%t(~XO0>*LjLgf9R+#;om)Ixli;69tf$k{>1$vW9t8*4NZ@g0AS1$^kEd=IEa+ahgTTWb{iu3eF{ z=C>$yQm>}<49ORir80qjODE_hl%Vf5B9x=ERBg)};O!Z`+03FGlcR0c&ywd6I`MI@ z0c^8bCd)peAJGIjiQ1?`D8VG?YtY+lWYH4Jadx{l?PgP(TP7HtD#;ICD74waBAbPp z2-&ReR%0UZfIR1% zwPU~cGnjK^oy0O(;|5u$E6Z8EWx=+DN>hTJ@S?J>mLWf<3&c==dJG}(;0oCCkagvp zn>Sdr`DYfTU)7H76I^TVW*e^VQ@GZa{}fj?>;C`mI*Bh7r4IUy@70((&K=ICG7Y=!u_?+M*t z2N2_za$HhL@~~@EQ5IxrEg=JAyAr_MF?~HFtH2c~U!)x=xEqO238(c zdYl?hFUgn(N|)0T7Eh$X1c+g`ChL;L@|c$&=|qY%Eve)%RLRd2@~C7#n=UaG!{=-) z9kcA1?l6_aK3gp2oWJz%J*};dF|ntm8?86B>2e z$`1>DztCh=>swh@WbJ-@wQoY!R`k_O-|cn%{%X1}o{=BLGRm4}Yh@}YV4q$&0%1}I z5Xd1K&a7h>c#9FY+J?`5UWGr5lQ6WuK&pO&-Utmvp~Db5b}s)*=zY_CuIxk}u_y!$ zA-Mf`U+6jJ(a25eC0o2XZ`__^y?CG)_%Y1y9$vErIQk z!D5?dZ+>f{=nWa(Q2y?|cc?HH0TYdvA+&H4*a>uyxCf1n!QID3$Jk-(_||C28_qx2 z_YUmN8{VHDc!|%z1O>6R2DT@P!2tsv|G*aaZEO4XZfg4-n zC2voD!0`4Ky+ej~Xs`9<*uHnF=$$jXb0vT95ANT&cdq7}3;y0?hHmkJ<@24-Cf^Du zaOG_?@`aBtnm7vw!?P8HX8~chD1K&$pY7Z=5uSypK0>$xNB;4Tf$$q|%hO3)4t#Be z3c`kOXp4cI>^v~UP*EH-#KGM=FX?@8yeQ5X;!Mfwe?IeUrsO}r!xlTQ8u0k9?lF5( z@%o|x&(LC-tofK>8=J~*)Ea!ol^aM-n~?uP2@>xK?A+a3Iq=LJag5*n)B~GP3>KVi z@88K3Y^V3{{E)f#@7kfQ5enP$P5aruI(*aT`M>?dH{Hd~g2c!ITpk2SS+EbmCB3<{#iYlNXhVbNh+Kv2s6pFu97>IMIIq&{Sh6(7pa%Xha8sXhj5D) zhR+l9hHEU-BV2HsDI=)z7tCt+(%y*i#nd}8bi^RO=`E2_$z9Vl5Mtqe`{gL+#*$E3 z-s`LPSs1xBdwVX_h7fK6rTO{-^Q^aF(!$SsSUwZu=59nl7bR~T7H^Rpcl8uUA#kZvTK{&bsC4Y zgene^no8l)q~g#VsnSY0a_qequLy~?q5`R{RBooBQp=@%v+GR^q9c3e&6{uD-@Nzs z=e9OKg7HXw`$mmN=vT6F2iGLFt^u)xBqUKXQm722&>31`G7Lp{w9G18hO=o#<`qxI zW7DkcReTwrO>?qe31k9_kP#?EqxX@--$9ZG2UZzJN2aafP5PdZO_H(L@qA9V*b8}0 zx4a*f^n9@(%1O#%Ph;6$uZY?-?1QI^vW#Z|DoT>JLKj6<1fF~aD3q`7Y$~$sgptr11y?E zRZb_i*~6gRQ_#K}wDSh{CMkL3;y_9#3mT_~= zsb!$6yTBnz-^T4ctAwTxA@mKkM8U&#u&6@)$b3O1XbIy_ji`&o zUS3RpqG44_Pgi8D-4KiEajZ@2#ZvlWMZZxjq{R}R&6eb{)#1O0s))q-PYVs!cD zg0?vD)y#7E>szq>M|tNO8PC|B-R~u 
zme(fX%;KSO;DTgvs#uuB7MBMnG?hG+#Xz62_!&_yV@)MZR3#lH*`|`@u!UMd+xjf% zur*mM*;_s`JB-i{z-bv4mA5#XY57PaambEs(w5bPa+)rxy2TLt1Y0HqjI=q8SP*L+ zY6LizZ#$rKd;d4qq$srukTJK>)BeHvV7(){G;w!gqa!gtUXS*C*Y<7Oz4rOD_4epW z&}>iD`%;U+<%$_f)`hO+O9a8(D4&BZ}f3EvUqDF9G@Sp z3q6}c+!W#~V~>R127`7b8eX(Ny%`%eW5XMU*T72!p(=D!3DAz8c}*K~K4GgN}1O%WMO|a(~EP){Z?mZw`I% zCt+`U>s3I%0x0~@lY0!Mk@K*sHBE$)mC`Bx>I>IMLu?paj)`8BC6y&V(r<% zyFDO9s?M$xEB1y{JBeMaN_=trwcQ&g+ijO$dPkh_omAh21JdEN1WXNvqNpdR*F?R) aqqlw`zriQy+zb97)l-Wc{0qTqOZg7~Ge0!| literal 0 HcmV?d00001 diff --git a/apex_plus/parallel/templates/__pycache__/ffn.cpython-312.pyc b/apex_plus/parallel/templates/__pycache__/ffn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ffbfa1e9eb75dc0fed6fd2e31a6f0560b2c7496 GIT binary patch literal 2246 zcmaJ?O>7fK6rTO{I=gY~gxdLWAdP|4Kx`5kS_-NPI9Q}fh)58{ZM3qylVr)R?e1nWOo_UblAJZKHS4~Vj{-DE0>yI&C;}8W7&l(31^Q;GS08Md zrJ;{d#rp%7@-H<-#b?Ac${;qOW-P>xs-}g6D|t)JWz$-eLd;lh7*Fg}+MLI;?_y5V zV8&8!!HYRvkJ89Lkv7s;rcFUTuca-BL}ELZ&ga$aEZ)+RfHUa@&8q7&ZgMUaYKz^I zn7IoeNKpz%(U4In$SO4C6bACp(}A@!yu#jLH) zfw$BQG!bt=Yr)87=-NIq;o|`WqTK^ndKVPHO(K156sUQuHK-+E)!+o6Ky6U2Sps}Z zXXsgwp{D}?6zRA5SjIlSqgUHlP-Kd9gQvH|T*jsHD#mmR_RSWU0=q~r(gXyBT5bj) z!(`}lc(oc}N%rh(spDmln*~K)p^Kh|m<`!aXC2S{O@hU1L*37S0tF9=P9iVkuP8Kl z^Av=Q8mb8}eF1B|IG6f?Hux-f3*0gX0P#|6Dm2{`8aGwLDwS4I8Lp&5v3X-V&b=O z!fN$+DU`xlgq$7Ak>jyjG zp6&3U9Ud$T9XlU>R2JGR{=f?NI1pU>>QNv9a zPpw>h+}&FmT8}*Hj;(ns9o^do>}0M(q9(yW`U}z7p>J>B{$49)|m|#lX<^ zK*AnKR1S2Pdj{+S1DiK~3;a4=zI5Gw|9X|72V2Yjo>IW}M@p9;_+wQMI2f-=;GOnw zhO2F$_3&O0v`278kEPDi=lAEgq>){YX>VGQssiwb%R<*bF9t9GChj`$V=%$7<54dX zWI3DGp)8{&S=MvPf<|~rmcLv`Yc-983&&D3I>d*o806hT46bE^I5M09J&L)X!7ir2 zI6?@i8HW(H8N=ib*x>?I`XCp+$5a8H)+5fe`QH7r_F(b_5$!U7Z+%W|jQ+ZVD2MpT z+iG(38e$wWHag+vK*YFS#-jnmoprA{q1xi>Uo3K>6W^!;U(e8Z==I+vaMHtOmSa8gIs(LR491q(zrY| zJT@_UerkA9zB-(|JUMmshH-*eIDimrUkc~kbNbjggO}t=GN&-9G89EU1${Q?`x_j6 z0(zf-!KYy43HW4>f0qiE+ah}a&(%>1_z#tRhbmH6Sq$&fdta2rW0jVJC8_M|e=hQ! 
Kv<-qz)PDgqR5}{~ literal 0 HcmV?d00001 diff --git a/apex_plus/search/__pycache__/engine.cpython-312.pyc b/apex_plus/search/__pycache__/engine.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dd3d5fbc63a18dc420de49b1c95d43270d14a483 GIT binary patch literal 14193 zcmc(GX>c1?npih3;s!{707%@Vcz_}yk(5YD)Gg|=L`t?O?U-3F3PLw2Pyk3bKuJVE z+co3bz!OePZFVDi?a6R<^Mfi+GMrRW!#!%(wW->2C6%gflNPXTxrVM$s^ZEYc&JTg z=1=l{uh9TVLo?2?mB-@icYN=A-}~P29q-{knM`^L!e6*A+&O-dqJEAUIjGBtohF*1 z9#IU%&%2bJlq;^tE(&~gR zsh`v*4U-0v*CdQd)1)bBo-`*dlNM6ekgz5jCmTsxn`lbfCT&Ukq@Cn-iRPqZ(m_)! zJrkBmt$sNl^sP_0k}Z=h1m2KnO}Z!DByCK1lHN%#Nt+VBq<_*+(&j`U8JrB#l!CfQ zF_yO}#`>W`!a3Q-G_vhX6RTj`TcBu4#n|3fP3~KuL-sEbWIUu4RafKbjHtafmx<4& zq6twwF+Z1JL$s*Myv`)yYdZgcjm=~2^+YrkQi#@RHWT64^qnZjL}Ihaq-c((<$~yK zXsi^C=h#HzM(hsD%qLjUdOgZT6A89jV!DxuPO~xq_JT+=Y;rCU&9G_cMRy~foQKRT zCt51WYx9}8`3!)oM<-srejXAH=MwYj49j8J*~Dz@E+h;STr`!Qn&pzD+Py85skx zQq34y4P#==1V+a;uv)@sElh-JQb$@~Ypl_e8VzHG8pAY&e@r9%H8HlS2FA`BnP$LM z3+Z@UJ!xW`te*6!VqDPD%(So;rWGLF@bAGEK#iX9LY;L%AM%OD8*G$|-8r9{j;B~r zot$OZ1ZRU$i48G{S6n0J)EQ&~*NkZuwh+}!W?_yMH5{9n=ThS_4OGbi(@OZ?nSJQ6x09EOIfqG1j zhZ;B+0CUJht`#E>MsAGE5Q)09yk0^YQI%#BQ+pc_SZtL>)BzHHeeS@`G|QzA++9fE zik>|%%BJsTX6FuEU&!2HM&aX_Q)4y!IoCTGT-6hH5K0mD9+OX?n>4%RrNbMX}gWgR4P? 
zAh^B*wOUvR+APcuL*E^Ssil`pFTxIRK$VOE`e@5FKpFO-BUmt{htV5yiW#j;RrNbc zz4kqTFy;(1m1S71ovvV}sTzuPa(~b|0j+-vbve@>8r?Y^lp$s2j7jb-XRN_`3hFF5 z%Zx>?t$uUHoLN5G7p*z#TlA7EgL|0-s5RxRAC~8Fu`y?06u9u}jV{+H_efJYIK1w5 z!vdw3PSMm-LPag5;Qw6}$ic;?oB>8^1)6NRrVO4UB;;&z`z$QL#ipeu6_vr0M5=9) zYhhmL*ZvS7>@W|_1lK`=tGO;!ww%3gAFv*9Br;Z@xjr25Y|*|nprUfkqtv^NF~u|$ zQ@jSp18}2p#!)7(=JQaoWL3c|E;iQmYm)l`zKp^ebY!q|5@zt^*%NZ6e+HZ?uhs{< zdIqkNp7GGx6K4E-3Lq=*H_rG%YI}fMM=iCPpo}Nykm;7cW%}u*|4=i!x_d!xvFL<( za@EYFQ?7wka00oTYtGq`9g8iPVw71(=bX4>Noi}Qy+Th*u2o*GX$4KC{F#n=b&Kwt ztL|!W%YDPDJO%5ky82vlP0mU9u2`m*4rjV5*cCZFQ0J0`$)YFcnF-0Y)o+fP>8a-A z6r-wLYm44YpInpoPtlC}ZEemwTA3@El0{$6#~~dHIaR&UWDdw!a+;hr=aKiR+@6M= zvh+v=XX?`LllaH5id8pY{nGHi*m%Dt71E4zaD-4_{!a*B;&Cj}_hYpOJRSmr@V^5q zkoqI~3Pq#Lre4#*#L+qW9R)7ze?dQ@fwcw|wLtxA<$apNCHa^F?M)FrDC*}xXG=!X z@hmGk81`N~#zvBCa+X_&#G-T2SUj_k#%=Op82QvS@^?L_ zew9X9_Z>?U9de1Ss590lO|)Pgic6_JEt(<(I({#np5@Y_6;xIjVJ1%0UQk*^bqrM2 zw5W;B&9Nzl!_!&RC8KkZOf-Er&7rL!DidrNtYm8YViX0+TfgtFC8Sm6WFA=0RQr^QADAn8iA9g8FtgHT0k#iq*C zkwP3G2gd;=VZdya6E4J1TU5cVmGyhkh&+t|bLZlzY0*TWM|{j9bsXbfw^QvqN&zuNsEeDKl~5Cgb^b~VuakA*j#shL=DXA>}-<5 z{hSU!MD)lPN2kf|5SzZf3$p&-vQ6;FN`D(R8Eyz8HFSj6wr&Tz3&C?j@Z4tbJg@Z? 
z9o~YYPjK{YIu7Iw&oz{{XKjDMGnBttw6zv&`vu$nwX0jUquW;3c3a=NM`%028~x9G z`}y8e1>fmqebMG!)eE+swdg0d10@aB>;=WN;5;BW53CzEvckd3Th5oZ?Vjy@!yCth zedl>=u-MhVJ}Gpa;;s9j!D;#|&0E`wp5PCzd~juXblcupu=fe}zO~z1_Woym$M}(} zPx{7qSE%UkEck~7|8UXMy>?9S9OjQ)7Ce`qD;0s(l9uvz05)%!?>{Se&f>1`f3BrM zL+i|w?va&C#r=bY{U?O|CyIf6eCNp>rJ~ojaur$xdU^lgcA)!d?-5cpyy1b$KHth1 zqC2q037&z1=cwR0y3x1gxwP#c-ss=-AO9lI@s*bf9skNng~#cVj`DVwPSaGdZQ1l$ zaB%r@(H{I)H`ZGI^!Eh&;KMN>r|ruYJo6f#<`V(jWQ@8&IN8UTacMS1vApR5gaU5C4 zS6V83d`GPcJHF@}Ss5$3y9@3 zUppgs!z&6*y}aJ|_$EJeS?IkicwbskJoUq*ueWXa4}akaKJ_11SFE4i^bfD7imr}= zs~6a?Hu0%zpk$$ZN1j`Oh?NtY&Jf>s`V;5rFI<6V9Ro#|x9I8xrb3Gyox0PpJYKR> zeIte5388nQ=<@tP|DJwz@I4Dov4gjVfMQ>8rz|XX)F3n<<8v%u0PeVafV8q4pSrqX z-4#8ZMNiL8gQ}(VzbiG(j$bU&RBQMbAUyqNXduixT*@jbYa6c({^}QR(l8sp`b87) zERDzZ-%_?KLH*yJ=)2OXkD=_716n_r-}yZd%V->f^#YQYI}RE~tynI@Ln(^yJz=KS ziYwQ>01~vVy&bUldDw!VCH({)W z88p}T1X&9LTT&xgEsG7gh8MI4Yh-VzwAbde<@P%0360}Dp==TAXY6|_APbcD0_A8C zI=^0<)77QNID>wPJwc{N{(_nn1`QTH5F~&#tj!rJGiCyvv0jfAO9a}3GK>0PNI5Px zu&Ee$bvU8~hRobQ0-VIzAvU2jEZ5480@JR%1j4WcK?P)2Uer|-BMF+gkSYS6JCEIc z3!|4Y8o~(m4Gy;fkV6y};%q@(g*yw8Xkle1A~;jPIVU!LO@k4wWo0T-c2lKA%bpU^ zEXn8!fN58r5LMPDWDsr)yEpIY9+%*5ku6DSb45xMDzonT5}H*0JHU|+!}bQ@tf7q7 z<)fb%JjGx~A$UXx9w}PwkMs}q1#6#R?W<(E1#5TF>V)&B%`6ywAk^DBzbQNq@Mh}Z zK;8lZiZ^H(cXI9pzVv&E1$R4r^qCD(qNuHjA1IBR?a21fHX^`nLgHw`(Oo61zq zGdSnUG7!c-K~aQ4D~U~umKyBZoE5mKt3O(((bw`Bpzw6r6LK{tCO}lq+bUSjg0qv; z%Dw^c?p5c$W_(MxkSA}NrxZ78Aq>BPI;A{L@J|^SBa%l(*$A^ypW0e4-2WA|xyET) zqBUg7wo8Xr04_iRnLBLYHk-}_DhFTy53g`mf3a+pGXaqfWE%sqXo`t5V5Sp*bU5o9 zpAFoLCgRMVwt#m&#e}ohPy9a)yF3`U;L+i;6MzUeJ3}KwXW)Nt0HLTD=g?S!s~vr2 zHWN)mGVvtdVbKrLxb^~)heCTi5l!A^q9?N*yUz7A@#oh#CCXrM0Z0+W?=5PbUJw36 z^;65gwUF}*P%H1U3t4+l;Ck5-56FgNAgd;<|22Iq>+A{O#ac3&s8-{&$dWqTD; zI5<%~$Hh|_?g*Bmj>iSCR9n8;V2P*|jjzn7aCqlA4(`S@;LXUejHqKl^bZqAw{>6}93vxpo-?CWSD$5=oLS0G|zX7(#+)R*HPS?8ZFOe%DYQ$kO48azx&4LKKB2X5 zjrno>aeTx6*PcJ~Y_?v=Tb^o7%lZ!nSKi!oAK+UL!nE)EY3L`RbvJ)}j5m*OX|I8B zLz6e(P;~lNUA&_wZ+L2Vt@xkVL;2BdfBO&Kc<+sZe@O5Tfz(IAuY0H~T3d=%Ptocp 
zlG06tWy_9D7jO$kPyW*K6c{;HNB#}4(u+phyW{VSuZ*sCu7K?9Dfy}9p0C;{tMh4- zkN2P2Y&y*wPe0R|@^_Xj&zf8-PNDPER@3Qx!xskk(~j`k{U;s6f;G5IKRmfSy3+a7 z+RTI9_43-ewfXglpT7E&SJ$s>yv{q$Y+26&bp>lcum)D`Th>mJYZI((t3zu?*1xs! z#+G#)S}))EBMX?vokw8ccK6C9-nNf7?t5lzPPdfO?<$evL@1 zfk&KmOr#gWE8)VbR&9|;p!UF5Orv_{5Z*UuD(b;Z9fPN)wvLg5ch%F1A89{?3o;Cg za03k{MjexfX*dq`HNsk1&zz}u?I8<0xE%D7P9Uni9DMEHK)-l3SjRl7hcBnWBM{6! z<&00pSN)c2wJ?fcwNy@lsn{;3t6%v#C8wQ1opVn>g#v2Or@AM=Oev>p)&fw}y$1xc zWr}n;9jG;611;)bK%2haCBsZ_9gO;Vk3xV>!}k@n^ei2L>$!C>C(E-7v{ckJ8hrU7 z<9OB{2($;r@ajJxyQq%^vPLYuL43cEb71Z)xlIm$1PBmY>KLd4i3w3P9!-Kv{8ltE z&puX*YS06wQ=;bfEC+U{sJX+zvko|9a%Sv2&~{GEIgJVROryYCSGMCn1~OYSeOJO_dC!ijc)rI2}MK`w+Mw<*DkqKBc+|FXuM8@ z)3zs2@C<;@lGnBrTY3sDM}?N7c{9i*aJPaBu+VW?=(x=9dxlym@3xdz^rGuD(&|IxTda<~z^u=CfPcbI+{qg0)Mqb`_hNi#F#|o3CKoC)oCp zM7v;XFShtso#3i{ZqPL9^J=g+_jTsg?^@omtTYJPz*D<(#VmR9z^^EPYtcTK*F9}? zEH6B14Cc=iZNY-AOR#knY<+^QZ!Id=4(3O}-Yh#{gM#NcSeeGgcgNlt+qSzNjXfOu z=huqPeFbN~;Oyre1MB;1b0sC+e5j;WH8wq0YD{Jz#|zHwBZBA1Mx)>v$&Y>J>R474 zt<8^24^1l>N%Rf~{(<#OVfdmje32iz^s$yd^fK>zaNojK2b z&nQWoe~Od+ZH&Hy(c2KkDw_*372)&eQ2L0j@gB;eBW?AJ1Tm)Nc(hE$UgI->@~ju&67zzFFUTXkACB z?d`k991y=|&wZ8IN(Z#83`pdk8c!w|^_F zsR#A83|Q6iSo&Bz6nnc)6-ww`FZ}||MJ;MAa-yn>j1|D%i$B)Xe!1A#^4D8 zHWXD86Bj_co49^$qKqVKoggN7;KtQ!-N`f?3uO(EOkukbu{? 
z=(5SG`wt!rAIvuP1W5DS92>pM%-&BO%i7Mv1Cr^5z`5B}dOk@WDj$ne|M8DUM8%bt zLXF&eIFWyh(fb&EfYHCi=v9bBcl2H~ok9n_cDxH z>3;)r`Yr1JKylQW-!;8sDw_J%I|~Ej!oc{)X}WO z*bcA9_@-kewbEpMu2chzu>y0Fz#I~6ht_ZKO-Jj(949aXf^A@ZkZ*$9ZyC>X#FL-h z()vIx&a+EKxHN9`{ANX1bPbes;OAy4*t6DI=ouAyMxO-EqsQj_!=vC^32s}g%cI|2 zD5BSfsI)7GR?n=)R)^Pa75c`6zOj#4p>Kk3z4@u}7PwG?7idBXZ|>aEc9m%XMz{iH zKT^@*S()OSJM#LczfDmWU1zKGxq_>XLU2$B4nA=ndgo&P{BjVk^G!{tWv?7tWd(Qd z+TD!~A$bn)YXk9zsmYo8jg168iWaEGHaSBP3Rue`zg!o2ywmiC~`Fzq*B z*htlA@V0{uv*j6h=W2d*IkORn+)S-!tIRA@gWv>#f(FSMWB zNb_yyKAPqo=&wUl$Q<0#ww1?1^6hJyLidQ!J+d()bYJ?|#&?bJ*C#&J-juw%jrgJ) zO~Ob-)JG!V{seOu(#A;S_4#O`TmxH+6IDz+mf=82fyZI!y9IGfk(XQ2tt{RxaOmX~ zRkvqn6WsSOkH#Qp!3eh?hpqt04r#*_UYw8!6X`D4gl|(Nm05j;-nMv3YD~bQX>yku zNKy-~!IF+7^^~QlWFSc+Wwn<~Bx$Df<{b-31}-bqZQFKdNsS4>0!Z9^$-4t8dY( z*Gd#gY^2MHk7MNoADfa>cW0kVP!bQ3Ya*TE&?&X+Mga{voSmI-K~!tRfkzN-7qU<) zUtz)HVY>Wy82ohbViKNokVol?@HBa!pLJYhGXc2F3^3)l zhJo3sKq^cwC?vl>P23bA1@~9bCvhaC;gkiZf#fR~#8QkN;Q<_m1RTIl&&tcI^y--( zs{ioKO~c`bZ!QPdf@{&o9qauc_I&ru@|%L;Ft0lNt4|Dv(`c^#fvHQis_IgOI=B(Y ziW=~9z*{?MeWLv3kROuhL?cvmRetFZuG|%c(J4=^^~Kg|wS03QF2AOe-Zi3y3bI!I zxkR`ECEtsX7nIW0Fe0p{4y^n@FfBd5^j1MCzwQWEQsj3ZWFLvmRY3TqQX(BDzhvRi z#uQEXOQFcQOXtsBxjcSRY^j2l-w1@uU+HHtP1X817bfop$&V~*83YyZ;~DuUM$(UK zoYi)fp`(L|L$7OXuNC?cNBIXh(x(4QfFu*Y=ScXq_PBHeo#g&AfWWGt(&+vM_XtgY zMj1Y%Oh2Q#1*-e!RMY2F^M9cRenwsV8Ff>jZhlS;{frv@oa)+E>IzEdCran%s{YSa z-Jh!teXbh%Ty@}oXcpD List[ExecutionPlan]: - + # print(f"devices: {cluster.get_num_devices()}") # Generate all possible parallel schedules. 
if arch == "encoder": parallel_schedules = self.generate_schedules( @@ -225,14 +226,19 @@ def search( model_config=[], ttft_slo = 10, tpot_slo = 10, - max_batch_size = 0) -> List[ExecutionPlan]: + max_batch_size = 0, + distserve = False) -> List[ExecutionPlan]: """Search for the best execution plan.""" candidate_plans = self.generate_plans(self.arch, self.cluster) print(f"Generated {len(candidate_plans)} {self.arch} candidate plans.") + # print(self.simulator.num_total_nodes, self.simulator.num_total_devices) + # print(self.cluster) + outputs: List[Tuple[ExecutionPlan, SimulatorOutput]] = [] slo_targets = [ttft_slo, tpot_slo] for plan in tqdm(candidate_plans): + # for plan in candidate_plans: requests, output = self.simulator.simulate( plan, self.arch, @@ -241,7 +247,8 @@ def search( req_percentiles, token_percentiles, slo_targets, - max_batch_size) + max_batch_size, + distserve) if output is None: # Invalid plan (e.g., when the model does not fit in memory). continue @@ -253,7 +260,7 @@ def search( print("=" * 80) outputs = sorted(outputs, key=lambda x: x[1].total_time) - # Print either best plan or all plans based off flag + # # Print either best plan or all plans based off flag if return_all_plans: for i, (plan, output) in enumerate(outputs): print(f"* Parallel schedule {i} for {self.arch}:") diff --git a/apex_plus/simulator/__pycache__/comm_profile.cpython-312.pyc b/apex_plus/simulator/__pycache__/comm_profile.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c5b5e6381e74369edfe34fc083ace57f04042b3 GIT binary patch literal 5911 zcmcIoU2GKB6`r5{nc1D)Wo={p19-s(d%@Td$Y}sWYHR}`#5mYQ$tvnS~ zOsEp|T6^!g=bm%!&z$q!bNy$J$HhST3pq4>*2gekVW(1{Ps`@lEW_MpWJYFHCeDUf z7HLl9LOis*YKz-LcFMDHb@$Tyh)8w|?i0+7&yUXE(_1f!|)mDR)5YQ(C6H za-i;f+Nkcny7R@e-A+%E_t)vQ&vC)dhj_|C)(};a4o4%=X=Mz0!f-MkpPWl6LC&y` zOujKbYDlsU{ji=4YdV=>Yz&jxmm2sK9}31Wf&~7LDk-HPv=@G$kPuCHfB9nPE7(Q)fJKU7N`$hBKK8 z&(0)dgNKzH4x;F3l8Ay(wK#y{;kWrKsBSZQbwIC5>2MHc(NUTfU8qnp`x|?fX@zdg 
zUE$T*Ec+=}iHQYH_f$q=-U?M~vo%RR%TF*tKK(k3x&lzFeFXl#v=o@%565PHfc?Dz z?3hPob?B)gZE~&iLq_GO7O2Uf-4LsWC}}Df0&uAj!?VSNAyf?9Wr+wY3krsB=;Foj zxuMDPqnE>zuZ&+b8J9;#t_+XXF(xmCQIJYBTB8%gLz9ygHSfge*hu*D=@*q53!++j5d^XieeRE6w;IsEX` zeg@Sd^R0_%3x4+A7n9L%8Ri;$frI9CZi;)vFw#}7%rMhjl6F$uzkzj)($~0>*I(wb zyUB2pV{znL2jh}%wlBBmCfCKb#WNdj&*Dp)Far~{a1?0_MAJvxfB+w@j>&A6xo7z) z_zO2iB#?lwgUcm!y(k+XyP-Bb;p*oO%Tr_@@bD1-&^I^-Ms}{?~hu=j;F3d3Rhwm7jPKl4xucrg3d_V zfin1LQZ@K@BC6riBjl>0hk+4A_p6jDTrPoQ-Da}*$0GP;x!E@v3WE!_tSwfNuWLzf zma9m|5OSX=m1H0TFuDMcn&l~|evf3ov5%YFn2PnP?L`K_%YcZ1D3VpWK*$U<6UGi}2Tw;QD=0sD7YXJRv3I;S1DbVsmW zt|F0vL@jGc)$^<~ury>l>=Yz%w;)Zen0wE_(jf31V+2tk(=;4OB$9eyCIL5QDharz z^aTv|)WiQkd5BN|pl?^=DSgi1;f^$TRY@4G*CJ|K86_l14BL#Fj0T<59Y`-IkRw?2 zL1oxg^a+C{hFyz8RB3pK2)s~CgptB^Gn11rjEWjSAOK8RVm!VjlIvW)mhW7CwC4*&W{QR-M?OFn<@(DJ)q`t}ul`__G(E0LnFyC8IL zNZu!mJaD>}27Zyry?W!l2i}%Kd;fiYHGO~Re)Rs8LhH$O?_fa~+>rb?FDzfkk>xR% z)#|*x{0d#d(!|d*xhwf&w?b>qj*_n_7b^M!1tG8@N=xb7#B!!6w&sGFqu0^X!2OJ}&7&Kiu0UsRVkFz`) zJjJ5eIf$}a8WXdw2hgh)>W@-ge6SV2UvMO5w`kYTZRuOum;lg{t=GP|MH_tf$zo|& z6QM2IN498hlOkE#c(2g1Mcb=4R`?Ll48(Iap~861I?U(=qyh}E;uyJhq_cb-*QPgB zEXA5DRIR0};ABU_rK2h>*(!D{IO$wYYSme1)e*z{`-wuDFEFxztCdBHL@q=k*`4K~ zma;drS zQ;((IK-JApJ#I34TRi>LW1%-dogFAl)q7^&3O*Keu+)>*fEMd@|5`mOPJpz^lM;oV|EyXz~SuK244x$3`Cs+{jf3d#&xclCNRIx#Pq0Up5^qHXU4Z9(>?w%O6|!bS!fvvEjq%FPnRc z%{^;k&v*FU#pdocvHO9iHQ%}J*;|$GEjIVAiM~lWY z;F6LXToLp0&_BGVw5PAIyMOii=i>eOvYiu0*i9Z(mImR9kXrHwK0b8kP|@A>DOYqK zS{!-c-j(O_7glYnv32+Hg6%lP)Q1clX0&?6!|~#WpwRdxi~uyURcNe$Btj#b z)PR()&gGzU3Dl~i3L!98Y#T@=jQ}Acg%B5o5I2PoDeD065bD4KQim8m5S}O$JgQ(z z!NaN7Al44x;oJ^9$Qdy75bu{v_xRCqiUsM!3@X>!=xlAe1D9Sag?OdjagnLy3HKNkHMWfnu65065v-rNQIyFFQ_2EcYHU%OU@mI z#;&5XYn5Gd1~E-(J+tl|E(pV4i#u~s^Y!sdOT$CzGm1 zFmwz@Dw2>R8fn4KuIjtHFBKt>I4bZ@A2>V{;&;|Y)B236^-+8^zassB^s_a9A%fRC z!JC=v#0qa_g9jis#F2;|IZNOl0_Mwi9}ah6br7o{R_GfB5BaHKpMru7!$}TUa)j z(*PyeV4i%Oy_4NUvg~*&yC)Y#>7M*sAFFp%l(LKHQTLV8y3VvyWwjn+o1*9 dV9|+`fEGohTxijal!O*NKv9d2IH2f_@n8PzK?nc< literal 0 HcmV?d00001 diff --git 
a/apex_plus/simulator/__pycache__/comp_profile.cpython-312.pyc b/apex_plus/simulator/__pycache__/comp_profile.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9c5e763d3064fc43eb26fd30579ad3c1b6737a2f GIT binary patch literal 11518 zcmeG?TW}lKb-Ta<*u^g1e3KHvhh&1HNIk5`l5NSNC|Q;%O0uN5VeB9f79=5p0JRHR zCbg&$$C-exI|h?UjrcJGwVp9Fa$9<)(<+^QXfl(IX4>gsMYl$Fl!@B75Ba}uv3u!S?Zs_)T_QW~7P)bciHgvo{eAultGNVk4}-3D(~XWeb$8P`VKNUdVqQ_e4F6gY?#R{K4h3_ z<8Lv(&oI8Tw#LqgLkvXB5nn5COzf{gTO$6-sSLn+yK4Ab0fQsBIC2nHh-W8=Lx9!2 z2Ftz<);7TEsbLu@*v0M8*E??x?6{4x6QDINCd~!Iq3~1$D$Hn9PL2U!J9gy2kw{1k zm^JR$=!wJ8STZ7M+>@hcDxh%cLbO&3m^91C>2IAlqzPhjel`+JCW3NOTIimY5);u_ zWGI}NnH{<~IXjmcnvBfM1TRiVk@Km+u>8V8*E%>eQz0Wt&pIe)qid0(6x>r>_)O?U zIl2(hY>C<6bTlq%W|&rEa6-*0MUrz;JZ!Gq8!Q<3u)eB5_6C)NTu(_LSb-3SB$0mG ztWbF5=WLZGL!r=$>3!3c%D9v{$yGbg5k*t2>7`<>UicKFFsG=1d2SGT?0!gAGx^96(5NLKd(B(+(Xw7Ce3I#Hhvs zS9ORVekxe2SSJ$FOem?DW<`yafa^hEOu!-uNTo(fW8$Hih-MNeWD9iQGW}nS4xN!B zk~}m$AB)ISp~TQgM4nD2W`|D9C#Mqeq0nsP#o%mgPBuVtbY?CVN+u*C7QqU02WRIs zJ{U{D?8(sCSVY+Or}+0!drTYC5w??^wj3Ph-Ijipe*O%TR+004}yXkWOWv_dKL zm5Pu6V@zWbnuE(US(DTNu#A-aerW>~8XF`+Elx*-@ZEa>Tu{G|u!PNk zwQ21VF67Pm(<^*`k?AK(knqSgCY*?E*I3MhFN9*GzJt_vkh(n?k|ra`ARvUTFrb=v z00`qAf&2{$m<8;@JE@qa+aRYDGpR9HTL89VEoEJU_uLvhsMYXnbwN<9)f+0((_n;U zr=icbwoghL`o1(y@*Db$<8}ity^8IeQ7kLF6!jfhaP6lI!l@X?!>#*Oo4tsqa z0sEMQVt4z$AuY5W48`M#q(2%5Ge4UE?Hn2Oe!QRqT*-bfISyT4mB{p|N zb@vv$g^|lG%YUE-9#>uaRAGPmP)TrpHNs7sJ+tfk3)zz|zw~xbF7j4?fqrx7ChyG5 z{n6f>=hgjh%lTbzEfh|^`BKT(^2Xk!y}8iRf2frwtnpG zD4S?cYpHcZ&YFuA9x3#fDSDT&zI>o;F?n{cni-$7Ofe2;S)d%=*N-k9%}R@7xDgMj zoL@W!6`PPb{k@CXNN!|l`i8A5A1RDnnqEG0_2Cb`ePi?IHI~nitNbmfaIQ{12ta_eW=BY;D5EQ0)f^SK&91v` zh$F1 zG6m=BM_xTrHk&Lb>04Y+{$PGr!M4m5E?(`vdgkhr*S23fdF|k}^VdDs&DXbIKY9IN z(RO0hg4iXt;bT{OZWz3zlLd3(@zuEE|mA>L&dy-&tfjb@vX{a5bS+63LtbKzEfynwSN^5>3R!nG`U7BAuck ze4(a6Z%RDK1ijz}b5d`kU(;LR98)ktF%feJBd{c+ObW<_LSrB^CJn)SgsqpMN;sr> zXA>79QZO+Q+!36FC<6mZm!S;;2wBF(0-JQ*?voO8aR>(z!8)4sG+>~Wkt2|$saq_U 
zIhpk=o>}B`^bK2U$>q+lw_4kZOjF6;kvo6G?k}-zx$`Ou+O7@GjNgzuzhd{p!8Hd8 z2h^sm>ElS_xXQK{{X3RVs{U`>uRzKFi1^|wg0snA> z92_H>(3=p=3viH-!{vjELqmjT5y|zIcET03=m#cR;t(;yahlh(m?3hkF;Ov9J+G9> z&{~fjjkrt=mr3GDl7vbKycje1OP1xZHLYBQ@Ybkks*|R^|yf~Z-fvz@IZVH`wdPV5Y(Cb2cV>X=IlMfXLZ8sz#|fgkr&>^4w} zwu)l6)>OFOsw;MK(TKW-)^!!T&!E_?pvH9{oI!v59k#B_L5eIyzN|#PtSDb83z^@h z`}Fns4eHzmN}mxqkTZK#0zj41B7c|SuVV^TOpsqb1zx7Yq2j;ns3`u^a5ifde<+!Z z*A>6C9}sS1056@y>{-mdiP;&<4q%23s5A^2=!jl~4`PN++-18#=5Q)~6d6w_5#xjU zUV@89NrxblK@(Nv5T&sqi?}Lc4kU>3s6R%$Uqc2uzJaoPvcBxMR=CcpLSIM6UtBz% zn^%QCl?$v<^2cgQ-c;CCcxc(X{M7QJSI=H+y~Y=9qo~-Ap<)L;<8ID*7nQt8b#~{U zQJoK^pVSS?{Nl0Pv#QXma+}h_U;s+2Ju{l?&rdJ!xq5EZY;s${1~@69F-O0;?~aY) z8qrx5a_0(LmgkGZ&)hMaID46;oUSsDrT|dCWD5RYmUXGWhb&9q0zm-L+YCir^rUa2 zjI1Y1!r5rBE>7P8w7)>qx;Xi=JG1jEb^qZ1PMmK34#Y{bN8__|Nz{_E#*-&wQLk8M zLh^J(Twiyd0}c8^2CSq>rswHJ2J>|-8AR~)H`j9v9PO8q?yiWYxP9I6Y_cZr2CK<| zy(O;;=g?|@DM7qk?L~%vDW&)%y7nY3)@`c*_S^xVGl|-bB($-hvq_w6+yAR zU$eN+fv!urQf|eexE6qF%Aq(FH-VhV>W@uSeFydVsy2OudfgK?O~M1>^Elq4SQHQH ze9?^OBGa!4if766nv0=QUg-5oa-Evutx`$(>U+vC6SQc3g;l&GRMo~*@f06<*Jx0z zQd1Hi{zxLegk9kjpVFYP@7u0aPMqwnG1U}rOg26!?gMe#qBM#ut^_=###AGmUSsX_ zn!cvf0|T<=^qS%Hnxz55=`~kRCDkHwsn&JQqeW@G>pbqo?KM&Za>7+4=7Hq}PQi(` zIBcTfOL;CBAb8jL&ngErW-1h$(4TqGjgg*#EL?Lfp@OYD;add>qJBa@P0=Jucpklc zdeNjmeEkXi22E41@eDOj{ki!99iYdgBY+Q2Jf^{I8Xc8_P^-A^X<~N2;2xWV!0W|X zY2QLSK24)DIQSSm1%+brzQJ1OXE?A4vd{jD`YcTqI}Uy@RydyANG_+*W&uhwE9Tt};rmfK)S=xTizv%8Y+fU#Bf zOA28B6MXV%(1pP5PrQ`iDY?AAfM}v`r{4$eA`>W zBGXp#G^CG~d`;=mkK21of~(|hFWLD_ba87gpxS#%_Qn!~htB3Z4qJy+Wm?ilGUwpU z(BjJWsZ1LTW!%}LDzh;?ddEt2KlH9#*!yQMeb}{orSZ7hwL5p<>Z8}r|7_p&lPiyo z{U}uI+MOONHXi?_t?_0@XWqHeF;rw*Z?<-yC@v=2V4&%$V9S06yxr$L@nx>=-DwP!A2fKlhi)W#z`^M>F5a zg>N{zu5E;9mum(Gl=EfdD%+c}gFjkq=~LOhi~t+z4ZQCwj9=;euxHOo^9i+QPcC%z z+t)om`_A>y%Hzl1q>DX!icCYX`NS`6%{Mo86}xw=Y}{F7+e_^oMYiQ;XJ5(fD>Zdi z{+jzs4ZS7r23Wt}uCg5&HrrqJ;@ndzyOGSDgZ6GRb;qVEH-H1aY*e*(J}4(j5alUF zzNg6aeE#W0nrb>hfBxwRO|_h){yk(>f5qTw)FW{Vcnu`7TQ8PNSq+g_Jx+%RF~!2i8SDy1 
z=J-1Y1D=Ap8gCI>%HD+szlN-XXHW4Enc0aYga}RH2~>~Pm@z2~yU`;pRR5P5T?wK( z(iJ4?y~{#f%}FWjBJtH90;KVHT$Qtv$gl(=NSMi(f&HumVEBWvQ4LfzKx|fzz+Qp= zU0{FYcK`qd01=B1ufDf2JNe^*^wTA8Yp!dl|08do>g~h(UR4S8iwtt>4;X|8ck-mr7%aF60qJ>U>m>lb4z}j##UY}cf6uH1f9m(h+TX>+e<0;@ zAAyk4z{dNspO-I0YZ9;sBYuDq@GoDJ1bn^R2c_}#XoSEf_)Tao9!@3_FiLNqZ&67N`?+g@Q*wC|E_c+PA_Ko=%HE2 zlP`I3BTv2hqc2*7x(lo^XJNO*dy`(l9=z@na-2i~L{sWcKl|8BLY#|5_DMg1UJwUL zUV<|!n`xT<71jAGs`)pR@879yzorhU)S+KfyKV{F(p-rjOtU5D&NTmtl``?!ogdQ8 zWs{wD=6q!eiu_Je6vCufKKnPZ4`QEU$Euk!wGhm58%_6Td*CNix<7Y{6!~Y+e_X|u zGE38eED(btcaRkM;rB)_jbd*Ad$(j^4k&Uk2Ne0U@5L^~uy;$D?WGUGJD&Y@VNocX zv2x4qC|eK!&&uU3+X%=~ytB*^(2lJ<0R_s_P<9Z|Nm=<-7lNOR(e-WmX!r#d2t#=b zhFPElvMgV=A;coYA!J90M@XOqSJ{COlKj*KP~{*1`gFiacbB`p^fw4qC<#?4D^wBE zsUoCPMM$TLkiJlWKIyM7b34IP?A}otjvOtJ_|znEC}hdAf(TNkUk4Qm07TDpGecaT?O+msD2wU*(f2N0bU{F9lBfJlGUL%=cfrV3do2~{Xz z$xd%Juox`cu%=IekiKk$1SH`==mZTpSs0YRC57!33Q!UXP*!F|NS_rUob`?aAaZ5@ E2lq;vcK`qY literal 0 HcmV?d00001 diff --git a/apex_plus/simulator/__pycache__/simulator.cpython-312.pyc b/apex_plus/simulator/__pycache__/simulator.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6f85a263654bb46ac8743d29ea0dcc572c917cb GIT binary patch literal 34613 zcmd_TX>c1?njn}sNP+-JfCLDD07w7?FYwkyQItf9;w6d_DT&m@l1xKPkRrv)3DA=0 zgypXBL{YWsHdV)%>Z(~&U5+)qc5kzyJu~#oMzlJ%c1ND7*v=FvBb{b#skUo&+7Y`m zwB?O1)sNl%zL%K*NI{1^wXwUgN#f-@zIVU(z4yN3s7ckJN`-dygnCjlsF~CbY9})WGbS?!Gl{=uB5P7Ns3UgmME0N_ zV6_v5K?8Xk2aWK~n8=yT9n2*#nG>c-^Prj7vnKK;^9S>ZT{mHwv<_N{J$s^H(l%%# zcKw8X(lO|mEF3H(Zo`Cg(lzLsbPu{Giw27(iwBE|zj4Ad=^gY=`UZW(oikA~=^yli zJ$JB_Hib&(pnij8w0W?c&KsiT9PW7AW?2{B_}c4i_3 zum(Cb5}cig&;Zp7?yREJpfHkM%YG`;CBOIRyP9f|76PyZbNdhQvH{&TOAadO^{2lb3>0ux@fU%dgHWf8?@Q{fb`v(n0 z4P$F4k5;xk`Y*xqHWi`NtQ6E;2Ct{DPG(bPXSIXe-IOf!A6k(t?uSTGZ0f|Enx zvANLDOo$nxL$8iOW4MB^2FE6X7bikPlcC9J=2~i?b82>Sh>=<=OajKBUxV16q65Ps z)0ab2TSH9PT|?mrz=b1Y!(q_^`C*p_r-m_?2!o!5X*`E!r^X^-(GZ@PPQd9S(~;nW z)SpJl9-0aR3!7s2?a)I*myru7QG@|d<<@Rr52WkRt( 
zru7R2g^OiNnx#`qdfr*h7u3YGH9}$0VrZ#txnSuCUtG@@HpH|Ig2%tqcKcdPTPfrf zEQD^kVp_LgE{JIhgnav&*3MKbnQj`p4)De2(f>lQpfYx|sceegMyo>_8pn|~ltIH7 zrD2rPS@6@**(15M{>_X*9jyyxkEm(Gn_9qy#*iUo#M;q02n8JVuR@KRR!e8qw4FkfR0STH%MOWsuYgLMGZ4GKccO$3)U1GljN;o6G{* zk+2meY|c;~?NVGLHrfs4&6jgi(M2N}baBW+Ba{bn!<^Hlmq>y30+cUgg?bf)Y@`%c zx&+d+LtAa%lvc7G{1B%Uei*kbls96g%OQq?wC@hOB2-8tM5R(nI*_nc$>pk+V$wD6 z)`gr9PFg^DOE_c1O4mZVF1ik&>LE-^+6~jd__~k@+O`2&-+e7Ruv5%V-~a{}05Jn* zNN7UL8kX?0m=PiPFoTW&1{Mu7OlV|mVq$1?W>(bGp&~x&!C1Wc#mKw)~-|{GFV-TcoP=cS6zEgoq zz9Dv^<`BCm8KT&fya3Xq_yMF(u_xatka>zPl)pr=C*R;(oOCI+Ip{gaC0}u0quy8b z1}scHcrXoUY(rx^8avR~35KZ0#adosV1Iy1%@7%78XR5DWT>+03Z4YmR-Ph+s64GqoZ zC#GVZ;uipj@Grr5gWAZaY_56jO}$_*hL=$&a1xjNa+S|(*K`iSSqi=;!QzZt%6Lmz z%u*4z?BFdsz*i?YeR1bL-nlR4Y#~tSuHl_EF=t)exrcY|SuKq>_wda z1mYz-`I4QnlE!#RH(%0?`FI3RdEB#;_w0;$8qvco_$uSRCf?T+^EIPKu}~I>m+j@t z_O3?a2l{#VFYBMzKG2nr$a`C(Y}v6rpkX=?fNadAsg##GBgVqGt-+Rt z0Z2w4(*lk;TXNu~f**viFb&2V6l^`-9=tWUq`5V;M87wFXMDxVmp86z|3?3@{_ZZm zshhJNna^H#``$Tv`zWU?TsN5C+J9sJVmEInTlbc|mvbkFGZv-y6--`E=Ov>;)F~TD zF>?f}JMhN%dXySgUjXQ+W|X?6zL}||qS~k? 
zl1PhHCE+i)De63ojA%wQ6V{y!Ry_y3k7I2-vC4o$e%MSUri!{*tc8`TAM#ScFRHmh zF}ndQpb<0R05cjAvqOM9a4#GZGlMfTp(#3$MS6%i4sgr~Fhtdms0+_toPcd`Sj-rk zqC;23tk6VgGBg!QgDmK=@Jui=d?^)KD1P0LUbq*GHz;WHrIB}YVwSo)Ig8tVaA+xK zUdLJL){85ab)Ob*yU{z}v9Mb(S{F_&p1$=GZ!C=)t9fJfa@Pan&J7La+5XUEpEo`J zDvPq#hp`#{EW0&BJDiw6;FAqMGHKVqlG+)x>P;Bv?Ub^pC=CV!8+;SpW5@1cH8eG< zV$~^Kqfy}JVDZJH>l~yHomeP$Vxm)6?VnMQUWVM=uzihYB!@&Ku@RxwP@W9s2&<$H zDLNs=%xDIinc9VNXJ%Z1e(Ze$OX1@62$!)QI0GW4x&SLHV3y=aOpL65rg~ciVw2u;B`Q(e~pR_JtWQ+YQ2A$#8_oLjEWFfDWS$vFu_XcW#w!K*{c zxTxDVF+Cid2)EQFBH)M$7l82()bHP*ILD6nPc04q)Uw$Bj^q7PkHa{1zFS~dZK@Gw zjr*TRxXr=Z(A2D|UmC?s75Kfc5>;2jSUCw@{6V*GA~<=G4z|qIyP5Q9h-zDW`!@Z{H-RFQY7n0Gfk%Fx*LzjZe( z=)TraIp!Z_^7gtFUCg}afw5U|H*923+13pTU^aA=*~RIckH0cd&VJS7uhiffCcWft z^=kj7sI^S{AD~d=5h>9~Htj(4m>L-y6?1~Gj>4V-4)oY&VlFzT$pIhTObNsi?J}a3 zPUk}~?8xaEc0>?>r}$yOKVxvb74yjXeUq0i3^0&XSkwlY;Y-q4JQ9Ra8VL^@E5R1bt7=tds7!R;;fA3j8f5D_?g(;@x&BL#-~$IHmy}=?nnaJtEi{M z%ShsrZxk={TgJ=!mhp7Yjc0;`arR%R=3uKIHARxRPetn$jK-9)s_`@%mZ%y0(`_*F zz#qrbmM5A&wQo}nhD|x7_NyF|QQS9l zbUtkvRpXkfK!0DSrdVTgV-qck_>%An6tqdByiqESdY&@xtgPoJ)IV3VMQkyaEV8K_ zo93-4?R+m#a)X7lNehH$)=b7i9x*M%OnWS)?PSANs+2qVCgaD;7HsZ~sE_qAI@Zd1 zu_aia+(UWM61D`65#`DFiVY!X2T9G&W-ty`6T!n7dDsMQA|=d-;Mz$ZjFm{+&*svk z$0U>WkEqdSA3(y}j5Ux$)T{=}^-XdhX_@7;2uVsxD_t6Fni7)uHaUJ_N?B|qq)-lV zri2ub5GM(F-pDPL=ON;;(nu|79ISV#(&I3SO)$&KAT%*rSs=x29j(zUHVejL7o_7! znN4*{ipe)y#(ESCwz)^5G-MnF|Q%cgJrBd1@9fqn@ zprbWUX$KEm^8)qvD(O6R9rm&IZVq8uRbC03qjEt|`l*xC_IVvp{)A|1FiQCHL{ z+X43_Y^AVsFWJ0v$88AN=QhA5LFQU4X1vUwt&1SPYJnpcWq}#Jt z%{Z2M%fn_V>BE3KprvjYpbQmI20g1y2Oq&B8hO$!_dsz?Y#~z84Qx8os>m^jP=*;? 
z_5zt==BvR8;Ec(8_VDZ^a|!}P_0$ZK;*@k0Sqe=JUmHT6pO_`PM3=Oml$>KB zi7_PaMnxm;RU;Q8Ly!sVMU&}C9*HDz*Do5-l#c~smK;;mMj|7?h>S!gBFyVhGG-Qx zFz#M}=%GAsNVK;?I7AI22Mh?0kZCfz@LW^!RDdKwnIn}vcs(`d2xj+}VEi7)CWm_C zhfeW_POTj}6RSGQA3C#g<-gv|9Xi7qE4Zq&f~#o$s8H;k@B7@lcfq(`>|3gd71u8q z1gC2uORy9#<^AA*;Hp?w-9E6;zV0esWMi(Hg?41{(6?V(zRVZzS?ChX?zq{{oBc~& z56m?iCd#+(k(u%|Ea(NZCvGn1&E@Yh??vxKSI*yUj#czNF!$jRzu}>!gfo^r{>nvF zpHq=T{*ST`SvDW?aa&1d&N7Vb7nnEdlz9VmEP38&fG(D#y(A9!FpJ>ZXO&(Q2|m)*+FA%}O^HSRJb+ zh*X0JHaXrQCq2oj19M6#Wwrt%&tcDVW>Yq;#;v6iYjatr^buJ2q$hbGi5ge~n<>|# z6}GifE%Z0Lo>~i->FH|0W+l+08fI)pZ*s3)|4N1p7(GZ!LRve5Efz2cs zxwOO6vr}-UXWo>&5irGD)RCDBzFQ}$L~{@el2a{(=R3Fag(qlAtXH83;S3nd&z+kv z+c-~i%(WM0sIzP-c)M=CC&l6hm&eN)-DKtp7SEFE)`54=@|M~aSIn|cu$C^Xcx!;S zmcpstw{Ihp%5lVv#k{eY^ERyP&L|1i7Fq@Al7xWr7)j$Q(yL9@5@)zLcd0AbsEksyT>Oxg?B(HjPi-xMusdV{@B zk21qd4~N3xO;P4NsTrJ<#RXE*oNG8LEf-4mARpk3OnXPhm~aFvAVc7-92n>v@QzJ+ zC!rjHx$S9UU?C-1MHpgYio6HQr01ARVspPBY$1V>AtgN1=)@a2aBz zF!3x{8i8xXkY$xj*DsK+_yv-;=4?_A92)Q*PpqiT-s7ZLUm(?vL0T{KUtk18tY08z zoO8>~>IH!rkh(z`25+K?=5ivJu(@Tr4#+cvhQC1cE@yVE^Au+0O$cxNK~*MdjvYD` zD3nkH;sr75QZO8hL>T5ay0WoRNJT}1R7=7VWBwci3FAy@F$u_$aFQk=z@q*%?k74J zW}1=k6XGJ~&q(^m2$JgWXk-2q-I?KO21X(?fDqVPz;T0CWe%X5&=9jFw1zQQ^duVq z*rri|I*IT;gTAkz!JvUuQ$i49!f1=2f%^pp5e0Jv4T1x2pzX(K;NgVvqJd4oyobgz z7@`jA2z#ALF&CSKNYaD=?E#StaifgpaBrfSyf`ankZqJ`l6FH$`dgOF3=p-@6O*EK zWFiQ76^CpJk!UrCg9Xj8oX9iFxuYfx)kMX3E_HbY!3rHA`bjp71WySK$x3sA0CnnHcsagbgsCrgx8gDr6<=0&T)f7v4NL4e{fBA@x_8VeldIR)PKiv z{(&{!sgx+E*UnzxUK);_r8$3SO*fJfbariUhy3(oSmb35eBrj09=`Cv-4edA zeLnZ0DSuuoSZolQn>Vi!45o!WF@sxZJQi;p;2Q_l8c)ZH&+v_>mrpIuE}d99`RCWT z#?$kCT=ALDn!48f=RY@WOoiy1nr2E4Qw2sX2J09F{VcfeIZx16hJ zT0L}Ez1qZAw8gybz;^tsWG}a`KUOlp89nP|bzJ?yPs>_)vu`0|VNNjn*j+yzF$>u=PnMz7F@6dmfPbsZ9M#2+U|z={3G*izdd_#vE{?I72OBNc-NlQ!CwXV{r&e{ z1Kg!C4%qx9yH;uH)ZaxHrD`9h&c@`TZfzH zOdm49`3@-|54&HEcaQSjqifw`G50v%J+?Ca)BYvTgQ9I*H`K0>bC0h(y-S{zL#yf) zkSW+3bMBireeMj*A6YN*ymR691+IGE>UgZE`&Zq3(W&`f!RUz_%XwotSJioU?(PJ4 
za)_&ZnY%c{RlKrhWCUYb+*rdKYnD$vFz(pMqFhxQddgD|1rppKO!2whGv5XB3hxcw z8Cq4X_VP_f?w#aLp8sTs8@j|FyTn}%^T)#6t5N<~ly8diWv`Q7E9U|Scnfai@;m3- z1f%(_o*O-jng>QVWa03Bt)najZ&%!^SSO)Cw)_e=-naq-*JHYyqWQv<_XY6=2iUavy|6%9NCW zz4R0~(xxy%l~khzY{89Q!j!TBO~MX82mA`hfJH>>qpk>Uh{-b!J-YRQvQ%yL1BsHF zC;bxnxgib;%x!sCH)~YL*wG@$xj3>-0f7)W%i_~cEvPS?hW_vXgcr)`dv^JNMdL|r z!)OWQzC%hY)dZPMU`7B_B!{&DoyG8>@dJ6@ozH^Y{LxY%*_THvqLo1I_P+s=FxRNa zE+vI|?K}k%!8vd0SpSpbDOv@2HY%{mw>(EGBYPA-fNNIl$u|vDRS0?N_+YC(%>4kU zJ@`bcrz)PCo{G+R@$`V)UjxujZ?hV?%}iU`tTtK~t&cWDw?(%{cd)f`o8cjaJP|z2 zkOzqUt%^PQ-a0E&;J3dB{2U~e95rw499EAZ>$4WxXeYZ9*dJ!X#M40TZOL2}8@qFq zQbr@Y{do|W*ALY<>w$K@3sQHE1C?#-!|qZ_18jr}puIP~wR>1Y$^;{gNMp9OXsoGsgSHaZ4S*TE#esO*-`*-0&A;2wI8A73e&=svkhC| zyljBofqrat7KIaS{u#KBYd&$cYv& zsBfMFMpXh+(D|_LxSzxkDQjqgu}*j-dsE_q{?a#!yAS3q3ifPyZfM!P>^{J^#jF$L z05jP_zMpY|ohmo@czK5>2rtQa+Hkj`P!51sw!9!2h z5!Ya)g;Z4Jgi@@?NyVmwM%ys{1;BV@wC;?yUvFb~UR7U}d+IlyeBj=P9#s?K!@|{dI+LHo#J_(`N*hK!(qu0CAwoN13 z!yX+;@J66*u0j8$g{5sNxuq*tt5Vw(xOAmTz@_FJK?)>!ST}8dllst+n6<~)W07DY z)DpR<*a<%-f3rI!?=)p(lfo*`Y-6@isSDf1?q^%r128*X9~OQ9a|WLgkd>B2Hg_f% z!@Hx6Y3=P%fC)HznRg)OTyt*-NYufIcOn!767zO%*YQ(fFX*6oWhECcXbpHJlBE}p zKf%#aCNxS^!!}7#0$!kS7w`!8wh6h=Y8b6}ssX>P&p5`|@+fN&tUJZ&&qA_1Y3*it zQjMq`4o!f*&RBQ|2(wcPk7i~9C~V>P)yzwfk(h%pLt$BMGF9Z1c^Sgz%92#Fu%xFJ zN^&|ABBs9u&@k*1BoWgMN?or1Y~Q)%{oIA|_=Rcy!gTz?75>7N2N$l*AA{rg12~cI zJNKAKeSX)oL-iMVJ5;3+j{8M zaWv^p)Dnj^-I*HWz=)LRTWVGqB6rC!bcvdDiH1xr!k_33WHPrQLXt=>DKY#v;3sA$ z1XKfBspB+2d|(KDz5@WlSTV2n4T)3#O@^sg1+f<)_V0-(GzipsbARjFvCsxw@4Wnl zqj76B5C7)s<%xJ*I}iWn_It+o;g|WtFLU%voSx(1|L`1dehr9X*Hvxmf5cZe(#!A# zo=4QZlB-V&8<1S&l{bJ8XViljbVf~M5WfC*iprzapSgBC4kO#;pR`uAR;k|4V4A@p zaZ)4=B*HfV{U%8;0{tXb2VuS@#GEr_Rtg;XKvbybv{%xZ2L2AzKJnnNAwzB_SP}!y z{B+bLcW3@)B%@+JgZK(E$&xt%-*$S@*gwhXnmN0XW~Wm4~ptPzW$i`dq{j!ZzM1rayEXf zO7sVi&dAnF1sW3S8$>`s?uXn>{UD(^X|^!f@uDgsF)2ZhdK5UW(v*9nI zr76SJ_I*bn)f&Ef9;m^3P8=` 
za_Cw>&tO>?)U9URXcVDQj7E}==R;=+8h$VW+nF}B0lnm3I2{i$u7?L6aS6XP(u&9BY#mw0mxcwmn8cV>X3=;cXpf{I^K7t<0Nwr6UELVLMEafQkJiUkXX7=3?iIg(6*`>I1!p4RCi6nf zQ9Q}e7Ok6{L()wPV*X}davg_RZi7U6qC@eK#!pJl`AP^%Ajrxn1&ZIMnuvDATWT26 z5Ex5LZdZ{#k5^gHDPUUkVEzM!CfZR+Taw9C%D34!SH3_) z9!e?F5}R?Rn1k`~3J)o@>@ipoq|ui_QUtxA*-0!D-V?+83oHm+6A*@5U_c}SZ)?#h z34k;VBn8@VqH0XkfMkRegz{)o5TXNuxe(okk#>U;0r>mlJO?Gw0iA*gQ`IXKzyA@l z&IhXZA3zz~K^e_)gO4}(mRj$Fjw^D}&&BcwIpg3%cU9cIk9Y3_PNq;;6))Vw7w(zQ z6$;AZ1>5+7ZS#7es4iaA$``eQir2cY>`OdJ3Hy%3_no`H@7yB|xE@jJ53;-+&%xt*JHN6dBeIPcmB1Q!L|6xeS=r96ai#u<$DM29Qf!Q?{B&5 zx;GF%c7ccg)|YtyOPtOtl$5_$f2aN@A^LJ;}4$aOU`pT52VKDUgYY# zxbBzXh!e}bxMsL0SnNW@fnNmpiXKi^DwOa4MK)jF&FTE03AgBq6_m~Efd%ezFIpBO zanCLu{tI_OMXQ?PRj2r>Q{3sn`01DV(=T(u5Py0kRy7LDYrblHK6fGV_VruWd0oYN zA++1h)!e(0e|!CJuk%f3_@Xnh!n2Twf7{9+?>_(qf9NVgj**;LY&nt+VeIP3!n2QCoRWKI{X0Ko_ z7tEFDWAkj}QF$egsO-GlFY|Ocxx~LjFqaAD3IYjc@rI2uS8}HM&+IOu3;4BsA^~+k zZ|);XQE+A+2ZdfVL?)kp-HS(m?sQ8H*K^36-r36NTEW#R9aHcw}k)CpU zej4FQ_r^;P@}&o3rH5jkwqMX(PcYsy!uO2CdM?E}$9T`!nrj?t?1Qt|I5~@1?F(fK zuPhcVnmA~ls@8Z_2Vd23clh4+SXD1~Vl;l@GJoPSH!;n<5{{jS@Kq7s91&dOAbgQi z45vC@E;L#WXf9Xr#wrj9STpYbGLzbWOtn^ejNftoeIBKJfFb741rJseRdZr)%{95ku(_iaa1m;Hr-T2EkeqM~TkB@=k7B=iLau?I_m=aut{OzA=8=7*{p^zjwj5`ElN!%FVjRCIe0C(s#Uv+xTct%hZv4PLeb)M&I z&aW9S2uANx@5%+f^bpr^YVGVa-!aXXPV>fT9KFWkxUq~kmT~1x+}@*n`BCop%UoZO zFAuI6FA7En=WgJQ4f97~qAd=7XyIxP!~zFng{|K|vJq8N#k;>gsG$noJRZVgu46IB zv9;s<+`w?`IL$lg`5cJGu~0BN z1*1za+61GYSc;%FAQT}O?T_rnE|s3wwSm9|SJb#_*l<&YuK66nYMamgd%zf?+0){Mv3on`NaV$PcRp3jT@xAY5|g;Rprwb&LjdzU(Rb6^d| zH%PRXKHPOMzN?ep)w#Co@LEAPzw7Xwxux)O$3l3q;|CMmuEX=4d_ni;#ifvjc7Ybm zMR9W_Z?0UnE;BLn_PBWuZ-y%{?mFQHj0fhUUqYH?AWv^AhiL;v)d^N#+*;0C%RjYN z&bJFTf818V+bWi|FTsg1W*{OBl_ za@#|*Ll$gWyb?p9pmm$$?U7p}i)Vf~4XGHdZ}r{iTO3$wT0XpRvgolstr{ihzG zq~=5BPQ6gs_KS;r<>B>;zmEPKrOJ9y)THO}qh1wdJ0i)M{L;Ps?7qxjmFJvO z>3+30=UkR9=FUBrqhs(^55|N>9G1{wAn7F@1K99b{AfNGgX zf%JZMTFDVZ$^`&;AwgPsktcaEsF)nhfnV+z^?g+50tMWtdA?#^3w&%1C@#a@RMGqh zmYY23(^Y8Jz-^e65BC<3aIlWUQGe?Lat8V1c--3Zum$NPPw-fZ4-Mr}aslq2J{=dF 
zwcz1N;@m`SPsznLZY2qBGSW$=Y|fEzZl;0HX#j40J9KSc`t<3P`!b;AHcL^cqb_J^ zH~fmSkq1-^KM(x8AZd_=Je&nJbDT9Xud$|dcPS<1hoU}E9Lr1c89|LRhb@_dI)Q!# zmd?(a)3$MfGYM^oF@P);&6zsL7N>&-{-6(AG@V3FN?&-=#Z8tKa`L8wNR}QTOpNO0 zYpfR)<0DA3AkU42nkf>)nH4rS>U&n1qJF58Kcy8^Q~xX}>BAN)WtL!~rEDpiL8!e( zIln z@__buew&(}*QT`kMg{9b(h4ZsW)OaL?lW3fb#sL^g!1e?busHnudIx>sil)jqO-ui>dDkT z%>F>e%7hRJ50r?VMlk~+BE^<2{$VCvGhhrX;lK~R@`(_c~DHFsmh4?)znX5B~UTv|EMd00a8P*;FjnACo92OTnS!uY$^MObn67`d{xE3 z6-LikMbfS>WRH+$h26Y5DVP$Zfd}av{(rz>{AJa(El6_+S^YQe}DmG{H z4jSez?p4Tsn0@*>*a<5uVE3uqq--s1WOaB6j8b-x?YF1hOCpQaGrIAI9M$UG!Ew$@fjHPA*2{o;?pdd%z)ao{4PACiXMz zb|n7nVX*vI4SE`I11}ke;NJS&P^FVnf*ayg%5DJnD$rJHN!J{tZ$1ML=KlZ#ERjb>+d*dh{uMR@Zj65qs@!mq;UI7H%A~;B z)_83PU)ynaoIBYcs~xy+93Wg_BBUb=ABnntxIgY>X!uph_fsr)RjB_ck}UgisoTa?0VL?#7jd5%pnUx7{HZT=nFP-s%r zA?Fr(Ghz;KLoNjwn%q_&7PF;$?BSkexE3E%n3Dq=nnEkO1tGUJHE9?|a=bAc@$ zV&KCBL&I=^;$<=a3WzccA)_7{d=l>}Q|4#t69P(b*}PnIa_604Ds*K?E-$kmGyMgY z1c$#QjUlsZt?Bv#81Bfim6@hY3q%hCp@Tv;*2`!-`;qum%pV{|>aDG*O#DB`BwyE2 z)MH>%*{GuG=P;w-q65Bo^4{z5zKQ#N6S3mSm}?UFEf3wmbtx2Vc2G4#eONb=$Fqp> zwOVc0&l`aHz601j>)ya}UA(G`uj=~Ldl*Q#^R4qy!Qz$f9|kh1(J7O{cfPm(&i
-;zj~E7`GT-1)Gk4mm`14agy?ssY}b&x8~R`I0DOOpc1?z@X%2bcLcz)T)EQBSGB~e z4)axq*BsqIP>nmPct_Rpwlzm>($&1?X#C1T*^7ZY<0@as0=|L0%P*Z!Jm0n_VB)a0B-kjrOmv( z8FWH~7+$SWxRu z09x7x*Iq32Z1@wkiP^S&Sxz~CWmocU|FYv}7w+4)3$B{wYrG5C7NFMJFZg=|XrD_f zukm$l@w#5V4p=}#eBBW57~&jNLSWlUH6Lh+2fFz{_wC%@I*Pu^q^vGs&;IzH)BK*( zFqi~Ko!}@z;__L`{6LRy)#zVwPrmG44-$0xvG{}`2pT~V4+vAI)6G$Zs_5y^$VTrMJ4aV@{x+!@dp9^@{J^6jHs+a=CX|B+_p$X}ay$3da_@V#cf`P8Cr$;mrv9yoRbo2jA> zhKtAdgRX~pw#CX=Udi)E@5%}2W^T`k`}Pyh9m2jfM-7hc6QEqqIRc46%T*tUSGV!i zZFi4w$4|tnPjaVD^VO%<9A^?B%Vb;^;JCJtaSdEgmv71bllq0M&&U*8&b#AGH@!9$ zaPwSD4#nV_wGzjueP!GGE*NoSoXR7UtJoW_IKWpNxU0X{3U?iGCrwIZVWNuHU}? z|H^5&w=cf8pWh2C(3iNEgR#9AxzGr|cZ74)ED!RICcJ`c?k5Mh?cKbqTj=WM+&eyM zTh;${AMb7#QYLKN(ZxHuxWj|ocP{XUU*dw7`NNkVI3_l-aoXlm{<4Lh&&z9J-n;5o z^t@~D>ZQALe9Osr%LTsW!hP2T!Cet|*YWPU<+%s$=7;XupSxBEW7`hj+Xh@`-hGsF z*9t91??Ie%i-(s60pmPy@7*w9>PE^_b*l@e94rO?#+BKXQO?&AvmM~f2OcUs=MVkM zqaW08d;0h)zlt}(jl`@m#vp}-2VwTMXuydiim0v2|RggD7sU&R!@fN za0iaxgpjtc2VrlsN&g)AX@q}h1NwF9CFE>ElLqc{hm%iK4}2v9{ESKc^YnTa%IOm{ zYPCplCKU0UDDVo>$;|_YM$ek?EJUmD#0+*gE&^pUcU-2ZLr~&TxObp6O^pMOW$Tj& z&j+5E1%6gYCk@Xqo%T9SzH@+D%g^E)LakI6ek|9@2AApDXf1r#z>~a|muSA(;WVsa z;mcDWfV>kvq!k>|Le|1M2*uO_m!}pIn-!=BHae5&Ts2`cNJB5Dfd^#re8V(8Vr}Cncf94H4a)dp2BB}= zTY|xv?)xfmDRFvO@00OC3$sA+RmvPeVNdda{=19t_?$RSlc)I?Y1;+WP0v$mmC!@~ zzp++N^u!;nR^LzF4*-|4+BQ?KkjFhnO2IF;G z3Ifik8?bJ}lm2WDb}zwBF9B2$TQt5s4OAAXi-}f6i_=3yJyUt@=a1H zA*D{!Dr7#(=|2^Q)NJa?b8~M==Bn68?$~B>9;xkKhbuM zNShK$o6@JadG1C!6!`SefFC6udZz-Hd`lN{v)&I-TMHiGDyGK+DKaF0A%j}jH;^yG z7~qD268Sz<;2e{y#Z}`-eBJV3|ADr(MGKm&0A+-D{*-ur)(>;$dGX*f@q{L9N#ZhO zKj^#W^-6aPPC>SWDhyvA$vSqp^)TF52bL4WG9YFj8k0Rc#q488k9U#}afsPnM^DLC zWB--0q|4NC6}DxOBxJtikqR_tPon`Amf=D%Qz zr%Y|lCS6Ewik7S-g$&Y_k}?2^tw%ipIZDcx+2kH$Cb6odOOB&$9m`4B4|2bd0|rs4 z{GTvLCb%O-nW#yqLg7;Ie>MqL3MKji%wfo7t|ZB_cp3%%PhhVVZh>hI*9|(Tk}|IB zV9eLbwGZ;XL8MuhR|18yvI@wqDb&<>lA7wOl!TJuE?RHns+;$A071#yeQ$#IfX0Da za8|^fyLspCm~#)1G?D0qWXPDKdcON1~a#-Ba#yi^Xw(^cnxD2qg 
z_PxG4eeu#=AW*)Ly|_!TxH)elZ)sew-XS==0@BfnhzU*grtRn;-%{Q1+`B}qv)Hbu@qQ7wbIQU=mQOjWcHaalKnT1 zT6e#oVZFK$YX426#tQ;`0SI;PT`SlpRPKsbHuIIu@ybrVvJ(ka*ps!qt2XZ1&bzk5 zrQVyHc-`UR+%?M)-o1m{eVBJ2zUPZMjy^JECM%qqro#70dn;MDtQ8!1p{%ifiQfJ8 zS^v&i%2l+iUR%A)l^uyWdpK*)Mz@+OYuq@ZP6)ztCA-#)jR_feu5{084`14~W<31R z{(n5l+qXyr|<>lWL>wI8-D9gCMX@}-UO(nBBuf8TOwy`*euGG2a= zFFzPBKf#xuh?SgN=vl7-9e};7ukqDKB+8$)^r56P5qCE5&IX(p34Q}mP=ViIvBxcb z-s0!V_ujYc6}&zmuUFT{tM~HoZ>@ajEsc8{cy9x@vl}KG1|9@zF{pJcwZ}_$^6+oo z391$Aphr@%do?fS-wQ5C_3f5hE$^OD{aG ziamVAp8ExRFpQ2jcJlD=@4V;Y{R0a}@j7R1yt0Xhe=Gc#ro`LNSM0xEuwO}Y;iz11 z_{L%%5C7GDK=O=L^ly#@cVRZ&FKAL?-8%X^XD!#zbx-?uxu4{6U1zwuvoYs6&U$X$ z4Rn16()9(a8)Vl7B(*x;K62~G;w-QM1grN^PF7L=LiUE4%7e@03A<+OQ**;tdyzjg ztwsjTmlGPQsZ$j*b^iSI72kqtp;scU$IP`Wuk)tP&vvxI6z*2(!Ll5jZ(YcZn*$)m z9|L8G9c^EdAh=xu2}x>@I=^q~eEb!K@Z^GU-e5ba(()CylbM;!Ra|mpm0bC&oZ(@> zvx!FsHbNkJ7c5UxN=_?So{{`i1NowzI*qV+CXgnyDDo7|N{X8U;UtRzg@5?s4}3Bi zjz(C9Esu0nAY86S#1=lxP>$cgP|w^fe1L@~!UuRPNT`fkAxpen1ggLFQBIDs3b}2Q z3VA@q;I>J=D}#KR^+{QT#?1@bFf$zv%Zz1-Lk$aN8o`8oC@N7Rmjn|$XFn-k9bL+b zdw28R-Er@pPm1N6Gn{UfbV&r#XUGnZ`5X(CA$>ZPtlumKnb5!kPe3PaZ?L{uijQ9&3mDKH#e6Qg+wdT5%f&Vl`5>!i^kag(c9jLE-}%(&s+PBffQ6 z8NS8PBMM#{`PvKze3NhVPSFMh)>RF8-PPV5xL5JF7x;tykI?%oV^)R>R5_tn;NM#! 
zxIMr!29~f;5rEHffO-;W2Yy)?QDyX~9@=~x@OeqFK~Wv9jV#Ho1MR7eY{{;tEcS)j zTh$u|$!(+xoEtfkJ(nsehihGTu4KHwmn(1NeY-bIQivH86&HtZ7jEQ9?tIGaUDDj% zvtg0kR!lW~E3i=@xowmwf5R@>9hA+v=)ZODkyOTws~W1j5)1G1V~w1iFAI)nGV~7} zWg8i2fucj?EMnDBR{KUavFd>pwqYPvqf(k2;)KqU3S%Npvz*^X9&zSl(ISXJs;FMx zac6A9K%7R(V%x|e)?92F6S0~puYV(tSTVB=3$a?MJnII@2y1Jak1TZDcx|zLso-`G zXR6?{D<9bj+)1q&%Hk~A$bdH#rLbu6%P zoj|Qg|Ai6DGZALCW9l=ponz6XtNi!5_^&qP<4L4Q{k```Uv?)!_95-o@4Z@<0&>c9RW$Nd#KEd3 z?k*SL0*VQ4QZb}ZkX<>UoKy{|Ce=gg$%3JR$-<#R=C7J4n$!$wm|ZCTqC6lE?r7TY2gkjP+WMuZD3DacRP#LppCd`wTAq%q?Pgo~yLpEmDPLxmD zhwPJ%AqR8oCY+P5A=jjP$URvxR54jORLT7H6IGL*A z!P;5WZ^#!g4%G!rL-m2Op$7c?L4}e#%mvJ^a{f%nU4ljMQ4LAT@QxB$g`mT{I^aY8bh5Ae2Tuo!KUEG4jeRaV}GOKxKYA} z{HW8GjsFIgyIhbPQUtgm4 zq%a*)l#W@HcB|5E4Rcf8#TlEHY&Ug&eadidZ1VcVNO+nbydIvp9!^yyL0gsl(@7(=^`P*DIV zq{kOm`;wJ4adl17>X`Q}s1`0PXhmm}Xl;(Go0E=;`QSp|qIKbzSlK2z+T-f>WL3>V z-@Ti0bwko*jRx@Q%r!U3icIhl~Q3_pTaz@V9fhJYn#44U9$V6fOA30UFQvlzCF ztvqA12TcKo?7D0VIA1RqDof{95^!BE2)Kjh0L7|6Zj^IAco_(RO2n!PT2L=*(8fw( z33vdrd=s234!nq8jUUooE)CS6?)E@!&=DYipIoj$UB*_QE=dEz33pOWjR8MWIs=XH zZGx|w)tBN*@eIIPGh(=I7W!LK#hHPfDnO49PNa%PrTI2h5N7jRK_HA7HC4z5!`Jz# zQ={_oN9~SDjr}%OSo;eKfg2`&_6SP2Xd0AUD!9@M2geC&xG1$0rW`Mp?b)}yyo|G@ zIO|U~q%M?w1m(CvwrAgRYQ)sYzKHo{`IO1=vTuPKw`N_kO@>XF1^f)muYhlp(`VmE zt&?3yO#rt%a$bm8A_pNRjs7i9PRtBGQ2xp-(cpxy^GV-Xv<`AH${{zo_Y|l6CcX_G zd^;K2$k+}erKQy~J?CS=rn!HZbraQQMHdI~)Mn`5wM#sW8k#gpG+B2n{365MF z#)LW(O6jkTUAc;7ba;jjj;6|32$qoH7e}T7H^u_ttI`5a%fuA=X((loRt%cW(;k#7 zc}nF8rA*lvvX?d8xw8uwrS%u2)S=+S<$P5QQGfT)!#7jSu||wxSs5OlEz1m@R@tu= zF+zU=<27!*l(V_!)VHXS}*AQGHabK02pf z)l?;`>Jn9*VpV6ns*60_NpC~K+bw##f0|m{HvPAt=@-3N8_rd>2V=fQT^75dnaNV$0MEb&Aqqw&L0(ZzDFM4yCrXz#Pk*U zeUk=HOygl)G^LUElBvSWz|k~6npsg$*i5Iz`3)QFaHhKoO1Vq)b8o6fl~=e?<@1Og zQC;EgC~p_4xrjQV3TI#mifsJn-5mEEx_G1@QiwIIKv2%2D^d3z&s`Vcrbj50Q!2RI zm1?Yd=a82IzliDv$L~NCzbaLLGt8A>syK-0inhZ+{wM_$u{PufVdqc7NGXO>n$YzZ zC$J3;r3%KT0>M{OMZt;SWN<1RqMauilb-ZE6&MT6jD$z8=FU^H9{~-Wh4C8qSj&0q z7B0V25;wQLT{6Gz`v(_F<}@*LD_W?kVNvs8<+fX=<_<-7CiRx+h53tjUJ&)Q34N2O 
zZ(2P3P~W+(;;Oc<8OrAjUwp3PENyJQ{D;N81?th{R%Vy0CD^*`hQuOQvt*ttdu zvV9^<_+w8%iSiW4$5i+>gYDb?)Q495M~1d#@$v63o~ z#)g(E54>FoU*(^txTV2Y!k9XPfnh0vM#fA^$FP<5e_{-kJ&DU%_m|a#N*4MqEB7O- zT_m)|?HzGz$DH<~6KCR%v%flY|J;1df@Ps_vGM1j<&J;(irBbc^c;vg4?H+DR~mDi zrHRqAP$|0GHwskc+TXg{qngJmuEhAmLb1GcNfS5jdZ_P7y4%+axMIsXhdApR&ghD1 zoHPKP=M-Oju7qcZwUWEnqyE#1UZ0w$*86w(u_wi)^lY~e>c`aOu`8*Pk(aMvUxSl9 zAud%)&S`eiCpTY>RH^dxbSoXxhtc=gF&aBD1kiDQ6cf=H&bcWQJIHVH(u5EPph7A2 z2tRsNI+TY;&{e~sQGL2t^rA*!6U7H;|L_PIUE67eQ-zZ(-@0qUX_e`hx%5*p$2a5lRVit#nh#{bj z81XaVSB9S%`!pKjTb_s|I}HkofKHwSc^`Icd3Hn*BmAkGZFwRl_~*81Iy_>Y+OsJK z{iYmBw}63@<&Zg$WW!kwd5&wyK^M*(*a9VT%YRoPm<1CZa3W=bQ83d&A(??vvPY}| zL&Sz(IevBlWQA)DSa#KLsiKOhy+YwQ7ND(cp z#~W?8+7Qe6rfU?N3~V=ai02fX{T$nPxCGZ76>U6jmtc``N1PE?ICB)tcT^MHa?Qfn zkVwxg!L)rvx^G$!EfphMk4&zaLnX_#{FWwA7BF8?(&GDu=9XsMn=Vw&OU9OhQ*wen zyETebglptzw={ANz)V+qt5q4Pddg_E2vtAk{;^W15Gtvh;Z3F5G};Y$X9=sE8&=+K zHK^Buk@XA{Gs~FCjn~Sh%f18UY|B@d^^t9?H+clxmUi)C#GyxReInI@Rqznj1TVv6 zjZh<0lPzFpknDmg+$5(GGBX(~SwWZ%r0k&-EPbs|8enZEnL_PlCHd?@EWAxr>U4-w zP*J|$1UiFdO2c9VEeTdW7($-XvGlfddPh!K%2|jb9pcOhv9S;*3;C+P>r0P8n(=&L zqFS&=@X4)=er&+_s*Cu9qHKSS6bVIN-d}|x^v1)0T9q@ph#<1%5$c30c}CmZHj(#_@t3RVpKmf+bKXSgF?p zO7rybMhc=&>%GB^ucDqpKtUC&2nBS%}zC!gB#}-#K0-|<=Byyr{JPWX!#mA z@W^nUx`ulN`!|Dw`KUxF6f)~wq*W*t3_|N=Rc5R2xFH?fR1sIinYLqI&)9r8090>2 z0MJH;ZI|1zBmR+sHq80ZC{K%69uzreQ4mf@++~yyX#!WcX<2 z{c7%(4rQoE8MJ~rU;J=}8k%owAgBbbJVbKf`Zt}873?&{cC0)tyF~nWd1L}iw)ECN zbbXSYkyFa486qOeJ5MmG^k7m2r$%oM6ZN76@MZ8c!{LcAFQCYL2u6tZKsb=g&uh}j+<~*$ zC^6GOGDW7@>_W5M*~0}9Gt6shcDCE3*qWVXb-i79PWm{C2bh@HNNpt0b>HGVW zuKGpAz5UVtN3P0wA?|99_7jsRaPQlT*Tl+Q(ZfljJ7KI5jWr909~zt24S=;_;i}rB z+N7~6VXPC4b?@-+M&6DrJ$Jt+UVrMLagb;Q?O;|}YGV58FFyBiP2W;5dchBi51KdA z3y46+M$f|69X-mN(E}1%di1D3lFL$ZGSmW$B^)Vep}~nBM+#;u#272W7}5wT^l_p% zV2ntyvSdFRD?-6FOmQK_ZL^>WDA^WULlg%rB>6^=Mo_cqR7KNlw$~#kE#>s4E@$(o z6LbM(cJmm~M|8JJQ5rv$<}1clX7b-$+DHjaZ|M^Iu(_2=ti9dz)DmNC=PR*Ll$j8f znW;~Q(W*X`9j%`RfK`7Xe-kfhylO6-8$LfceDWL{wEVXbkWxxOc_})v3N(PuTOy(z|;8~-V`y%J5`jXouz!3 
z3x=PV35G{5Bk}O%F&@JkF;W^fwgDw&kS34Jfj0yy4kr(4>75?h*6aYyVLH!te(hnM zlj;^S2|@njYYb@&(O6Ya+_f76+v!^vxz{>(JjddmI}Q?o#~ahTKlbd7>XYWG1;w5H z?{tgirp0f^%{@s=?V>`o_(e-Cs0!Xa7@{TiguYVLSH`^CmYPNHzWX(z=g_MDFoGDM zk4|5UcTR|>uid}2bZPm_^4Xs~8#{e1rr#FpocP$&7S(^^s(5SXy`2kt;;#0e+eO!| zs3vK4%_F-;%FZNO8Y#PC&fC7OeQI`V=E|79@{7;6(Nyup=N@jyq#}e7$o;UmuTcH3 zw(0wH>e=S=SNZAdSFVD3(c|fT`AVzjJQF1F5P6IUssRrsSx+bkVn-mQIyG|Y=}Bhz z>Cs>)v?j!Bvs+bplG%ve3j6hCNM$5A~^aftq|$VP7Yk4%sd%!lHVzOv+9!vFL?Y8Y0klh zh2wX_NEKZj35|rqJb#Z|#e@}B)09rCCgTk9|A+z^|4>Gm6v>=%GMuo;DeXntoE+l$ zX47Y?dbZ zcgblBUky%!(?5oZ3_Ftycea%9#6*6Am?c5Pqv~ECjZ%~8NvxGdOqj2cN*bb&l>2Z`@7gD90OPDfepyvQmPhP$r!qOrU zH#eg%Cz=Y;q|G8MY~A9CD;1(cq`Z*)fo%Em_W@}Wsms|P&RX-XFSHJa`myEs0$d)C zxwz(WqocTLy_@UqiD_KPwrw$uH>q(YG}WS}I$5&&PcGcw@jpHn(;s`RQWk45(HFbF zIxJRu=+CY{xbRCMR&#z;b0G)b{y($czy3>iOn>T0c)fqN;dt&<2nXu~5xxzZ_ zXIx>i7Ufzua<=k0ZPH=|yU}c!(CSk2DWl5Q5aTXb}%<93Pmu5{cU(b;qB z_$OuVSjFjh*_oLB4DEycSRv;$YmSi;EZLuP>ZgIQx%o#=0(wm6zrQ ze_Pvu+SitTRNFCM{>!3e_4n2Hi=yfU+9sEHKjRPtW`MyG3`3Vr>J7wv%Gp$tJ6Chbj&heZ4KpFbno_pTSHthUFh0#g~XFf}HO?V_=L z$@b9LjZ;D0LB+a-GnGY;-6>sg0LbDqqH)Kv`cJjL(BAJ9yN@KgPm1_Ao}@xL(e3Q+ zpNxs+^>as(E!z?;{bEahOy8KS>s*S6b^Q-CqVF_L4@t8>)_hDfAA2w=mYtdFLppck z2YpML_fKN4u{`vbesS+P(RDtiu_vo{ERBfO-ODBsmE3zW<~@b}Ki?~ADv_$hv!DP9 z7Pg6cU$VqG|AJW3^g(E;`~8Th-*aF4m){Z(Tom<}P{(R?%ycsSreu%whN$KbPtFg0 z;1?@8MDRp6Mb>gPUD*OGKNIr~$bi;zi?)0|<~=Bfpbzajtw?lV6uU33c0aSBQdVd` z>V9Tn_;z2^@KEoGbw2}q6+72C1fUwO#>KNsWs65dXXl(DsVPrr+@i(}F^`X({<&k1 zDyrUk{@(MkrajB!@rt8=c~q>pFn21cuS)3aM19?wZTnKsQuFd;%yJ@WsYdZlWoXZm zQnY7DT|(b1>YEoYJk)orn~>4EjjL)yIg@S(wmhn=&*c&Ob|;Od=qb@yy>NE1@$KQI zo1*W){ch2B{J|B`cR@5>_)Mj&D@6mM%$E9?f4^wnf4@U48<^`$>Wyz6zjb_G^-%9# z*MY12xRf(n@7CX`pRfHv%Un@rZi&_HS-uggIlQVllGJz;ng&tRuxMV@v;d|&XDtL4 z&n@g*)okA!xJX$~1)Wj3`VNom5-ZKqTr?!ICPc-6hxv5DUWgB%;?5VStzm%#Iij69qFJhAE6_LEU!l}WPFW+ z0f%Z+`cO@beiNYCWXRcfnA(&*j0|QX6J!H0`oXg?y>Tq;lEVY4*STBAo)l8x6*swX zg&gbFF}VymkP#Ru3)9w%Jvrqx3Ce()@tGhX3$X^qdzh*KKImY(<#`G|3W;zb=laI+ 
zY+2@x%^ioBZPDwR9O|%9j`?Iwz!NmzI>9((7LY-#_}TC)ALAl+ydC&C0~+w9d~(6Y z>ttK@jZ)^}a*U%VZGGgDMBGTzvNaSW7PpY$4(4)*;EN+MJ^fr%@%+CBb}Lb?DwNOj z6i8|A(5(|P;W6UP;sn0A@QCPB4Olf#f#nsd3FAUZ23Bnr7JM_Xp8sC}$tTnbK8hcy z1H24Jf=Vwv5SRrY27H4O!6InhFOqzp!#h$RX^8kEjgh8EGbrZuuc;Lr{oO1U9OK*M z^x3yiFXs)qcqMp9*!KxOGC|F59`DG;lx^~O#aL;3vwc)GMtn((YsNEZi?j=E;7n+R zR&dxrBh^*i{$DW%apSx4l(0rl8`&;+@DfH0Ku>j&ukem|rm?M>>Vhzo!&B!Tr^M`%B!Z9g&VmCwgF4&fK;`&hyp@ zBIEHUVaIr89A-0=TQt%&wJ+N%^TVOJPKJuI)S@gqg&o39VJ9#JkWMbrEp+9>K+as& zEklssU=h>-x6Zuj_RA#|y5#us8?6ma7;7CDu=;-jtJxXTDaVTJ3TI$IH2`zOlk-?k zZ8JP_#@^`3NehL7Z`j9I~)+|PU|5x+!pZY!UVI#qMy*^M^e&LBZ~qfD6hI;afO#|v zOSZ{l0Quhlruz}=xEz*!3;i<8$YG&BP|8+eZ}_AfGVyZr4PVHV+7U3m&V68#+ms|>!k03krtmYeoiS0$HoJ53&QnIg zmZyyQ;+4zsgaKiXuvgfJ(dqi2?0w94^jwB)by{fClxAV5BJGj3jJ-3`k+Cy>zd6m^ z7(nt(0+F;`r-gNcc0w5uPYetaj}LIyu|N60oTa^D>wyKw+|BuI|5XImv_Pl9 zq~L#oNQ`33sIZ%=gY`(N4psC=%1ni52c09cdJBTNw5PeHlHRns$~aF~7pvkHDb}&6 zuwO0JRxy?;+oB~%6u44Q{UkaakwimG1l8ktO=7Hk6I{GNxM)HF|BlJFvIFu*hc4Y$ z-0q9vSh`I#`W7pf6{5dKH1;I)yTwD7B>epSO0R;Ew*AUer}*=Lun9e=ri7kOSTe{l zO07K(zpYOi8D*Q00U6oijGX0r=?+jNQ}HQ}v~=$`sq&w1ZhqIvUkoqR&mT^>TOPVw zelh&T1|15&YqLKo&&#^+e^!N{_NvX16ipzf-?tY(*jx?w;4fZXIy`?VQPK9WqU{&2 ze!+hP;G5bcGXhER=0}Q5tAI!dT1TudM&da^G~ncX4E?@t;rq%9`$UWmZK>eQ9J2(gQ~W6TwZ@ z0}P1?PJoyI3Eo?uv;;P_gRCh#Im7<}qUDHU^Dn|ZTl=MxNm};$KTyd@+VeGT$@|C6 zKWmm`uOW(lEqK$f<*6(@os4-m8Fc#PN$@U*z2GHhH5oN9pk-(x*1^O?aAG)=R>)43 zG7M+j{J%v^iAg01CiB6W32-4WF^mQ$vgyEjAa|;0EHn%@&Xmk7PDgeSSv;?5vZOcj zEETGUvMRnbc=q_Y)4hF%pkOm~6C8}ODSnhf+yrCOY(&h8v{o5U>mtf9J##aq2?T>P zWRn!*7(<4ZxRf5OW)ORr#xyi1CFyghY=9*aN)=w8fi}FPH5t?#d_Be0hJxX=f_zGi1_`B9D6Yg&`ZmS-Ly83z5g&-~KFnGr z(?}(o8Y!jAHUa7+aUMxWFa&9xjNW+40+bJLSqjqhGD;%xI#YI{pJm;wEm+@48JQ@~ z^yOg?-z1H0Ry<6mTXe{!PWlqnf*L7RwyBG;q^zf=3~0?U@Q>vA7{oi$iOsr8WH;+? zs6jfIIfo@p@|02P+SICP3Hfwq&wwqEDo~z_#NnLy5;`-{^||rDykcVW*+Lnm9+gvU zTW*3Z;8e*b+@#rHS!AU&Sxlxg-y-afu1yo8T+_m_05uK&SA^RCj! 
zY_uOMPo{z?^~}f^pHhKem@;B(5acIJ8~WtuXc zA%o6~yoU^$eNrV<6OLUfLj!;0jK1t=In<+@SIfN@GQfewYn45G# z1j7YE0H+(UpnM~Fk>HBtg+a!zP>yjjXaWn-3F6E7ci$xRz=1TkPa{tzxyoF!Uk6Rt ze;>QI-$Cot&Q&xe+)-*Wg*XMZvKpyn5^#%+Uhx<@w0d{f+3H>U&P{fOe60(^{0 z?MH{tE<5M?-yFO(2u_c_ez7sG@4WwQa0V*DVsV1$r8kId4;wG-UDfrjnadwlRmW;} zK$rDT9AXuiByOnJ%)8=N-<%da3Zo@@bH(5RL!{I&*B5PC(O0aQsuHF>qG`|a`9B@{ z#n1z_xc6k-bPA;}E`O}&oE49Waq{TUu?M?X4_#vHl?_hm)XtTnZnfK&hQylvF^vZt z1$QO!ki0d7ry3vplGU{_P1Ty&3f^A@n32x4a_5}>6KCaHE%#a$)&H&bJ#C_4x7e^d z?%XqHK=BY#aj#7xp&OroL-=&q@Y={?CRlFdn0c!^1~xNo^dY&!Cw zF-|<1M~=$**@UApTCmpG95dD@J+-7C2`ymbN((X;#>y!4@q`e(J8wQQT_L)gzwI%Hh%-6?) zJidjCRG{73oUm*c zE!$%qeXExKq@_uA9$mE@OIjKemTjVC+fvJ_FfOA38=t&ss zMPq%eq31*6?)9L8b2dGmP;eD>3HLV9y$yvD-TjXX3OuF1b@xa6KXz3``@jH>?*F`q zGg;PLRj7-}l63duT~hHGhltPtZ)l9^YabcPV&|QbV%VN++MQ@RAT}L{Hysp> z4QM!L)lWjPnvQr?=hCcL)%&62V6-<`(Sn_#cWL~OT4RLXOpSluw_Nxyk3X~=Kt9es z1<+msHl7dc%Xda&jR)g(eXIKZq~0ELw~PArH%_ddQF4_#A74~*Hb>0S6t_0dX&-Ai zhg+m`LfmySZa=wt`dm`)LQguQ01IB}(9ThRa#o162amf4v~pcW%tzh!eZ}kFrCgmP2vhK+;{C^mHWa_9yK%$#O4bCLUXLPVG9UGm=rP{o?Zr1(>P7*mzEfar?za z9eVl~8&?&a?U;hGN&lysii>UP2R&sMJN19jRCcjPBN|FCaT@+V(mGjy(wsl7U?vJnULLAN zFy-Z!TE@+CF4?zSvj9pi=L}kV@wg=$N480MN_5+Z^(nbn^YKx~K_S}uY|fFz?*|%J#} z_@q-F-cnML6%msvY`XL@!k4i+ih4@K8UKaFA1ms(i79Oql?=lvmtd z5~?DlS&nlKXWJkc_$k4V7OVF#Tx3#1=S>g(WG3Guudh*Op<1fX?^W~C zudKNNHAme-_l1(|3~(!>nMXul{%1l-#B)n6VK?6eJm4?ug_@k6rj0VVZD;zL0?MC!FrTgcXH3x*Z9e(&R-4P0LNk0L21Bh%JFKT?}s1&#@I`Dz0wEn8t@B;>23Vw|YKTb`$s zE`Qn7QsNvu+4GetC-*uzuKY%;IP-3=uE07>DnadU+Nypw>y`;$xo+L=5~P(mvl^k!G@{x1EI6gQ$@vYJ$@$$r3!GuK zi>hyTJY|=n6)VR8`vFdiO<3aQZ26Lh$sVzV0LM zK8dTtxT2eQA1WlI9qk5V?I`bZ;5$keEx~i^a|;&_Qs1GTF5gJ=5O&sG@BK!x z<>ZR~Bx9m5?xLj9s!VCl;Yymb!O@qQ@2CF>kH7E~dn7LHZ>R#U918rug$MtCk@4@y zc$N%eNAQ0`1_?u?G{n1LL`;x+ap5Q*VE5aGQpM8Ex46lXZ@>X8rJ9+Z;r|~>_aPaC zxRm8&?=!=vE}R@bdg}Cr^HAA4e(2OW{(qw&JHbtpelwD;L?-3CDfvN)!+0;F{B$Uk zj+wn|H%r+!>Q&*s~fooW2f`Js%q>vRja^!d!5t!)*5%F3gJ~K=c z_h)jFF9I4FV_-@1_B3d=O5tixXq)HiW8@C$|kKzWVYpJpu;PVYnfgeP~dx;!f 
zm}VMiGhAVCOwtu#_vPrOf;WcKrRKL&+ZBS9f-L!85;CayB$j2GUt$=&&hxnf$CkY9 z<|IfZn+Al`ER3Ao1gJ!*{sS_;@{y?2{O3rNdxu;u7felqTkt4HB^(8iN%e%Q_^j5Yl7;^|T5i;$%ElE6cTWI>hPN z0b;AAe7-bc2fcAe+|o5y{K#aB-u%$yo$F1Sy$N%>Xl`FJ;r_*h8S0STAm`{YH+e<9 zccJs$y>IXR>Hej@SatWReiw+P345n#?_4^wYTp@ECykW}qYu&q3*mRa^Y(WV^*v&J z&vIX^?!ZH1FK9-NZV;Qxs!)O{`tb|Ux&{Tx zE83c3tph8zfse{7apU8=uD4x_XMb8XUzoI4y;XXzbm8SyqWt>bdv@6tZ#f{^@$X+t zht00qw@YEpIP@;;{UFpP?G+IEFK>`R$87^>K5JFN;u9@UHC+X<)#797oH2_pX+Z!{ z#$xrmmye0{haY&w`s1slRgex?m|eAO|GbJbJJ&s2dG*|3kRr<~B$}60qlGi~hN9Y! zoW6zexN{rnqt`Cn5Y4UBO50;QPOg|wCM{d4ZgeJ$UTCWRzKcgOaf678oRIa@zhDI9GSQ29xnf3g0j6VbtBdEH{`N_qG43lCme*?XGM z49yo0u4yiBN|bkrF#txV&6Y}L}9#pz3{7Ley%cTe3pwa~L_ zX2|Y&n={ zIf<3;-l?^kLjykaqH%l7Si3N>UdCA}7Zl&$2fjhV z(k5Ekz$Ho8NjL0~yJ|uE`!PklQxmJ}iC6Cyjk}|zNu%v2HS-g3+{A5ciWYw2sQgK2 zVPCvz`-hH>Xz!Z6Zs8>iM+`N4GiIBtBi7b;zgKJ<5bXod&GFl!rxv!YSbW$>IXgE> zxtd1wQflwGr4xcpmR)!!%bOODiRC+&x2=@#S#!58@}ir#TJHVpD&>A>%-*)7U9l4* z&(Xf5MQ2Ue4~q7KXx(hg|DaMjoKWGaX~d9kY>w(auJ$9R+U{kiSlb^xnJjnB-&`qg zNc!sTY1e!$i{qkiSHjmX`ugW_?QQMC>`M7IP>~b%M$z85*z?fdMT4LblMhWW_g@lQ zj)?XnG|TiHdC(*FT$tA^fML`8(7toMoU*lZuDWQ^XAlN6nAcxcgLjwEREV02`Ilq$ zyJD3+tD4=^ZLCRsbT{}N3jmDTX;#oB;OfYhRn1%ew?nA1@vc&q#F|| zfUSU?2CKpu3OAFswt~W{64am%=h6~vk#d}LZR{*<63T>fW-Ghh4mc7u>}hba`WM)% zM4krcJm5&D(~2BfbCUk1@|3=hHl>HP&eLGL3>aEgr;0F6p3;-4)7OJB4;Z>M3_AL- z=NrQK9|TKYifcy4h95#Z)6r(jlb5Dxe@a}>mc(#I98cmdtFeIFmmG}RX(!pzfI^lh z{yMmY_JHpxwaRGc{Qp?1CtBjaSgoR2%`4pDWubfT| zM9Ou@#Sh@+E77KTDN;JM`z!N3kj+oFJ%KxWR+lG0t(>PdHjn%8AsJTw*jNwTpv`mR zB+|d-36#-fB~-qTLpeR*ndYBEs^n3^G~Pzk*niwUNHY#J2R&&mTTp(%P4%GfACaKt zn$&*K4l*;nT92SbI&V%oui*U}>AX3b8=ka=hBD&CB*=oRei!r7Q*l1FZ&+42ih zq3bgHL< za8!#faG9RD#^jy(e@01&ddPPXN$Xj%wUO~I89T{OLBt{*``D9o(>~u#-n0Wu757d| zoW)1BMsbBbtcOQ%OADVWk-W~0Vm}S11|jFCVWiBbro%_+iXXbRJ{XX0%_*gvXq-uu ztQUz|7V=k1WWm3t!2d|bIm+}r85hX-75N>cR7~zy`kIrpotJ1~d_SV4j1b?Hz8aPm zTV-;jjJ(b0-6MQrt76 z;p+V$VL<>+V}IoFFSaHc4~vb56P_cG@S5wLi>$dC60RMhYscJ)HAs`X=~@s4B(IXD ziq8s^9$ft8#^DZ;oI6G5&bV_It}(;$F)hs)w>Qll1#yi8D65jS^)Wr<8hr`-cHBym 
zu=j~}Ix^b_aJ@@y%e#Yb4<>4Nh(=Gecz#FH>|Q8dG5Z(4wd`MM>3h`Fk+ehH5y>l< z31=rr03Z@Hlhk1J$ox2H1lESdZpa(PEcHo?e{o2(?1cDb(ptIDv|?>}3XF;CQVrVqSYU3+`Ve0%i~Q)hDCQrZ08ZtedK{RZa=Z1E6i58G*6XR z7{=wy_pMs@CmVKrb=FkFO#A-+S%2cJjn(g4zPWrY<~tU59*H{NSZINL>MJBY?v`Zee)pkJHI6XqJx z3#k@0F(*c5DP4Ar(_!!;Y7w)|9&MS%f zU1I&N73(ev3naP*MEutbJaCCM=OMg|>jBk?hHer67W~)dq}waj?_II(m0?Ctr0b1O z7!8W}ZyJo9IUBD(w>ed}Sl_*3?UqyBIq`|JCF$I@RQt!Rkgna<8?zt8aC6z9D~OAX z@Bs#p4(&uuh0HUf8-m7dzkBS?vH9yDdM7QOjgq2@vS=|;{czPOV*-qQXl(zyS3zX| z<0{U3RPotqHP=0$h#Lm}=sQc^s3LkQVQCR9E!6Da5e)+$b@U0o$Sm8RZFIEf#UwqDyC%aZwY_py9saJHivyFxPG_4qWVQl(1n+DT9 zqd~2~@-*$SeDWN)UKkU*^f0+e<&!$-`1}n%fJ4$kbbmJQ$x2${BQ21U#GaG#-|__5 z$LMhD1(QShpg=y+pW`;A2ZV2$9<0+R*vB2neS@ReTt>(nO{{dUs)(hcy@}Ooe9sbUsa~n zcb#qp{}ytVcDrozSRk=qU!i>h+ev^TDeVqvXCSG(%_7iWkh7yBq9AV|gLYhgjkL?k z^dV_)McXQA1K0~&N-NcdZ9}MC0#L=a8O#AFbSVcl^$H!qzO> znxQM=t`e#8@v3Ly?q`U(XRV9*_leeh_;`xW`p8^MqtWR~R@WqLPD;68h150VJ%CcTWHVsnD1)@7|O^U_tjbeCx zZZ9fufhb>DmBhcNI_a)T_Ur*mGFk7(jna_hhn~!5j+3f_p81)y+pE?a@BPUql=wO1@G;S)pd#9o$CfE#0Y(w`O$lh zb(7>Sa>;JzY|i1-&fmw~5r)J&6tYy?sg11~7X6Bq#I|@qIoE7T@cq1EpZT8Z=lJz2TX$V#^ z*|d~iM>cd!!{ZXzq#jS5-QSF_K+9i@#;xzPilqv1j=~XG8dL2P0VS0=_<&xb23fc zPmMd1iEE@FD3eI(M{;m6RiK6s#j!ag<{||rx B?M(mx literal 0 HcmV?d00001 diff --git a/apex_plus/simulator/__pycache__/trace.cpython-312.pyc b/apex_plus/simulator/__pycache__/trace.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3eb32aae74016a7895a07390e07ec147f53f3316 GIT binary patch literal 3661 zcmahMTWnLwb?(#GuOyD`JV;C&vR-Ny62Uf0i=ep4(t_9zr0xg(W7YjwsXvY!M0yF*R{K-_455n3PkZLt*Cbug zj&*0woO9;PnKS1x{=3)fLeL)IeKSh{eMcvD6B@u~27qOxB9%#^6cb|@3bRQz#l<)a z=8}Aj2OO6aVgihOQcQ_45op39ia9ifMPo=67m@0?!`7wVX$X%=7EJ=0)279o^IX*R zgmOd~)2S+k5>G0Gz=S`SAjUWV5@0nWk<#K!LlPY%_&E4BzXa?unnnf2Xwvg=fG=2k zlVMa~4a#e^S>A?Tj(PS@V{L&m95z1BHL?6Fv4Y{Wd1o!-+OztY7gVkrKx(F4U?5wc zf|zG!y*qj7OyyvONLRqv#oTuoz1?a zg7ek7<(+v)LpR6wb-VsPkDT?9k~$Dk!;v#bBJUrGObkY*p3wVwLYbnH$)_`#ZgO$*J5$7( 
z0;^44*RC3v`mrey181nen4*#aM%DBhX+ufMM2n|&l|)^pb1IS4n2z++6wwS~ z@)T#fK&af1DbCbo12-myjMHCDhC=8c)OKBQ!{t8ylpCEFm9NM)0Xk;X#?R^r-VX5MsW6ihT3 z6vVq>1L}I@U(hX}^_I4Uu??rUm{^HFaPBFq8^NyS*ix*_SA&tFYr_{T%~pN=MR6n0 zwX&-kc%$fi?n1%P!$4#;5Lx#GOUku#EB=)|6;G(rH@wNQL08dP6Ops+M$5I96=vn9 zbqNz{8&Xt3CVI+x(@IOJa$vuqAgT^fpN7Z7H4u z37rGwORF7&4}-^6gU71DUFmuIf#=6h-6*ho zs|UFTYCl3A|HAlI^d)?AvBCQiyzKu4izBoj?u-)wefcT;NJpzNZ z`9B^9bAEgty2bN&pUt)7uy)$n{0)#_9#&=O5v`QNs@xQ>@{4>-&_q?xAb8<(z{jd+ zN>Zgo-VXVg1RPGyX;FC9wTh*2m+G3=c{mL}7Xfo9)mo zL!jr83{btzLwbi?jt{^z%9=dUl2g`%&}HGUWfE%Ggq|>DIoD!sIcU%6of0>63#cZc z?*1bG2d`xs)1zlo5U8?eId}KqgTQd*_=TcW@lX7twY~I{PYM;O_dm}#6qsZPHPIgf`*}=# zZ^u=(0K&S;_uzJ{+R8#zXjDj`H5(YFfyJL}GZ~|9uf3dAxzd(d?S}@Mr3e1MmgjA0 z1tHJQ+D^WW%b-4BwY>1^3TjJu4ZXR8(X4^0@ETsxmX!moRYwkNVPkA}u;&cAe&tma zP{*&an`E!VzchT!Gc~OVF9Z>){V7`Qf5;LqPXZNMpuum;-a&D`mg2G!hMS(!Uipgi zjqyl#8`M>-rh05%1|lCqq4|RPe$Bt`YgByicupLgC22j0=_AYZj-+)1>h@&%vZfQW zbxhMW439mvj=;2znynLtf{jVYz~`+>J#FoO;>L*_KS*mKbqOopEW*O#;K^L)MV(|b znKTAbVOrM1Q)!%145<7Pya&i!!kEFdnwUJ*#uSoiMJ1SCgcYkMH?0|XFP+j#YYG_* z={zdoVTz(Qu!0r`DUnYmbj{@H(-j9Of`fEKXwIp}vz^>LR`{y(P#_?r+qZ>u14qjd zK@d-n5Q-oqQcePyOR`T2*ReOMVhUX7es4WGO} zu^K*8?LE8c;JbJIn-hZ_PX!d}-{MgFn-yQA7C-oCQ9=H0u|(N}g>JN6aF z9(C?6aqDee%dJbTe|CJq)eHF@_m>m*{P#ZiYsX)H_4UC0_&5DuO?^YYN><;UsD>vW zv|W74p{~B?BJ#FZ+TU99?EhyoFKrFfx2zZX^2y=RZtm-DVRVlp>c($?ad3ooZ_xPn06FJ3eYX08qW*;-?n#@ z=9UiB1c0F!yIR030C|w;tFaCt^le9XO@I+-fzFzPLJ|sg)tnS^A(!u&8=y_j=C8F| zP$#VPchm$JsZ`%H2S66rQx>4QW>g>WP#=M3 zXWbCinRcG@g> None: self.model = model self.cluster = cluster self.trace = trace self.dtype = dtype + self.prefill_gpu = prefill_gpu + self.decode_gpu = decode_gpu self.gpu = cluster.get_device().device_type self.gpu_memory = cluster.get_device_memory_capacity() self.peak_flops = cluster.get_device().peak_flops[self.highest_prec()] @@ -63,12 +62,7 @@ def __init__( self.cluster_size_per_node = self.num_total_devices // self.num_total_nodes def highest_prec(self) -> DTYPE: - data_type = [] - 
data_type.append(self.dtype["w"]) - data_type.append(self.dtype["kv"]) - data_type.append(self.dtype["act"]) - # Dealing with mixed precesion - # Assuming we dequantize the value for computation + data_type = [self.dtype["w"], self.dtype["kv"], self.dtype["act"]] highest_precision = DTYPE.FLOAT8 if DTYPE.FLOAT16 in data_type: highest_precision = DTYPE.FLOAT16 @@ -76,14 +70,8 @@ def highest_prec(self) -> DTYPE: highest_precision = DTYPE.FLOAT32 return highest_precision - def dispatch( - self, - requests: List[Request], - factor: int, - ) -> List[List[Request]]: + def dispatch(self, requests: List[Request], factor: int) -> List[List[Request]]: sublists = [[] for _ in range(factor)] - # Distribute elements in a round-robin fashion - # Can be replaced with more sophisticated strategy for index, element in enumerate(requests): sublists[index % factor].append(element) return sublists @@ -110,15 +98,11 @@ def get_metrics( seq_lens: List[int] = [], arch: str = '', slo_targets: List[int] = [], - ): - - + ): def calculate_tbt_percentiles(latency_dict): token_latencies_per_request = [latency for latency in latency_dict.values()] avg_tbt_vals = [] percentile_vals = [] - - # Calculate avg TBT per request for latency_list in token_latencies_per_request: list_length = len(latency_list) avg_tbt = 0 @@ -127,11 +111,8 @@ def calculate_tbt_percentiles(latency_dict): ttlt = sum(latency_list) avg_tbt = (ttlt - ttft) / list_length avg_tbt_vals.append(avg_tbt) - - # Calculate all necessary percentiles for tbt for percentile in token_percentiles: percentile_vals.append(np.percentile(avg_tbt_vals, percentile)) - return percentile_vals def calculate_slo_metrics(latency_dict, slo_targets): @@ -140,56 +121,27 @@ def calculate_slo_metrics(latency_dict, slo_targets): ttft_target = slo_targets[0] tpot_target = slo_targets[1] slo_metrics = [] - - # Calculate Percentage of requests that are <= TTFT_SLO - # TTFT is just the first token in the latency ttft_slo_counter = 0 for latency_list in 
token_latencies_per_request: - if(latency_list[0]/US_TO_MS <= ttft_target): + if latency_list[0] / US_TO_MS <= ttft_target: ttft_slo_counter += 1 - slo_metrics.append( (ttft_slo_counter/num_requests) * 100 ) - - # Calculate Percentage of requests that have an avg TPOT <= TPOT_SLO + slo_metrics.append((ttft_slo_counter / num_requests) * 100) tpot_slo_counter = 0 - tok_latencies_per_req_after_first_tok = [sublist[1:] for sublist in token_latencies_per_request] - for latency_list in token_gen_times: - # Calculate Avg TPOT per request - avg_tpot = np.mean(latency_list)/US_TO_MS - if(avg_tpot <= tpot_target): - tpot_slo_counter += 1 - slo_metrics.append( (tpot_slo_counter/num_requests) * 100 ) - + for latency_list in token_latencies_per_request: + avg_tpot = np.mean(latency_list[1:]) / US_TO_MS if len(latency_list) > 1 else float('inf') + if avg_tpot <= tpot_target: + tpot_slo_counter += 1 + slo_metrics.append((tpot_slo_counter / num_requests) * 100) return slo_metrics - - # Store performance metrics - Time to first token, TPOT ,P50, P95, & other latencies + performance_metrics: List[Tuple[str, float]] = [] performance_metrics_units: List[str] = [] - performance_metrics.append( - ("Throughput: Avg. Tokens generated per second", float("NaN")) - ) - performance_metrics.append( - ("Throughput: Avg. Tokens processed per second", float("NaN")) - ) + performance_metrics.append(("Throughput: Avg. Tokens generated per second", float("NaN"))) + performance_metrics.append(("Throughput: Avg. Tokens processed per second", float("NaN"))) performance_metrics.append(("Throughput: Requests per second", float("NaN"))) - performance_metrics.append( - ("Latency: Avg. Time to first token (TTFT in msec)", float("NaN")) - ) - performance_metrics.append( - ("Latency: Avg. 
Time per output token (TPOT in msec)", float("NaN")) - ) - performance_metrics_units += [ - "tokens/sec", - "tokens/sec", - "requests/sec", - "msec", - "msec", - ] - - num_layers = 0 - num_heads = 0 - head_dim = 0 - hidden_size = 0 - theoretical_peak_flops = self.peak_flops + performance_metrics.append(("Latency: Avg. Time to first token (TTFT in msec)", float("NaN"))) + performance_metrics.append(("Latency: Avg. Time per output token (TPOT in msec)", float("NaN"))) + performance_metrics_units += ["tokens/sec", "tokens/sec", "requests/sec", "msec", "msec"] if hasattr(model_config, "num_layers"): num_layers = model_config.num_layers @@ -202,106 +154,53 @@ def calculate_slo_metrics(latency_dict, slo_targets): else: raise ValueError("Unable to get model layers, heads, or hidden size") head_dim = hidden_size // num_heads - num_parameters = num_layers * hidden_size * hidden_size * 12 tpot = 0.0 avg_ttft = 0.0 mbu = 0.0 - # If encoder, there the output_len is 0 - if arch == "encoder": - avg_output_len = 0.0 - # Decoders that generate tokens - else: - # TPOT after the first token(this is also known as inter-token latency) + if arch != "encoder": token_gen_times = [value[1:] for value in request_token_gen_times.values()] - flat_token_gen_times = [ - item for sublist in token_gen_times for item in sublist - ] - tpot = np.mean(flat_token_gen_times) / MS_TO_SEC - avg_ttft = ( - np.mean([value[0] for value in request_token_gen_times.values()]) - / US_TO_MS - ) - - # Calculate token percentiles - token_percentiles = token_percentiles + [50, 95] - token_percentiles.sort() - # Avg percentile vals are returned in order of sorted percentiles + flat_token_gen_times = [item for sublist in token_gen_times for item in sublist] + tpot = np.mean(flat_token_gen_times) / MS_TO_SEC if flat_token_gen_times else 0.0 + avg_ttft = np.mean([value[0] for value in request_token_gen_times.values()]) / US_TO_MS + token_percentiles = sorted(token_percentiles + [50, 95]) avg_percentile_vals = 
calculate_tbt_percentiles(request_token_gen_times) - # Add to performance_metrics for index, percentile in enumerate(token_percentiles): - performance_metrics.append( - ( - f"Avg. TBT Percentile: P{percentile}", - avg_percentile_vals[index] / US_TO_MS, - ) - ) + performance_metrics.append((f"Avg. TBT Percentile: P{percentile}", avg_percentile_vals[index] / US_TO_MS)) performance_metrics_units.append("msec") - # MBU - kv_cache_size = ( - 2 * num_layers * num_heads * head_dim * self.dtype["kv"].size - ) + kv_cache_size = 2 * num_layers * num_heads * head_dim * self.dtype["kv"].size tpot_sec = tpot / MS_TO_SEC - theoretical_peak_mem_bandwidth = self.peak_mem_bandwidth - observed_mem_bandwidth = (num_parameters + kv_cache_size) / tpot_sec - mbu = (observed_mem_bandwidth / theoretical_peak_mem_bandwidth) * 100 + observed_mem_bandwidth = (num_parameters + kv_cache_size) / tpot_sec if tpot_sec else 0 + mbu = (observed_mem_bandwidth / self.peak_mem_bandwidth) * 100 - # Tokens gen per second - token_throughput = avg_output_len * len(requests) / (total_time / US_TO_SEC) + token_throughput = avg_output_len * len(requests) / (total_time / US_TO_SEC) if total_time else 0 performance_metrics[0] = (performance_metrics[0][0], token_throughput) - # Tokens processed per second - performance_metrics[1] = ( - performance_metrics[1][0], - (avg_input_len + avg_output_len) * len(requests) / (total_time / US_TO_SEC), - ) - # Requests per second - performance_metrics[2] = ( - performance_metrics[2][0], - len(requests) / (total_time / US_TO_SEC), - ) - # Time to first token + performance_metrics[1] = (performance_metrics[1][0], (avg_input_len + avg_output_len) * len(requests) / (total_time / US_TO_SEC)) + performance_metrics[2] = (performance_metrics[2][0], len(requests) / (total_time / US_TO_SEC)) performance_metrics[3] = (performance_metrics[3][0], avg_ttft) - # TPOT after the first token(this is also known as inter-token latency) performance_metrics[4] = (performance_metrics[4][0], tpot) 
- # Calculate request percentiles - request_latencies = [ - sum(token_latencies) for token_latencies in request_token_gen_times.values() - ] - req_percentiles = req_percentiles + [50, 95] - req_percentiles.sort() + request_latencies = [sum(token_latencies) for token_latencies in request_token_gen_times.values()] + req_percentiles = sorted(req_percentiles + [50, 95]) for percentile in req_percentiles: percentile_val = np.percentile(request_latencies, percentile) / US_TO_SEC - performance_metrics.append( - ( - f"Request Completion Latency: {percentile}th percentile", - percentile_val, - ) - ) + performance_metrics.append((f"Request Completion Latency: {percentile}th percentile", percentile_val)) performance_metrics_units.append("sec") - # Calculate Avg MFU observed_throughput = token_throughput mfus: List[float] = [] - for index, seq_len in enumerate(seq_lens): - theoretical_throughput = theoretical_peak_flops / ( - 6 * num_parameters + 12 * num_layers * num_heads * head_dim * seq_len - ) + for seq_len in seq_lens: + theoretical_throughput = self.peak_flops / (6 * num_parameters + 12 * num_layers * num_heads * head_dim * seq_len) mfu = (observed_throughput / theoretical_throughput) * 100 mfus.append(mfu) - avg_mfu = np.mean(mfus) performance_metrics.append((f"Avg. 
MFU Per iteration", avg_mfu)) performance_metrics_units.append("%") - - # Append mbu - performance_metrics.append((f"MBU ", mbu)) + performance_metrics.append((f"MBU", mbu)) performance_metrics_units.append("%") - # Get SLO Metrics slo_metrics = calculate_slo_metrics(request_token_gen_times, slo_targets) - return performance_metrics, performance_metrics_units, slo_metrics def simulate( @@ -314,147 +213,148 @@ def simulate( token_percentiles: List[int] = [], slo_targets: List[int] = [], max_batch_size: int = 0, + distserve: bool = False, ) -> Optional[SimulatorOutput]: + if self.num_total_devices < 2: + raise ValueError("Need at least 2 GPUs for prefill and decode disaggregation") + parallel_schedule = execution_plan.parallel_schedule stage_schedule = parallel_schedule.stage_schedule num_stages = parallel_schedule.num_stages num_model_replicas = parallel_schedule.num_model_replicas - num_attn_cell_replicas = 0 - for cell_schedule in stage_schedule.cell_schedules: - if cell_schedule.cell.is_attn(): - num_attn_cell_replicas = cell_schedule.num_replicas - break - assert num_attn_cell_replicas > 0 - + num_attn_cell_replicas = next( + cell_schedule.num_replicas for cell_schedule in stage_schedule.cell_schedules if cell_schedule.cell.is_attn() + ) + # print(f"cluster: {self.cluster}") param_sizes = stage_schedule.get_param_size_per_device(self.dtype["w"]) - num_devices = len(param_sizes) - - available_memories = [ - self.gpu_memory - WORKSPACE - param_size for param_size in param_sizes - ] - if any(avail_mem < 0 for avail_mem in available_memories): - # Invalid. 
+ if len(param_sizes) == 1: + return None, None + # print(f"param_sizes: {param_sizes}") + # print(f"self.prefill_gpu: {self.prefill_gpu}") + # print(f"self.decode_gpu: {self.decode_gpu}") + available_memory_prefill = self.gpu_memory - WORKSPACE - param_sizes[self.prefill_gpu] + available_memory_decode = self.gpu_memory - WORKSPACE - param_sizes[self.decode_gpu] + if any(avail_mem < 0 for avail_mem in [available_memory_prefill, available_memory_decode]): return None - min_available_memory = min(available_memories) + WORKSPACE + min_available_memory_prefill = available_memory_prefill + WORKSPACE + min_available_memory_decode = available_memory_decode + WORKSPACE param_size = max(param_sizes) - # Calculate the maximum number of tokens that can be stored in KV cache. - # This limits the maximum number of sequences that can be batched. - kv_token_sizes = ( - [1] * num_devices - if arch == "encoder" - else stage_schedule.get_kv_token_size_per_device(self.dtype["kv"]) - ) - - max_num_tokens = min( - int(available_memories[i] // kv_token_sizes[i]) for i in range(num_devices) - ) - # Evenly partition the KV cache for each stage. 
+ # KV cache on decode GPU only + kv_token_sizes = [0] * self.num_total_devices + kv_token_sizes[self.decode_gpu] = stage_schedule.get_kv_token_size_per_device(self.dtype["kv"])[0] + max_num_tokens = int(available_memory_decode // kv_token_sizes[self.decode_gpu]) max_num_tokens_per_stage = max_num_tokens // num_stages - # Statistics list_of_exe_time = [] num_reqs_per_iteration = [] num_tokens_per_iteration = [] request_token_gen_times = {} - - requests = [] - model_replica_time = [] - total_energy = 0.0 - # Copy to avoid altering the original traces copied_requests = copy.deepcopy(self.trace.requests) - ### Finished housekeeping; starts the actual simulation ### - - # Split a list of requests to n sublists, - # where n = num_model_replicas + # Split requests across model replicas model_requests = self.dispatch(copied_requests, num_model_replicas) + total_time = 0.0 + for model_replica in range(num_model_replicas): model_replica_energy = 0.0 - stage_iter_times = [] - stage_requests = self.dispatch(model_requests[model_replica], num_stages) - for stage in range(num_stages): + model_replica_time = [] + + # Prefill phase on prefill GPU + prefill_requests = copy.deepcopy(model_requests[model_replica]) + ( + prefill_updated_requests, + prefill_iter_times, + prefill_reqs_per_iter, + prefill_tokens_per_iter, + prefill_exe_time, + prefill_token_gen_times, + prefill_energy, + ) = self.sub_simulate( + execution_plan, + arch, + frequency, + prefill_requests, + num_attn_cell_replicas, + max_num_tokens_per_stage, + max_batch_size, + phase="prefill", + ) + if prefill_updated_requests is None: + return None - cell_replica_iter_times = [] - cell_requests = self.dispatch( - stage_requests[stage], num_attn_cell_replicas + # Calculate KV cache transfer time + kv_cache_size = sum( + 2 * model_config.num_layers * model_config.num_heads * (model_config.hidden_size // model_config.num_heads) * self.dtype["kv"].size * req.input_len + for req in prefill_updated_requests + ) + if 
self.num_total_nodes == 1: + kv_transfer_time = get_p2p_comm_time( + gpu=self.gpu, + num_nodes=1, + num_gpus_per_node=2, + dtype=self.dtype["kv"], + num_elements=kv_cache_size, ) - for cell_replica in range(num_attn_cell_replicas): - target_requests = cell_requests[cell_replica] - - ( - updated_requests, - cell_replica_iter_time, - reqs_per_iter, - tokens_per_iter, - exe_time, - request_token_gen_time, - stage_energy, - ) = self.sub_simulate( - execution_plan, - arch, - frequency, - target_requests, - num_attn_cell_replicas, - max_num_tokens_per_stage, - max_batch_size, - ) - - if updated_requests is None: - return None, None - requests.extend( - updated_requests - ) # timestamp updated with completion time - id_num = model_replica * stage * cell_replica - renamed_gen_time = { - f"{key}_{id_num}": value - for key, value in request_token_gen_time.items() - } - request_token_gen_times.update(renamed_gen_time) - cell_replica_iter_times.append(cell_replica_iter_time) - num_reqs_per_iteration.append(reqs_per_iter) - num_tokens_per_iteration.append(tokens_per_iter) - list_of_exe_time.append(exe_time) - model_replica_energy += ( - stage_energy // num_attn_cell_replicas * num_stages - ) - # Note: dividied by cell replicas as the energy scaling of cell is already handled in Line 693 - # Multiplied with num_stages because we only simulate one stage, but num_stages run concurrently - - # iteration time = slowest among the cell replicas - max_cell_iter_time = self.merge_max_elements(cell_replica_iter_times) - stage_iter_times.append(max_cell_iter_time) - - interleaved_list = [ - val - for pair in itertools.zip_longest(*stage_iter_times) - for val in pair - if val is not None - ] - - model_replica_iter_times = [] - if len(interleaved_list) <= num_stages: - model_replica_iter_times = interleaved_list.copy() else: - # Creates a sliding window to find the bottlenecked stage in the pipeline - for i in range(len(interleaved_list) - num_stages): - window = interleaved_list[i : i + 
num_stages] - model_replica_iter_times.append(max(window)) - model_replica_time.append(sum(model_replica_iter_times)) - total_energy += model_replica_energy + kv_transfer_time = get_p2p_comm_time( + gpu=self.gpu, + num_nodes=2, + num_gpus_per_node=1, + dtype=self.dtype["kv"], + num_elements=kv_cache_size, + ) - # Final execution time = the slowest among the replicas - total_time = max(model_replica_time) - ### Finished simulation; calculate the statistics of the results ### + # Decode phase on decode GPU + decode_requests = prefill_updated_requests + ( + decode_updated_requests, + decode_iter_times, + decode_reqs_per_iter, + decode_tokens_per_iter, + decode_exe_time, + decode_token_gen_times, + decode_energy, + ) = self.sub_simulate( + execution_plan, + arch, + frequency, + decode_requests, + num_attn_cell_replicas, + max_num_tokens_per_stage, + max_batch_size, + phase="decode", + ) + if decode_updated_requests is None: + return None + + # Combine statistics + renamed_gen_times = { + f"{key}_{model_replica}": prefill_token_gen_times.get(key, []) + decode_token_gen_times.get(key, []) + for key in set(list(prefill_token_gen_times.keys()) + list(decode_token_gen_times.keys())) + } + request_token_gen_times.update(renamed_gen_times) + # list_of_exe_time.extend(prefill_exe_time + [(name, t + kv_transfer_time) if name != "Wait" else (name, t) for name, t in decode_exe_time]) + # integrate the prefill and decode exe time + prefill_exe_time_dict = {name: t for name, t in prefill_exe_time} + decode_exe_time_dict = {name: t for name, t in decode_exe_time} + total_exe_time = [] + for name, t in decode_exe_time_dict.items(): + if name in prefill_exe_time_dict: + total_exe_time.append((name, prefill_exe_time_dict[name] + decode_exe_time_dict[name])) + total_exe_time.append(("KV Transfer", kv_transfer_time)) + list_of_exe_time.append(total_exe_time) + num_reqs_per_iteration.extend(prefill_reqs_per_iter + decode_reqs_per_iter) + 
num_tokens_per_iteration.extend(prefill_tokens_per_iter + decode_tokens_per_iter) + model_replica_energy += prefill_energy + decode_energy + kv_transfer_time * KV_CACHE_TRANSFER_POWER + model_replica_time.extend(prefill_iter_times + decode_iter_times + [kv_transfer_time]) + total_energy += model_replica_energy + total_time = max(total_time, sum(model_replica_time)) - avg_input_len = sum(request.input_len for request in copied_requests) / len( - copied_requests - ) - avg_output_len = sum(request.output_len for request in copied_requests) / len( - copied_requests - ) + avg_input_len = sum(request.input_len for request in copied_requests) / len(copied_requests) + avg_output_len = sum(request.output_len for request in copied_requests) / len(copied_requests) performance_metrics, performance_metrics_units, slo_metrics = self.get_metrics( model_config, avg_input_len, @@ -466,28 +366,24 @@ def simulate( token_percentiles, num_tokens_per_iteration, arch, - slo_targets) + slo_targets, + ) exe_stat_dict = {} idle_time = [] + # print(f"list_of_exe_time: {list_of_exe_time}") for exe_t in list_of_exe_time: - # Calculating idle time + # print(f"exe_t: {exe_t}") idle_time.append(total_time - sum(t * num_stages for _, t in exe_t)) summed_data = defaultdict(float) - # A function may be called multiple times in one iter, creating multiple entries - # merging these entries for each iter for key, value in exe_t: summed_data[key] += value result = [(key, value) for key, value in summed_data.items()] - # Modifying the data structure so it's easier to compute mean and std later for name, time in result: exe_stat_dict.setdefault(name, []).append(time * num_stages) - - # Compute the statistics exe_stat = [] for name in exe_stat_dict.keys(): exe_lst = exe_stat_dict.get(name) - # Wait time will be counted as Idle time if name == "Wait": for i, val in enumerate(exe_lst): idle_time[i] += val @@ -497,11 +393,10 @@ def simulate( exe_stat.append((name, exe_mean, exe_std)) exe_stat.append(("Idle", 
np.mean(idle_time), np.std(idle_time))) - requests = sorted(requests, key=lambda x: x.time_stamp) - + requests = sorted(copied_requests, key=lambda x: x.time_stamp) return requests, SimulatorOutput( param_size_per_device=param_size / GB, - available_memory_per_device=min_available_memory / GB, + available_memory_per_device=min_available_memory_decode / GB, num_requests_per_iteration=np.mean(num_reqs_per_iteration), num_tokens_per_iteration=np.mean(num_tokens_per_iteration), time_statistics=exe_stat, @@ -521,208 +416,223 @@ def sub_simulate( num_attn_cell_replicas: int, max_num_tokens_per_stage: int, max_batch_size: int, + phase: str = "combined", # "prefill" or "decode" ): parallel_schedule = execution_plan.parallel_schedule stage_schedule = parallel_schedule.stage_schedule num_stages = parallel_schedule.num_stages - - min_num_replicas = min( - cell_schedule.num_replicas - for cell_schedule in stage_schedule.cell_schedules - ) + min_num_replicas = min(cell_schedule.num_replicas for cell_schedule in stage_schedule.cell_schedules) num_cached_tokens = 0 req_counter = 0 - num_generated_tokens: Dict[int, int] = {} # request_id -> num_tokens - running: List[int] = [] # request_ids - stopped: List[int] = [] # request_ids - + num_generated_tokens: Dict[int, int] = {} + running: List[int] = [] + stopped: List[int] = [] get_seq_len = lambda request_id: ( - requests[request_id].input_len + num_generated_tokens[request_id] + requests[request_id].input_len + num_generated_tokens.get(request_id, 0) ) - # Statistics. 
execution_time: List[Tuple[str, float]] = [] - num_cells = len(stage_schedule.cell_schedules) - for i in range(num_cells): - cell = stage_schedule.cell_schedules[i].cell + for i, cell_schedule in enumerate(stage_schedule.cell_schedules): + cell = cell_schedule.cell execution_time.append((cell.get_name(), 0.0)) for comm in stage_schedule.reshard_comms[i]: execution_time.append((comm.comm_type.name, 0.0)) - if parallel_schedule.num_stages > 1: + if num_stages > 1: execution_time.append(("SendRecv", 0.0)) num_reqs_per_iteration: List[int] = [] num_tokens_per_iteration: List[int] = [] - # Simulate the execution. time_per_iteration: List[float] = [] - # Time metrics for each request, request id is the key and value of list is time per token request_token_gen_times: Dict[str, List[float]] = {} - internal_clock = 0 # decide whether a request has arrived - wait_next_req_time = 0 # the idle time of waiting for next request to come - energy = 0 # energy consumption + internal_clock = 0 + wait_next_req_time = 0 + energy = 0 + while True: - # Batch requests. input_lens: List[int] = [] cached_lens: List[int] = [] - new_running: List[int] = [] - while running: - request_id = running.pop(0) - while num_cached_tokens + 1 > max_num_tokens_per_stage: - if running: - victim = running.pop(-1) - stopped.append(victim) - num_cached_tokens -= get_seq_len(victim) - else: + + if phase == "prefill": + # Batch requests for prefill + while running: + request_id = running.pop(0) + seq_len = get_seq_len(request_id) + if num_cached_tokens + seq_len > max_num_tokens_per_stage: stopped.append(request_id) - num_cached_tokens -= get_seq_len(request_id) - break - else: - input_lens.append(1) - num_cached_tokens += 1 - cached_lens.append(num_generated_tokens[request_id] + 1) - new_running.append(request_id) - running = new_running - - # Resume the stopped requests. - # Sort in the order of request_id. 
- stopped = sorted(stopped) - while stopped: - request_id = stopped[0] - seq_len = get_seq_len(request_id) - if num_cached_tokens + seq_len + 1 > max_num_tokens_per_stage: - break - request_id = stopped.pop(0) - input_lens.append(1) - num_cached_tokens += seq_len + 1 - cached_lens.append(num_generated_tokens[request_id] + 1) - running.append(request_id) - - # Batch new requests. - if not stopped: + num_cached_tokens -= seq_len + else: + input_lens.append(requests[request_id].input_len) + cached_lens.append(0) + new_running.append(request_id) + running = new_running + + # Add new requests while req_counter < len(requests): request_id = req_counter input_len = requests[request_id].input_len - # If the KV cache does not have enough space, stop. if num_cached_tokens + input_len > max_num_tokens_per_stage: break - num_tokens = sum(input_lens) + input_len - # If the total number of tokens exceeds the maximum, stop. - if ( - num_tokens * num_attn_cell_replicas / min_num_replicas - > MAX_NUM_INPUT_TOKENS - ): + if num_tokens * num_attn_cell_replicas / min_num_replicas > MAX_NUM_INPUT_TOKENS: break - - curr_batch_size = len(running) - if(curr_batch_size == max_batch_size and max_batch_size != 0): + if len(running) == max_batch_size and max_batch_size != 0: break - - # Request has not yet arrived if requests[request_id].time_stamp > internal_clock: break - num_cached_tokens += input_len input_lens.append(input_len) cached_lens.append(0) running.append(request_id) - num_generated_tokens[request_id] = 0 req_counter += 1 - if not running: - if req_counter < len(requests): - # Cannot proceed. - # This can happen when the space for the KV cache is - # too small to store even a single sequence. 
- if num_cached_tokens + input_len > max_num_tokens_per_stage: - return None, None, None, None, None, None + if not running: + if req_counter < len(requests): + if num_cached_tokens + input_len > max_num_tokens_per_stage: + return None, None, None, None, None, None, None + wait_next_req_time += requests[req_counter].time_stamp - internal_clock + internal_clock = requests[req_counter].time_stamp else: - # Or because the requests are coming too slow; - # wait until next request comes. - if requests[req_counter].time_stamp > internal_clock: - wait_next_req_time += ( - requests[req_counter].time_stamp - internal_clock - ) - internal_clock = requests[req_counter].time_stamp - else: - return None, None, None, None, None, None - - else: - # All the requests are finished. - assert num_cached_tokens == 0, num_cached_tokens - assert not stopped, stopped - break - - # Record the number of requests and tokens. - num_reqs_per_iteration.append(len(running) * num_attn_cell_replicas) - num_tokens_per_iteration.append(sum(input_lens) * num_attn_cell_replicas) + break - # Get the execution time of a stage with the given input if running - if running: - stage_execution_time, stage_energy = self.get_stage_execution_time( - execution_plan.parallel_schedule.stage_schedule, - num_attn_cell_replicas, - input_lens, - cached_lens, - self.gpu, - frequency, - self.cluster_size_per_node, - ) - if num_stages > 1: - stage_execution_time.append( - self.get_cross_stage_comm_time( - sum(input_lens), - execution_plan.stage_clusters, - self.gpu, - self.cluster_size_per_node, - ) - ) - time_per_iteration.append(sum(stage_execution_time)) - internal_clock += sum(stage_execution_time) - energy += sum(stage_energy) - # Update the statistics. 
- for i in range(len(execution_time)): - execution_time[i] = ( - execution_time[i][0], - execution_time[i][1] + stage_execution_time[i], + # Execute prefill + if running: + stage_execution_time, stage_energy = self.get_stage_execution_time( + stage_schedule, + num_attn_cell_replicas, + input_lens, + cached_lens, + self.gpu, + frequency, + self.cluster_size_per_node, + phase="prefill", + gpu_id=self.prefill_gpu, ) - - # Remove finished requests from the batch. Update logged time per token - for request_id in running: - num_generated_tokens[request_id] += 1 - if num_generated_tokens[request_id] == 1: - request_token_gen_times[request_id] = [ - time_per_iteration[-1] * num_stages - ] - else: - request_token_gen_times[request_id].append( - time_per_iteration[-1] * num_stages + time_per_iteration.append(sum(stage_execution_time)) + internal_clock += sum(stage_execution_time) + energy += sum(stage_energy) + for i in range(len(execution_time)): + execution_time[i] = ( + execution_time[i][0], + execution_time[i][1] + stage_execution_time[i], ) - new_running: List[int] = [] - for request_id in running: - num_generated = num_generated_tokens[request_id] - if arch == "encoder": - output_len = 0 + + # Record TTFT + for request_id in running: + request_token_gen_times[str(request_id)] = [time_per_iteration[-1] * num_stages] + requests[request_id].time_stamp = internal_clock * num_stages + running = [] # Prefill complete, move to decode + + elif phase == "decode": + # Batch requests for decode + while running: + request_id = running.pop(0) + seq_len = get_seq_len(request_id) + if num_cached_tokens + seq_len + 1 > max_num_tokens_per_stage: + stopped.append(request_id) + num_cached_tokens -= seq_len else: - output_len = requests[request_id].output_len - if num_generated < output_len: + input_lens.append(1) + num_cached_tokens += 1 + cached_lens.append(num_generated_tokens.get(request_id, 0) + 1) new_running.append(request_id) - else: - # Finished processing; update the time_stamp 
to completed time. - num_cached_tokens -= get_seq_len(request_id) - 1 - requests[request_id].time_stamp = internal_clock * num_stages running = new_running - execution_time.append(("Wait", wait_next_req_time)) + # Resume stopped requests + stopped = sorted(stopped) + while stopped: + request_id = stopped[0] + seq_len = get_seq_len(request_id) + if num_cached_tokens + seq_len + 1 > max_num_tokens_per_stage: + break + request_id = stopped.pop(0) + input_lens.append(1) + num_cached_tokens += seq_len + 1 + cached_lens.append(num_generated_tokens.get(request_id, 0) + 1) + running.append(request_id) + # Add new requests + while req_counter < len(requests): + request_id = req_counter + input_len = requests[request_id].input_len + if num_cached_tokens + input_len + 1 > max_num_tokens_per_stage: + break + num_tokens = sum(input_lens) + 1 + if num_tokens * num_attn_cell_replicas / min_num_replicas > MAX_NUM_INPUT_TOKENS: + break + if len(running) >= max_batch_size and max_batch_size != 0: + break + if requests[request_id].time_stamp > internal_clock: + break + num_cached_tokens += input_len + 1 + input_lens.append(1) + cached_lens.append(num_generated_tokens.get(request_id, 0) + 1) + running.append(request_id) + num_generated_tokens[request_id] = 0 + req_counter += 1 + + if not running: + if req_counter < len(requests): + if num_cached_tokens + input_len > max_num_tokens_per_stage: + return None, None, None, None, None, None, None + wait_next_req_time += requests[req_counter].time_stamp - internal_clock + internal_clock = requests[req_counter].time_stamp + else: + break + + # Execute decode + if running: + stage_execution_time, stage_energy = self.get_stage_execution_time( + stage_schedule, + num_attn_cell_replicas, + input_lens, + cached_lens, + self.gpu, + frequency, + self.cluster_size_per_node, + phase="decode", + gpu_id=self.decode_gpu, + ) + time_per_iteration.append(sum(stage_execution_time)) + internal_clock += sum(stage_execution_time) + energy += sum(stage_energy) 
+ for i in range(len(execution_time)): + execution_time[i] = ( + execution_time[i][0], + execution_time[i][1] + stage_execution_time[i], + ) + + # Update token generation + for request_id in running: + num_generated_tokens[request_id] = num_generated_tokens.get(request_id, 0) + 1 + if str(request_id) not in request_token_gen_times: + request_token_gen_times[str(request_id)] = [] + request_token_gen_times[str(request_id)].append(time_per_iteration[-1] * num_stages) + + # Remove finished requests + new_running = [] + for request_id in running: + if num_generated_tokens[request_id] < requests[request_id].output_len: + new_running.append(request_id) + else: + num_cached_tokens -= get_seq_len(request_id) - 1 + requests[request_id].time_stamp = internal_clock * num_stages + running = new_running + + else: + raise ValueError(f"Unsupported phase: {phase}") + + num_reqs_per_iteration.append(len(running) * num_attn_cell_replicas) + num_tokens_per_iteration.append(sum(input_lens) * num_attn_cell_replicas) + + execution_time.append(("Wait", wait_next_req_time)) return ( requests, time_per_iteration, - np.mean(num_reqs_per_iteration), - np.mean(num_tokens_per_iteration), + num_reqs_per_iteration, + num_tokens_per_iteration, execution_time, request_token_gen_times, energy, @@ -737,33 +647,23 @@ def get_stage_execution_time( gpu_type: str, frequency: int, cluster_size_per_node: int, - ) -> List[float]: - # Calculate the number of input tokens per cell. - num_total_input_tokens = ( - sum(input_lens_per_attn_replica) * num_attn_cell_replicas - ) - + phase: str = "combined", + gpu_id: int = 0, + ) -> Tuple[List[float], List[float]]: + num_total_input_tokens = sum(input_lens_per_attn_replica) * num_attn_cell_replicas execution_time: List[float] = [] execution_energy: List[float] = [] for i, cell_schedule in enumerate(stage_schedule.cell_schedules): - # Split the input tokens evenly among the replicas. 
num_replicas = cell_schedule.num_replicas - num_input_tokens = ( - num_total_input_tokens + num_replicas - 1 - ) // num_replicas + num_input_tokens = (num_total_input_tokens + num_replicas - 1) // num_replicas num_devices = cell_schedule.get_num_devices() - # For mixed precision, assume the data with lower precision - # will be dequantized to match data of higher precision comp_type = self.highest_prec() - # Cell execution. - # We leverage the fact that the 0-th device is always assigned the - # most number of tasks. cell_execution_time = 0.0 cell_execution_energy = 0.0 task_dict = cell_schedule.task_mapping.tasks_per_device[0] for task_type, tasks in task_dict.items(): - if task_type == "MHAHead" or task_type == "MQAHead": + if task_type in ["MHAHead", "MQAHead"]: exe_time, exe_energy = mha_time( gpu_type, frequency, @@ -806,8 +706,6 @@ def get_stage_execution_time( cell_execution_time += exe_time cell_execution_energy += exe_energy elif task_type.startswith("ExpertMLPFilter"): - # Each expert will get topk / E of the input tokens where E - # is the total number of experts. num_total_experts = cell_schedule.cell.num_experts topk = cell_schedule.cell.topk exe_time, exe_energy = mlp_time( @@ -836,16 +734,12 @@ def get_stage_execution_time( execution_time.append(cell_execution_time) execution_energy.append(cell_execution_energy * num_devices) - if ( - cell_schedule.cell.get_name() == "MoE" - or cell_schedule.cell.get_name() == "SwiMoE" - ): + if cell_schedule.cell.get_name() in ["MoE", "SwiMoE"]: if len(task_dict) < cell_schedule.cell.num_experts: num_devices = len(cell_schedule.task_mapping.tasks_per_device) num_input_tokens = max(num_input_tokens // num_devices, 1) hidden_size = self.model.hidden_size - # Resharding. 
for comm in stage_schedule.reshard_comms[i]: if comm.num_devices < cluster_size_per_node: num_nodes = 1 @@ -853,7 +747,6 @@ def get_stage_execution_time( else: num_nodes = comm.num_devices // cluster_size_per_node num_devices_per_node = cluster_size_per_node - num_input_tokens *= comm.size_factor num_input_tokens = max(num_input_tokens, 1) if comm.comm_type == CommType.AllReduce: @@ -867,9 +760,7 @@ def get_stage_execution_time( elif comm.comm_type == CommType.AllToAll: num_elements = num_input_tokens * hidden_size else: - raise NotImplementedError( - f"Unsupported comm type: {comm.comm_type}" - ) + raise NotImplementedError(f"Unsupported comm type: {comm.comm_type}") comm_time = get_comm_time( comm.comm_type, gpu_type, @@ -880,7 +771,6 @@ def get_stage_execution_time( ) execution_time.append(comm_time) - # Multiply the block execution time by the number of blocks. return [t * stage_schedule.num_blocks for t in execution_time], [ e * stage_schedule.num_blocks for e in execution_energy ] @@ -895,19 +785,10 @@ def get_cross_stage_comm_time( hidden_size = self.model.hidden_size num_total_devices = sum(cluster.get_num_devices() for cluster in stage_clusters) cross_node = num_total_devices > cluster_size_per_node - if cross_node: - return get_p2p_comm_time( - gpu=gpu_type, - num_nodes=2, - num_gpus_per_node=1, - dtype=self.dtype["act"], - num_elements=num_input_tokens * hidden_size, - ) - else: - return get_p2p_comm_time( - gpu=gpu_type, - num_nodes=1, - num_gpus_per_node=2, - dtype=self.dtype["act"], - num_elements=num_input_tokens * hidden_size, - ) + return get_p2p_comm_time( + gpu=gpu_type, + num_nodes=2 if cross_node else 1, + num_gpus_per_node=1, + dtype=self.dtype["act"], + num_elements=num_input_tokens * hidden_size, + ) \ No newline at end of file diff --git a/apex_plus/simulator/simulator_origin.py b/apex_plus/simulator/simulator_origin.py new file mode 100644 index 0000000..5a279f6 --- /dev/null +++ b/apex_plus/simulator/simulator_origin.py @@ -0,0 +1,914 @@ 
+from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple +from collections import defaultdict + +import numpy as np +import itertools +import copy + +from apex_plus.cluster.cluster import Cluster +from apex_plus.execution.plan import ExecutionPlan +from apex_plus.ir.transformer import Transformer +from apex_plus.parallel.comm import CommType +from apex_plus.parallel.schedule import StageSchedule +from apex_plus.simulator.comm_profile import get_comm_time, get_p2p_comm_time +from apex_plus.simulator.comp_profile import mha_time, mlp_time, glu_time, swiglu_time +from apex_plus.simulator.trace import Trace, Request +from apex_plus.utils.dtype import DTYPE + +GB = 1024 * 1024 * 1024 +WORKSPACE = 1 * GB # a constant buffer for each device to run the program + +MAX_NUM_INPUT_TOKENS = 64 * 1024 # Max in profile/scripts/gemm.py + +US_TO_SEC = 1000000 +MS_TO_SEC = 1000 +US_TO_MS = 1000 + + +@dataclass +class SimulatorOutput: + + param_size_per_device: float + available_memory_per_device: float + num_requests_per_iteration: float + num_tokens_per_iteration: float + time_statistics: List[Tuple[str, float]] + performance_metrics: List[Tuple[str, float]] + performance_metrics_units: List[str] + slo_metrics: List[float] + total_time: float + total_energy: float + + +class Simulator: + + def __init__( + self, + model: Transformer, + cluster: Cluster, + trace: Trace, + dtype: dict, + ) -> None: + self.model = model + self.cluster = cluster + self.trace = trace + self.dtype = dtype + self.gpu = cluster.get_device().device_type + self.gpu_memory = cluster.get_device_memory_capacity() + self.peak_flops = cluster.get_device().peak_flops[self.highest_prec()] + self.peak_mem_bandwidth = cluster.get_device().peak_mem_bandwidth + self.num_total_nodes = cluster.get_num_nodes() + self.num_total_devices = cluster.get_num_devices() + self.cluster_size_per_node = self.num_total_devices // self.num_total_nodes + + def highest_prec(self) -> DTYPE: + data_type = [] + 
data_type.append(self.dtype["w"]) + data_type.append(self.dtype["kv"]) + data_type.append(self.dtype["act"]) + # Dealing with mixed precesion + # Assuming we dequantize the value for computation + highest_precision = DTYPE.FLOAT8 + if DTYPE.FLOAT16 in data_type: + highest_precision = DTYPE.FLOAT16 + if DTYPE.FLOAT32 in data_type: + highest_precision = DTYPE.FLOAT32 + return highest_precision + + def dispatch( + self, + requests: List[Request], + factor: int, + ) -> List[List[Request]]: + sublists = [[] for _ in range(factor)] + # Distribute elements in a round-robin fashion + # Can be replaced with more sophisticated strategy + for index, element in enumerate(requests): + sublists[index % factor].append(element) + return sublists + + def merge_max_elements(self, lists): + max_length = max(len(lst) for lst in lists) + extended_lists = [lst + [None] * (max_length - len(lst)) for lst in lists] + merged_list = [ + max(filter(lambda x: x is not None, elements)) + for elements in zip(*extended_lists) + ] + return merged_list + + def get_metrics( + self, + model_config: Transformer, + avg_input_len: int = 0, + avg_output_len: int = 0, + requests: List[Trace] = [], + total_time: float = 0.0, + request_token_gen_times: Dict[str, List[float]] = {}, + req_percentiles: List[int] = [], + token_percentiles: List[int] = [], + seq_lens: List[int] = [], + arch: str = '', + slo_targets: List[int] = [], + ): + + + def calculate_tbt_percentiles(latency_dict): + token_latencies_per_request = [latency for latency in latency_dict.values()] + avg_tbt_vals = [] + percentile_vals = [] + + # Calculate avg TBT per request + for latency_list in token_latencies_per_request: + list_length = len(latency_list) + avg_tbt = 0 + if list_length > 1: + ttft = latency_list[0] + ttlt = sum(latency_list) + avg_tbt = (ttlt - ttft) / list_length + avg_tbt_vals.append(avg_tbt) + + # Calculate all necessary percentiles for tbt + for percentile in token_percentiles: + 
percentile_vals.append(np.percentile(avg_tbt_vals, percentile)) + + return percentile_vals + + def calculate_slo_metrics(latency_dict, slo_targets): + token_latencies_per_request = [latency for latency in latency_dict.values()] + num_requests = len(token_latencies_per_request) + ttft_target = slo_targets[0] + tpot_target = slo_targets[1] + slo_metrics = [] + + # Calculate Percentage of requests that are <= TTFT_SLO + # TTFT is just the first token in the latency + ttft_slo_counter = 0 + for latency_list in token_latencies_per_request: + if(latency_list[0]/US_TO_MS <= ttft_target): + ttft_slo_counter += 1 + slo_metrics.append( (ttft_slo_counter/num_requests) * 100 ) + + # Calculate Percentage of requests that have an avg TPOT <= TPOT_SLO + tpot_slo_counter = 0 + tok_latencies_per_req_after_first_tok = [sublist[1:] for sublist in token_latencies_per_request] + for latency_list in token_gen_times: + # Calculate Avg TPOT per request + avg_tpot = np.mean(latency_list)/US_TO_MS + if(avg_tpot <= tpot_target): + tpot_slo_counter += 1 + slo_metrics.append( (tpot_slo_counter/num_requests) * 100 ) + + return slo_metrics + + # Store performance metrics - Time to first token, TPOT ,P50, P95, & other latencies + performance_metrics: List[Tuple[str, float]] = [] + performance_metrics_units: List[str] = [] + performance_metrics.append( + ("Throughput: Avg. Tokens generated per second", float("NaN")) + ) + performance_metrics.append( + ("Throughput: Avg. Tokens processed per second", float("NaN")) + ) + performance_metrics.append(("Throughput: Requests per second", float("NaN"))) + performance_metrics.append( + ("Latency: Avg. Time to first token (TTFT in msec)", float("NaN")) + ) + performance_metrics.append( + ("Latency: Avg. 
Time per output token (TPOT in msec)", float("NaN")) + ) + performance_metrics_units += [ + "tokens/sec", + "tokens/sec", + "requests/sec", + "msec", + "msec", + ] + + num_layers = 0 + num_heads = 0 + head_dim = 0 + hidden_size = 0 + theoretical_peak_flops = self.peak_flops + + if hasattr(model_config, "num_layers"): + num_layers = model_config.num_layers + num_heads = model_config.num_heads + hidden_size = model_config.hidden_size + elif hasattr(model_config, "num_decoder_layers"): + num_layers = model_config.num_decoder_layers + num_heads = model_config.num_decoder_heads + hidden_size = model_config.decoder_hidden_size + else: + raise ValueError("Unable to get model layers, heads, or hidden size") + head_dim = hidden_size // num_heads + + num_parameters = num_layers * hidden_size * hidden_size * 12 + + tpot = 0.0 + avg_ttft = 0.0 + mbu = 0.0 + # If encoder, there the output_len is 0 + if arch == "encoder": + avg_output_len = 0.0 + # Decoders that generate tokens + else: + # TPOT after the first token(this is also known as inter-token latency) + token_gen_times = [value[1:] for value in request_token_gen_times.values()] + flat_token_gen_times = [ + item for sublist in token_gen_times for item in sublist + ] + tpot = np.mean(flat_token_gen_times) / MS_TO_SEC + avg_ttft = ( + np.mean([value[0] for value in request_token_gen_times.values()]) + / US_TO_MS + ) + + # Calculate token percentiles + token_percentiles = token_percentiles + [50, 95] + token_percentiles.sort() + # Avg percentile vals are returned in order of sorted percentiles + avg_percentile_vals = calculate_tbt_percentiles(request_token_gen_times) + # Add to performance_metrics + for index, percentile in enumerate(token_percentiles): + performance_metrics.append( + ( + f"Avg. 
TBT Percentile: P{percentile}", + avg_percentile_vals[index] / US_TO_MS, + ) + ) + performance_metrics_units.append("msec") + # MBU + kv_cache_size = ( + 2 * num_layers * num_heads * head_dim * self.dtype["kv"].size + ) + tpot_sec = tpot / MS_TO_SEC + theoretical_peak_mem_bandwidth = self.peak_mem_bandwidth + observed_mem_bandwidth = (num_parameters + kv_cache_size) / tpot_sec + mbu = (observed_mem_bandwidth / theoretical_peak_mem_bandwidth) * 100 + + # Tokens gen per second + token_throughput = avg_output_len * len(requests) / (total_time / US_TO_SEC) + performance_metrics[0] = (performance_metrics[0][0], token_throughput) + # Tokens processed per second + performance_metrics[1] = ( + performance_metrics[1][0], + (avg_input_len + avg_output_len) * len(requests) / (total_time / US_TO_SEC), + ) + # Requests per second + performance_metrics[2] = ( + performance_metrics[2][0], + len(requests) / (total_time / US_TO_SEC), + ) + # Time to first token + performance_metrics[3] = (performance_metrics[3][0], avg_ttft) + # TPOT after the first token(this is also known as inter-token latency) + performance_metrics[4] = (performance_metrics[4][0], tpot) + # Calculate request percentiles + + request_latencies = [ + sum(token_latencies) for token_latencies in request_token_gen_times.values() + ] + req_percentiles = req_percentiles + [50, 95] + req_percentiles.sort() + for percentile in req_percentiles: + percentile_val = np.percentile(request_latencies, percentile) / US_TO_SEC + performance_metrics.append( + ( + f"Request Completion Latency: {percentile}th percentile", + percentile_val, + ) + ) + performance_metrics_units.append("sec") + + # Calculate Avg MFU + observed_throughput = token_throughput + mfus: List[float] = [] + for index, seq_len in enumerate(seq_lens): + theoretical_throughput = theoretical_peak_flops / ( + 6 * num_parameters + 12 * num_layers * num_heads * head_dim * seq_len + ) + mfu = (observed_throughput / theoretical_throughput) * 100 + mfus.append(mfu) + + 
avg_mfu = np.mean(mfus) + performance_metrics.append((f"Avg. MFU Per iteration", avg_mfu)) + performance_metrics_units.append("%") + + # Append mbu + performance_metrics.append((f"MBU ", mbu)) + performance_metrics_units.append("%") + + # Get SLO Metrics + slo_metrics = calculate_slo_metrics(request_token_gen_times, slo_targets) + + return performance_metrics, performance_metrics_units, slo_metrics + + def simulate( + self, + execution_plan: ExecutionPlan, + arch: str, + frequency: int, + model_config: Transformer, + req_percentiles: List[int] = [], + token_percentiles: List[int] = [], + slo_targets: List[int] = [], + max_batch_size: int = 0, + distserve: bool = False, + ) -> Optional[SimulatorOutput]: + parallel_schedule = execution_plan.parallel_schedule + stage_schedule = parallel_schedule.stage_schedule + num_stages = parallel_schedule.num_stages + num_model_replicas = parallel_schedule.num_model_replicas + num_attn_cell_replicas = 0 + for cell_schedule in stage_schedule.cell_schedules: + if cell_schedule.cell.is_attn(): + num_attn_cell_replicas = cell_schedule.num_replicas + break + assert num_attn_cell_replicas > 0 + + param_sizes = stage_schedule.get_param_size_per_device(self.dtype["w"]) + num_devices = len(param_sizes) + + available_memories = [ + self.gpu_memory - WORKSPACE - param_size for param_size in param_sizes + ] + if any(avail_mem < 0 for avail_mem in available_memories): + # Invalid. + return None + min_available_memory = min(available_memories) + WORKSPACE + param_size = max(param_sizes) + + # Calculate the maximum number of tokens that can be stored in KV cache. + # This limits the maximum number of sequences that can be batched. + kv_token_sizes = ( + [1] * num_devices + if arch == "encoder" + else stage_schedule.get_kv_token_size_per_device(self.dtype["kv"]) + ) + + max_num_tokens = min( + int(available_memories[i] // kv_token_sizes[i]) for i in range(num_devices) + ) + # Evenly partition the KV cache for each stage. 
+ max_num_tokens_per_stage = max_num_tokens // num_stages + + # Statistics + list_of_exe_time = [] + num_reqs_per_iteration = [] + num_tokens_per_iteration = [] + request_token_gen_times = {} + + requests = [] + model_replica_time = [] + + total_energy = 0.0 + # Copy to avoid altering the original traces + copied_requests = copy.deepcopy(self.trace.requests) + + ### Finished housekeeping; starts the actual simulation ### + + # Split a list of requests to n sublists, + # where n = num_model_replicas + model_requests = self.dispatch(copied_requests, num_model_replicas) + for model_replica in range(num_model_replicas): + model_replica_energy = 0.0 + stage_iter_times = [] + stage_requests = self.dispatch(model_requests[model_replica], num_stages) + for stage in range(num_stages): + + cell_replica_iter_times = [] + cell_requests = self.dispatch( + stage_requests[stage], num_attn_cell_replicas + ) + for cell_replica in range(num_attn_cell_replicas): + target_requests = cell_requests[cell_replica] + + ( + updated_requests, + cell_replica_iter_time, + reqs_per_iter, + tokens_per_iter, + exe_time, + request_token_gen_time, + stage_energy, + ) = self.sub_simulate( + execution_plan, + arch, + frequency, + target_requests, + num_attn_cell_replicas, + max_num_tokens_per_stage, + max_batch_size, + ) + + if updated_requests is None: + return None, None + requests.extend( + updated_requests + ) # timestamp updated with completion time + id_num = model_replica * stage * cell_replica + renamed_gen_time = { + f"{key}_{id_num}": value + for key, value in request_token_gen_time.items() + } + request_token_gen_times.update(renamed_gen_time) + cell_replica_iter_times.append(cell_replica_iter_time) + num_reqs_per_iteration.append(reqs_per_iter) + num_tokens_per_iteration.append(tokens_per_iter) + list_of_exe_time.append(exe_time) + model_replica_energy += ( + stage_energy // num_attn_cell_replicas * num_stages + ) + # Note: dividied by cell replicas as the energy scaling of cell is 
already handled in Line 693 + # Multiplied with num_stages because we only simulate one stage, but num_stages run concurrently + + # iteration time = slowest among the cell replicas + max_cell_iter_time = self.merge_max_elements(cell_replica_iter_times) + stage_iter_times.append(max_cell_iter_time) + + interleaved_list = [ + val + for pair in itertools.zip_longest(*stage_iter_times) + for val in pair + if val is not None + ] + + model_replica_iter_times = [] + if len(interleaved_list) <= num_stages: + model_replica_iter_times = interleaved_list.copy() + else: + # Creates a sliding window to find the bottlenecked stage in the pipeline + for i in range(len(interleaved_list) - num_stages): + window = interleaved_list[i : i + num_stages] + model_replica_iter_times.append(max(window)) + model_replica_time.append(sum(model_replica_iter_times)) + total_energy += model_replica_energy + + # Final execution time = the slowest among the replicas + total_time = max(model_replica_time) + + ### Finished simulation; calculate the statistics of the results ### + + avg_input_len = sum(request.input_len for request in copied_requests) / len( + copied_requests + ) + avg_output_len = sum(request.output_len for request in copied_requests) / len( + copied_requests + ) + performance_metrics, performance_metrics_units, slo_metrics = self.get_metrics( + model_config, + avg_input_len, + avg_output_len, + copied_requests, + total_time, + request_token_gen_times, + req_percentiles, + token_percentiles, + num_tokens_per_iteration, + arch, + slo_targets) + + exe_stat_dict = {} + idle_time = [] + for exe_t in list_of_exe_time: + # Calculating idle time + idle_time.append(total_time - sum(t * num_stages for _, t in exe_t)) + summed_data = defaultdict(float) + # A function may be called multiple times in one iter, creating multiple entries + # merging these entries for each iter + for key, value in exe_t: + summed_data[key] += value + result = [(key, value) for key, value in summed_data.items()] + 
# Modifying the data structure so it's easier to compute mean and std later + for name, time in result: + exe_stat_dict.setdefault(name, []).append(time * num_stages) + + # Compute the statistics + exe_stat = [] + for name in exe_stat_dict.keys(): + exe_lst = exe_stat_dict.get(name) + # Wait time will be counted as Idle time + if name == "Wait": + for i, val in enumerate(exe_lst): + idle_time[i] += val + continue + exe_mean = np.mean(exe_lst) + exe_std = np.std(exe_lst) + exe_stat.append((name, exe_mean, exe_std)) + exe_stat.append(("Idle", np.mean(idle_time), np.std(idle_time))) + + requests = sorted(requests, key=lambda x: x.time_stamp) + + return requests, SimulatorOutput( + param_size_per_device=param_size / GB, + available_memory_per_device=min_available_memory / GB, + num_requests_per_iteration=np.mean(num_reqs_per_iteration), + num_tokens_per_iteration=np.mean(num_tokens_per_iteration), + time_statistics=exe_stat, + performance_metrics=performance_metrics, + performance_metrics_units=performance_metrics_units, + slo_metrics=slo_metrics, + total_time=total_time, + total_energy=total_energy, + ) + + def sub_simulate( + self, + execution_plan: ExecutionPlan, + arch: str, + frequency: int, + requests: List[Request], + num_attn_cell_replicas: int, + max_num_tokens_per_stage: int, + max_batch_size: int, + ): + parallel_schedule = execution_plan.parallel_schedule + stage_schedule = parallel_schedule.stage_schedule + num_stages = parallel_schedule.num_stages + + min_num_replicas = min( + cell_schedule.num_replicas + for cell_schedule in stage_schedule.cell_schedules + ) + + num_cached_tokens = 0 + req_counter = 0 + num_generated_tokens: Dict[int, int] = {} # request_id -> num_tokens + running: List[int] = [] # request_ids + stopped: List[int] = [] # request_ids + + get_seq_len = lambda request_id: ( + requests[request_id].input_len + num_generated_tokens[request_id] + ) + + # Statistics. 
+ execution_time: List[Tuple[str, float]] = [] + num_cells = len(stage_schedule.cell_schedules) + for i in range(num_cells): + cell = stage_schedule.cell_schedules[i].cell + execution_time.append((cell.get_name(), 0.0)) + for comm in stage_schedule.reshard_comms[i]: + execution_time.append((comm.comm_type.name, 0.0)) + if parallel_schedule.num_stages > 1: + execution_time.append(("SendRecv", 0.0)) + + num_reqs_per_iteration: List[int] = [] + num_tokens_per_iteration: List[int] = [] + # Simulate the execution. + time_per_iteration: List[float] = [] + # Time metrics for each request, request id is the key and value of list is time per token + request_token_gen_times: Dict[str, List[float]] = {} + internal_clock = 0 # decide whether a request has arrived + wait_next_req_time = 0 # the idle time of waiting for next request to come + energy = 0 # energy consumption + while True: + # Batch requests. + input_lens: List[int] = [] + cached_lens: List[int] = [] + + new_running: List[int] = [] + while running: + request_id = running.pop(0) + while num_cached_tokens + 1 > max_num_tokens_per_stage: + if running: + victim = running.pop(-1) + stopped.append(victim) + num_cached_tokens -= get_seq_len(victim) + else: + stopped.append(request_id) + num_cached_tokens -= get_seq_len(request_id) + break + else: + input_lens.append(1) + num_cached_tokens += 1 + cached_lens.append(num_generated_tokens[request_id] + 1) + new_running.append(request_id) + running = new_running + + # Resume the stopped requests. + # Sort in the order of request_id. + stopped = sorted(stopped) + while stopped: + request_id = stopped[0] + seq_len = get_seq_len(request_id) + if num_cached_tokens + seq_len + 1 > max_num_tokens_per_stage: + break + request_id = stopped.pop(0) + input_lens.append(1) + num_cached_tokens += seq_len + 1 + cached_lens.append(num_generated_tokens[request_id] + 1) + running.append(request_id) + + # Batch new requests. 
+ if not stopped: + while req_counter < len(requests): + request_id = req_counter + input_len = requests[request_id].input_len + # If the KV cache does not have enough space, stop. + if num_cached_tokens + input_len > max_num_tokens_per_stage: + break + + num_tokens = sum(input_lens) + input_len + # If the total number of tokens exceeds the maximum, stop. + if ( + num_tokens * num_attn_cell_replicas / min_num_replicas + > MAX_NUM_INPUT_TOKENS + ): + break + + curr_batch_size = len(running) + if(curr_batch_size == max_batch_size and max_batch_size != 0): + break + + # Request has not yet arrived + if requests[request_id].time_stamp > internal_clock: + break + + num_cached_tokens += input_len + input_lens.append(input_len) + cached_lens.append(0) + running.append(request_id) + + num_generated_tokens[request_id] = 0 + req_counter += 1 + + if not running: + if req_counter < len(requests): + # Cannot proceed. + # This can happen when the space for the KV cache is + # too small to store even a single sequence. + if num_cached_tokens + input_len > max_num_tokens_per_stage: + return None, None, None, None, None, None + else: + # Or because the requests are coming too slow; + # wait until next request comes. + if requests[req_counter].time_stamp > internal_clock: + wait_next_req_time += ( + requests[req_counter].time_stamp - internal_clock + ) + internal_clock = requests[req_counter].time_stamp + else: + return None, None, None, None, None, None + + else: + # All the requests are finished. + assert num_cached_tokens == 0, num_cached_tokens + assert not stopped, stopped + break + + # Record the number of requests and tokens. 
+ num_reqs_per_iteration.append(len(running) * num_attn_cell_replicas) + num_tokens_per_iteration.append(sum(input_lens) * num_attn_cell_replicas) + + # Get the execution time of a stage with the given input if running + if running: + stage_execution_time, stage_energy = self.get_stage_execution_time( + execution_plan.parallel_schedule.stage_schedule, + num_attn_cell_replicas, + input_lens, + cached_lens, + self.gpu, + frequency, + self.cluster_size_per_node, + ) + if num_stages > 1: + stage_execution_time.append( + self.get_cross_stage_comm_time( + sum(input_lens), + execution_plan.stage_clusters, + self.gpu, + self.cluster_size_per_node, + ) + ) + time_per_iteration.append(sum(stage_execution_time)) + internal_clock += sum(stage_execution_time) + energy += sum(stage_energy) + # Update the statistics. + for i in range(len(execution_time)): + execution_time[i] = ( + execution_time[i][0], + execution_time[i][1] + stage_execution_time[i], + ) + + # Remove finished requests from the batch. Update logged time per token + for request_id in running: + num_generated_tokens[request_id] += 1 + if num_generated_tokens[request_id] == 1: + request_token_gen_times[request_id] = [ + time_per_iteration[-1] * num_stages + ] + else: + request_token_gen_times[request_id].append( + time_per_iteration[-1] * num_stages + ) + new_running: List[int] = [] + for request_id in running: + num_generated = num_generated_tokens[request_id] + if arch == "encoder": + output_len = 0 + else: + output_len = requests[request_id].output_len + if num_generated < output_len: + new_running.append(request_id) + else: + # Finished processing; update the time_stamp to completed time. 
def get_stage_execution_time(
    self,
    stage_schedule: StageSchedule,
    num_attn_cell_replicas: int,
    input_lens_per_attn_replica: List[int],
    cached_lens_per_attn_replica: List[int],
    gpu_type: str,
    frequency: int,
    cluster_size_per_node: int,
) -> Tuple[List[float], List[float]]:
    """Estimate the time and energy of one stage for a given batch.

    Args:
        stage_schedule: Schedule providing ``cell_schedules``,
            ``reshard_comms`` and ``num_blocks``.
        num_attn_cell_replicas: Multiplier applied to the summed input
            lengths to obtain the total number of input tokens.
        input_lens_per_attn_replica: Input length of every batched request.
        cached_lens_per_attn_replica: Cached length of every batched request.
        gpu_type: GPU identifier forwarded to the cost-model helpers.
        frequency: Frequency forwarded to the cost-model helpers.
        cluster_size_per_node: Number of devices per node, used to split a
            communication group into nodes and devices per node.

    Returns:
        A pair ``(times, energies)``. ``times`` contains, for every cell
        schedule in order, the cell execution time followed by one entry
        per resharding communication of that cell. ``energies`` contains
        one entry per cell (cell energy multiplied by the cell's device
        count); communications add no energy entry. Every value is
        multiplied by ``stage_schedule.num_blocks``.

    Raises:
        ValueError: If device 0 holds a task type that is not handled.
        NotImplementedError: If a resharding communication type is not
            handled.
    """
    # Calculate the number of input tokens per cell.
    num_total_input_tokens = (
        sum(input_lens_per_attn_replica) * num_attn_cell_replicas
    )

    execution_time: List[float] = []
    execution_energy: List[float] = []
    for i, cell_schedule in enumerate(stage_schedule.cell_schedules):
        # Split the input tokens evenly among the replicas (ceiling division).
        num_replicas = cell_schedule.num_replicas
        num_input_tokens = (
            num_total_input_tokens + num_replicas - 1
        ) // num_replicas
        num_devices = cell_schedule.get_num_devices()
        # For mixed precision, assume the data with lower precision
        # will be dequantized to match data of higher precision
        comp_type = self.highest_prec()

        # Cell execution.
        # We leverage the fact that the 0-th device is always assigned the
        # most number of tasks.
        cell_execution_time = 0.0
        cell_execution_energy = 0.0
        task_dict = cell_schedule.task_mapping.tasks_per_device[0]
        for task_type, tasks in task_dict.items():
            if task_type in ("MHAHead", "MQAHead", "BiMHAHead"):
                # The trailing flag is True for MHA/MQA heads and False for
                # the bidirectional head.
                # NOTE(review): presumably a causal-attention switch;
                # confirm against mha_time.
                exe_time, exe_energy = mha_time(
                    gpu_type,
                    frequency,
                    tasks,
                    comp_type,
                    input_lens_per_attn_replica,
                    cached_lens_per_attn_replica,
                    task_type != "BiMHAHead",
                )
            elif task_type == "MLPFilter":
                exe_time, exe_energy = mlp_time(
                    gpu_type, frequency, tasks, comp_type, num_input_tokens
                )
            elif task_type == "GLUFilter":
                exe_time, exe_energy = glu_time(
                    gpu_type, frequency, tasks, comp_type, num_input_tokens
                )
            elif task_type == "SwiGLUFilter":
                exe_time, exe_energy = swiglu_time(
                    gpu_type, frequency, tasks, comp_type, num_input_tokens
                )
            elif task_type.startswith(
                ("ExpertMLPFilter", "ExpertSwiGLUFilter")
            ):
                # Each expert will get topk / E of the input tokens where E
                # is the total number of experts (at least one token).
                num_total_experts = cell_schedule.cell.num_experts
                topk = cell_schedule.cell.topk
                tokens_per_expert = max(
                    num_input_tokens * topk // num_total_experts, 1
                )
                if task_type.startswith("ExpertMLPFilter"):
                    expert_cost = mlp_time
                else:
                    expert_cost = swiglu_time
                exe_time, exe_energy = expert_cost(
                    gpu_type,
                    frequency,
                    tasks,
                    comp_type,
                    tokens_per_expert,
                )
            else:
                raise ValueError(f"Unsupported task type: {task_type}")
            cell_execution_time += exe_time
            cell_execution_energy += exe_energy
        execution_time.append(cell_execution_time)
        # Energy is measured on device 0 and scaled to all devices of the cell.
        execution_energy.append(cell_execution_energy * num_devices)

        if (
            cell_schedule.cell.get_name() == "MoE"
            or cell_schedule.cell.get_name() == "SwiMoE"
        ):
            # When device 0 holds fewer task types than there are experts,
            # the token count used for resharding below is divided by the
            # number of devices in the task mapping (never below one).
            if len(task_dict) < cell_schedule.cell.num_experts:
                num_devices = len(cell_schedule.task_mapping.tasks_per_device)
                num_input_tokens = max(num_input_tokens // num_devices, 1)

        hidden_size = self.model.hidden_size
        # Resharding.
        for comm in stage_schedule.reshard_comms[i]:
            if comm.num_devices < cluster_size_per_node:
                num_nodes = 1
                num_devices_per_node = comm.num_devices
            else:
                num_nodes = comm.num_devices // cluster_size_per_node
                num_devices_per_node = cluster_size_per_node

            num_input_tokens *= comm.size_factor
            num_input_tokens = max(num_input_tokens, 1)
            # The token count is carried from one communication to the next:
            # AllGather grows it, ReduceScatter shrinks it.
            if comm.comm_type == CommType.AllReduce:
                num_elements = num_input_tokens * hidden_size
            elif comm.comm_type == CommType.AllGather:
                num_elements = num_input_tokens * comm.num_devices * hidden_size
                num_input_tokens *= comm.num_devices
            elif comm.comm_type == CommType.ReduceScatter:
                num_elements = num_input_tokens * hidden_size
                num_input_tokens = max(num_input_tokens // comm.num_devices, 1)
            elif comm.comm_type == CommType.AllToAll:
                num_elements = num_input_tokens * hidden_size
            else:
                raise NotImplementedError(
                    f"Unsupported comm type: {comm.comm_type}"
                )
            comm_time = get_comm_time(
                comm.comm_type,
                gpu_type,
                num_nodes,
                num_devices_per_node,
                self.dtype["act"],
                num_elements,
            )
            execution_time.append(comm_time)

    # Multiply the block execution time by the number of blocks.
    return [t * stage_schedule.num_blocks for t in execution_time], [
        e * stage_schedule.num_blocks for e in execution_energy
    ]


def get_cross_stage_comm_time(
    self,
    num_input_tokens: int,
    stage_clusters: List[Cluster],
    gpu_type: str,
    cluster_size_per_node: int,
) -> float:
    """Estimate the point-to-point transfer time used for cross-stage traffic.

    Args:
        num_input_tokens: Number of tokens whose activations are transferred.
        stage_clusters: Clusters of all stages; their device counts are summed.
        gpu_type: GPU identifier forwarded to ``get_p2p_comm_time``.
        cluster_size_per_node: Number of devices per node.

    Returns:
        The value returned by ``get_p2p_comm_time`` for
        ``num_input_tokens * hidden_size`` elements of the activation dtype.
    """
    hidden_size = self.model.hidden_size
    num_total_devices = sum(cluster.get_num_devices() for cluster in stage_clusters)
    if num_total_devices > cluster_size_per_node:
        # The stages do not fit in one node: model the transfer as two
        # nodes with one GPU each.
        num_nodes, num_gpus_per_node = 2, 1
    else:
        # Everything fits in one node: model the transfer as two GPUs on
        # the same node.
        num_nodes, num_gpus_per_node = 1, 2
    return get_p2p_comm_time(
        gpu=gpu_type,
        num_nodes=num_nodes,
        num_gpus_per_node=num_gpus_per_node,
        dtype=self.dtype["act"],
        num_elements=num_input_tokens * hidden_size,
    )
This is also known as max number of sequences." ) + # Define if the prefill and decode phases should be run on different GPUs + parser.add_argument( + "--distserve", + action="store_true", + default=True, + help="Enable 1P1D DistServe simulation (enabled by default)." + ) + + args = parser.parse_args() main(args)