
    9jU              #          d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZ d d
lmZ d dlmZmZ d Zd Zd Zd Z d Z!g dZ"ejF                  jI                  ddejJ                  jL                         ejF                  jO                  dd      d        Z(ejF                  jO                  dd      d        Z)ejF                  jO                  dd      d        Z*ejF                  jW                  d      d        Z,d Z-d Z.d Z/ed        Z0d  Z1d! Z2d" Z3d# Z4d$ Z5d% Z6d& Z7d' Z8d( Z9d) Z:d* Z;d+ Z<d, Z=d- Z> e       Z?d. Z@d/ ZAd0 ZBd1 ZC ed2d3e>e= eej                  ej                        d4d4d4d4d4d4d45       ed6d3e1e/ eej                  ej                        d4d4d4d4d4d7d45       ed8d3ee eej                  ej                        d4d4d4d4d49
       ed8d:e e eej                  ej                        d4d4d4d4d49
       ed8d;e!e eej                  ej                        d4d4d4d4d49
       ed<d3e.e- eej                  ej                        d4d4d4d4d4d7d45       ed=d3e@e/ eej                  ej                        d4d4d4d4d4d7 eej                  d>d?       eej                  d>d@       eej                  d>dA       eej                  d>dB      fd4C       edDd3eAe/ eej                  ej                        d4d4d4d4d4d7d45       edEd3e9e8 eej                  ej                        d4d4d4d4d4d4F       edGd3e:e8 eej                  ej                        d4d4d4d4d4d4F       edHd3e3e2 eej                  ej                        d4d4d4d4d4d4F       edId3ee4 eej                  ej                        d4d4d4d4d4 eej                  d>d?       eej                  d>d@       eej                  d>dA       eej                  d>dB      fegJ       edKd3e7e5 eej                  ej                        d4d4d4d4d4d4d4 eej                  d>d?       eej                  d>d@       eej                  d>dA       eej                  d>dB      fegL       edKdMe7e6 eej                  ej                        d4d4d4d4d4d4d4 eej                  d>d?       eej                  d>d@       eej                  d>dA       eej                  d>dB      fegL       edNd3e<e; eej                  ej                        d4d4d4d4d4 eej                  d>d?       eej                  d>d@       eej                  d>dA       eej                  d>dB      fe ej                  ej                  j                          dO      gJ       edPd3eCeB eej                        d4d4d4d4d4d4egQ      gZLy)R    Nmap)flex_attention)_create_empty_block_maskcreate_block_maskr   )make_tensor)inline_asm_elementwise)onlyCUDA)all_types_andcustom_types)DecorateInfoOpInfoSampleInput)mark_compile_region)InvokeQuantinvoke_quant_packedc           
   +      K   t        j                  t        |||      }t         |ddddd       |ddddd      g |ddd       |ddd      f       y w)Ndevicedtyperequires_grad   皙?lowhigh   args	functoolspartialr   r   opinfor   r   r   kwargsmake_args         ^/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/testing/_internal/hop_db.pysample_inputs_mapr(      sj       F%}H 	!Qs	+XaA3Q-OPqc*HQCa,HI s   AAc                     | d   j                         j                  d      |z  | d   |j                         z   j                         j	                  | d   j                               gS )Nr         ?r   )cosadd_sincos_viewsizexy0y1s      r'   inner_fr5   #   sQ    aDHHJOOC 2%!rvvx'='='?'D'DQqTYY['QRR    c                 $    d }t        || ||      S )Nc                     t        | ||      S Nr5   r1   s      r'   fzsimple_map.<locals>.f(   s    q"b!!r6   r   )xsr3   r4   r;   s       r'   
simple_mapr=   '   s    " q"b"r6   c                 $    d }t        || ||      S )Nc                 $    d }t        || ||      S )Nc                     t        | ||      S r9   r:   r1   s      r'   f2z"nested_map.<locals>.f1.<locals>.f20   s    1b"%%r6   r   xxr3   r4   rA   s       r'   f1znested_map.<locals>.f1/   s    	& 2r2r""r6   r   r<   r3   r4   rD   s       r'   
nested_maprF   .   s    # r2r2r6   c                 $    d }t        || ||      S )Nc                 $    d }t        || ||      S )Nc                 $    d }t        || ||      S )Nc                     t        | ||      S r9   r:   r1   s      r'   rA   z5triple_nested_map.<locals>.f0.<locals>.f1.<locals>.f2;   s    q"b))r6   r   rB   s       r'   rD   z)triple_nested_map.<locals>.f0.<locals>.f1:   s    * r2r2&&r6   r   rE   s       r'   f0ztriple_nested_map.<locals>.f09   s    	' 2r2r""r6   r   )r<   r3   r4   rK   s       r'   triple_nested_maprL   8   s    # r2r2r6   )"custom_function_callautograd_function_applyrun_and_save_rng_staterun_with_rng_staterun_dtensor_rng_opgraphsafe_run_with_rng_state	out_dtypetrace_wrappedtag_activation_checkpointexecutorch_call_delegatewrapwrap_with_set_grad_enabledauto_functionalized_v2associative_scan
flat_applywrap_with_autocastwrap_activation_checkpointrun_const_graphauto_functionalizedr   map_implwith_effectsstrict_mode_export_tracepointcall_torchbindtriton_kernel_wrapper_mutation triton_kernel_wrapper_functionalhints_wrapperdynamo_bypassing_wrapperforeach_mapaoti_call_delegateprintinductor_compiled_codeinvoke_leaf_functionztestlib::mutating_custom_opz8(Tensor(a!) x, Tensor(b!) z) -> (Tensor, Tensor, Tensor))tagscpuc                     | j                  d       |j                  d       | j                         |j                         | |z   fS N   r,   cloner2   zs     r'   foo_impl_cpurw      5    FF1IFF1I779aggiQ&&r6   cudac                     | j                  d       |j                  d       | j                         |j                         | |z   fS rq   rs   ru   s     r'   foo_impl_cudar{      rx   r6   xpuc                     | j                  d       |j                  d       | j                         |j                         | |z   fS rq   rs   ru   s     r'   foo_impl_xpur~      rx   r6   c                 J    | j                         |j                         | |z   fS r9   rt   ru   s     r'   foo_impl_abstractr      s    779aggiQ&&r6   c           	   +   v   K   t        j                  t        |||      }t         |ddddd             y wNr   r   r   r   r    r#   s         r'   sample_inputs_condr      9       F%}H hq!QCa8
99   79c                 Z    t        j                  | j                         dkD  d d | g      S )Nr   c                 $    | j                         fS r9   r+   r2   s    r'   <lambda>zsimple_cond.<locals>.<lambda>   s    aeegZ r6   c                 $    | j                         fS r9   )r-   r   s    r'   r   zsimple_cond.<locals>.<lambda>   s    AEEG: r6   )torchcondsumr   s    r'   simple_condr      s&    ::aeegk#79MPQsSSr6   c           	   +   v   K   t        j                  t        |||      }t         |ddddd             y wr   r    r#   s         r'   sample_inputs_invoke_subgraphr      r   r   c                 ,    t        j                  |       S r9   r   r-   r   s    r'   fn_for_invoke_subgraphr      s    99Q<r6   c                     t        |       S r9   )r   r   s    r'   simple_invoke_subgraphr      s    !!$$r6   c           
   +      K   t        j                  t        ||d      }t         |ddddd       |ddddd             y w)NFr   r   r   r   r    r#   s         r'    sample_inputs_auto_functionalizer      sM       F%uH Aqc*HQ1#A,N    AAc                 V    t         j                  j                  j                  | |      S r9   )r   opstestlibmutating_custom_opru   s     r'   simple_auto_functionalizer      s    99//155r6   c              +      
K   t        j                  t        |||      
d }
fdt        d      D        \  }}}t	        ||      }	t        |||||	       y w)Nr   c                     | |z   S r9    scorebhmns        r'   	score_modz/sample_inputs_flex_attention.<locals>.score_mod   s    qyr6   c           	   3   :   K   | ]  } d d dddd         yw)r         r   r   Nr   .0_r&   s     r'   	<genexpr>z/sample_inputs_flex_attention.<locals>.<genexpr>   s#     I1x1c1#A66I      )r!   r"   r   ranger   r   )r$   r   r   r   r%   r   qkv
block_maskr&   s             @r'   sample_inputs_flex_attentionr      s[       F%}H JaIGAq!)!Q/J
aAy*
55s   AAc              +     K   t        j                  t        ||d      d }d }fdt        d      D        \  }}}	t	        |dddd|	      }
d
|j                  d      dz  z  }t        |||	||
j                         |i       \  }}}t        |||	|j                         |j                         t        j                  |      d |d |
j                         |i ddf       y w)NFr   c                     | S r9   r   r   s        r'   r   z8sample_inputs_flex_attention_backward.<locals>.score_mod       r6   c                     ||k\  S r9   r   )r   r   r   r   s       r'   mask_modz7sample_inputs_flex_attention_backward.<locals>.mask_mod   s    Avr6   c           	   3   :   K   | ]  } d d dddd         ywr   r      r   r   Nr   r   s     r'   r   z8sample_inputs_flex_attention_backward.<locals>.<genexpr>   #     JAx1c23Q77Jr   r   r   r   BHQ_LENKV_LENr   r*         ?r   r   )r!   r"   r   r   r   r0   flex_attention_hopas_tupler   detachr   	rand_like)r$   r   r   r   r%   r   r   r   r   r   r   scaleout	logsumexpr   r&   s                  @r'   %sample_inputs_flex_attention_backwardr      s        F%uH KqJGAq!"8qASU[\J!&&*##E*	1aJ//15"CA 	q#**,	 0 0 2EOOC4H$tZ0022r2
 s   C!C$c              +      K   t        j                  t        ||d      t        j                  dd|t        j
                        d }fd}fdt        d	      D        \  }}}	t        |d
d
dd|      }
d|j                  d      dz  z  }t        |||	||
j                         |i       \  }}}t        |||	|j                         |j                         t        j                  |      d |d |
j                         |i ddf       y w)NFr   r   r   )r   r   c                     | S r9   r   r   s        r'   r   zIsample_inputs_flex_attention_backward_explicit_buffers.<locals>.score_mod   r   r6   c                     |z   |k\  S r9   r   )r   r   r   r   mask_offsets       r'   r   zHsample_inputs_flex_attention_backward_explicit_buffers.<locals>.mask_mod   s    ;!##r6   c           	   3   :   K   | ]  } d d dddd         ywr   r   r   s     r'   r   zIsample_inputs_flex_attention_backward_explicit_buffers.<locals>.<genexpr>   r   r   r   r   r   r*   r   r   r   )r!   r"   r   r   fullint32r   r   r0   r   r   r   r   r   )r$   r   r   r   r%   r   r   r   r   r   r   r   r   r   r   r&   r   s                  @@r'   6sample_inputs_flex_attention_backward_explicit_buffersr      s       F%uH **RV5;;GK$ KqJGAq!"8qASU[\J!&&*##E*	1aJ//15"CA 	q#**,	 0 0 2EOOC4H$tZ0022r2
 s   D
Dc                 n    t         j                  j                  j                  | |||||||||	|
|||      S r9   )r   r   higher_orderflex_attention_backward)querykeyvaluer   r   grad_outgrad_logsumexpfw_graphjoint_graphr   r   kernel_optionsscore_mod_other_buffersmask_mod_other_bufferss                 r'   simple_flex_attention_backwardr   	  sK      99!!99 r6   c           
   +      K   t        j                  t        ||d      }t        t	        j
                  d       |ddddd             y w)NFr   r   r      r   r   )r!   r"   r   r   r   tensorr#   s         r'   sample_inputs_while_loopr   +  sI       F%uH QAqc* s   AAc                 T    d }d }t         j                  j                  ||| |f      S )Nc                     | dkD  S Nr   r   iter_tr2   s     r'   cond_fnz"simple_while_loop.<locals>.cond_fn6      zr6   c                 ,    | dz
  |j                         fS Nr   r   r   s     r'   body_fnz"simple_while_loop.<locals>.body_fn9      z1557""r6   )r   _higher_order_ops
while_loopr   r2   r   r   s       r'   simple_while_loopr   5  s-    # ""--gwLLr6   c                 f    d }d }t         j                  j                  ||| |ft                     S )Nc                     | dkD  S r   r   r   s     r'   r   z/simple_while_loop_stack_output.<locals>.cond_fn@  r   r6   c                 ,    | dz
  |j                         fS r   r   r   s     r'   r   z/simple_while_loop_stack_output.<locals>.body_fnC  r   r6   )r   r   while_loop_stack_outputtupler   s       r'   simple_while_loop_stack_outputr   ?  s6    # ""::61+uw r6   c           
   +      K   t        j                  t        ||d      }t         |ddddd       |ddddd             y w)NFr   r   r   r   r   r   r    r#   s         r'   sample_inputs_local_map_hopr  K  sN       F%uH Aqc*Aqc* r   c                 T   d }t         j                  j                  |      }t         j                  j	                         st        d      ddlm}  |        |        |       f |        |        |       ffd|j                  d<   t         j                  j                  || |      S )Nc                 D    | j                         |j                         z   S r9   )r+   r-   )inp1inp2s     r'   body_gmz%simple_local_map_hop.<locals>.body_gmW  s    xxzDHHJ&&r6   z*Expected torch.distributed to be availabler   )	Replicate)in_placementsout_placementslocal_map_kwargs)r   fxsymbolic_tracedistributedis_availableAssertionError(torch.distributed.tensor.placement_typesr  metar   local_map_hop)r  r  r  gmr  s        r'   simple_local_map_hopr  V  s    ' 
	 	 	)B))+IJJB $+y{IK@%KikBD#BGG ""00T4@@r6   c           
   +      K   t        j                  t        |||      }t         |dddd       |ddddd             y wr   r    r#   s         r'   sample_inputs_scanr  i  sL       F%}H A3Q'Aqc* s   AAc                 J    d }t         j                  j                  || |      S )Nc                 6    | |z  |z   }|| j                         fS r9   r   )carryr2   results      r'   
combine_fnzsimple_scan.<locals>.combine_fnt  s     Qu{{}$$r6   )r   r   scan)initr<   r  s      r'   simple_scanr  s  s$    % ""''
D"==r6   c                 .    d }t        || |       d   dz  S )Nc                 4    t        j                  |       |z  fS r9   r   )r2   ys     r'   fnzsimple_invoke_quant.<locals>.fn  s    		!q ""r6   r          @)quant_tracerr2   r"  s     r'   simple_invoke_quantr&  ~  s"    # Aq!!$s**r6   c                 ,    d }t        ||       d   dz  S )Nc                 .    t        j                  |       fS r9   r   r   s    r'   r"  z&simple_invoke_quant_packed.<locals>.fn  s    		!r6   r   r#  )r   r%  s     r'   simple_invoke_quant_packedr)    s      r1%a(3..r6   c           	   +   v   K   t        j                  t        |||      }t         |ddddd             y wr   r    r#   s         r'   sample_inputs_inline_asmr+    r   r   c                     t         j                  j                  rt        | ddt         j                        S t        | ddt         j                        S )Nzv_mov_b32_e32 $0, $1z=v, v)asm_strconstraintsr   zmov.f32 $0, $1;z=f,f)r   versionhipr	   float32r   s    r'   simple_inline_asmr2    sI    }}%*--	
 	
 "	$& r6   r  simpleF)namevariant_test_nameopsample_inputs_funcdtypessupports_outcheck_batched_gradcheck_batched_gradgradcheck_batched_forward_grad"check_inplace_batched_forward_gradsupports_autogradsupports_gradgradinvoke_subgraphTr   )
r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  nestedtriple_nestedr   invoke_quantTestHOPtest_aot_exporttest_pre_dispatch_exporttest_serialize_exporttest_retrace_export)r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  skipsr?  r   r   )r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r   auto_functionalizer   )r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  rI  
decoratorsr   )r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  rI  rK  explicit_buffersr  zrequires distributed buildr	   )r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  rK  )Mr!   unittestr   #functorch.experimental.control_flowr   &torch._higher_order_ops.flex_attentionr   r   !torch.nn.attention.flex_attentionr   r   torch.testingr   .torch._higher_order_ops.inline_asm_elementwiser	   *torch.testing._internal.common_device_typer
   $torch.testing._internal.common_dtyper   r   #torch.testing._internal.opinfo.corer   r   r   'torch._higher_order_ops.invoke_subgraphr   torch._higher_order_opsr   r   r(   r5   r=   rF   rL   0FIXME_hop_that_doesnt_have_opinfo_test_allowlistlibrarydefineTagpt2_compliant_tagimplrw   r{   r~   register_faker   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r$  r&  r)  r+  r2  boolhalfexpectedFailurefloat16r1  skipIfr  r  hop_dbr   r6   r'   <module>re     sP      3 
 & Q ? L Q Q G DS
6#4 0J   !>		$	$   159' :' 16:' ;' 159' :' :;' <':T:  %6
6:<DM	A&> }+/: "-UZZ4 $#(+0 "!8UZZ4 $#(+0 ",UZZ4 $#(+0 ",UZZ4 $#(+0 ),UZZ4 $#(+0 "-UZZ4 $#(+0 "8UZZ4 $#(+0 119>OP(()5O 119>UV119>ST
  -0 ""%8UZZ4 $#(+0 "3UZZ4 $#(+0 &")3UZZ4 $#(+0 !"$;UZZ4 $#(+0 "7EMM5==9 $#(+0119>OP(()5O 119>UV119>ST
 :'* &")@EMM5==9 $#(+0119>OP(()5O 119>UV119>ST
 :+. &,)QEMM5==9 $#(+0119>OP(()5O 119>UV119>ST
 :+. "6EMM5==9 $#(+0119>OP(()5O 119>UV119>ST
 HOO%%22446R
'4 %"3EMM* $#(+0:{K
r6   