
    9jM              #       F   U d Z ddlZddlmZ ddlmZ ddlmZ ddlZddl	m
Z
  ed      Z ed      Zi Zeej                  j                   ef   ed	<    eh d
      Zdededeeef   deeeef   geeef   f   fdZ	 d9dddddej.                  dej.                  dej.                  dej.                  dz  dedededej.                  fdZ edde      	 d9dddddej.                  dej.                  dej.                  dej.                  dz  dedededej.                  fd       Zdedz  dedefdZd ej.                  d!ededej.                  fd"Zd#ej.                  d$ej.                  d%ed&ededz  d'edej.                  fd(Zd%ed&eddfd)Zd#ej.                  d$ej.                  d%ed&ededz  dej.                  fd*Z 	 	 	 d:dddddd+dd,d#ej.                  d$ej.                  d-ej.                  d.ej.                  dz  d/ej.                  dz  d0ej.                  dz  d1ed2ed3ed'ededz  d4ed5edz  de!ej.                  ej.                  ej.                  ej.                  f   fd6Z" ed7de"      	 	 	 d:dddddd+dd,d#ej.                  d$ej.                  d-ej.                  d.ej.                  dz  d/ej.                  dz  d0ej.                  dz  d1ed2ed3ed'ededz  d4ed5edz  de!ej.                  ej.                  ej.                  ej.                  f   fd8       Z#y);zImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)TypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                 h     dt         t        t        f   dt         t        t        f   f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                     d }t        j                  j                  d d| d      |       }| t        t	        t	        t         j
                  j                        |      <   |j                         |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr	   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   s      T/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator'   s|    =/*==**WIQxj) + 

  	wwuyy~~w'GRS 	y)    )r   r   r   )r   r   r   r#   s   ``` r"   _onnx_opr%   "   s0    
	R( 	Xb"f-= 	 r$   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr&   r'   r(   c                "    | j                         S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r)   r*   r+   r,   r&   r'   r(   s          r"   _rotary_embedding_23_fake_implr/   5   s     779r$   RotaryEmbedding   c                V   | j                   t              }d   d   t        j                  j	                         dk(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j	                         dk(  xr j	                         dk(  fd	       n@t        j                  j	                         d
k(  xr j	                         d
k(  fd       |dk(  rt        j
                  | d      } nG|d
k(  rBt        j                  |dk7  fd       d   }||z  }	||	g}
t        j                  | |
      } t        j                  t        | j                         dk(  d        | j                   d
   }	|dk(  r|	}| ddddddd|f   }| dddddd|df   }|dz        nt        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  d      t        j                  d      |r%|dddddddddf   }|dddddddddf   }nt        j                  |dd      \  }}|z  |z  z
  }|z  |z  z   }|rft        j                  |d      }t        j                  |d      }t        j                  ||fd      }t        j                  ||j                         }nt        j                  ||fd      }t        j                  ||fd      }|d
k(  rt        j                  |      S t        j
                  |d      S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                  "    d j                    S )Nz6position_ids must be 2D when provided. Received shape shape)r,   s   r"   <lambda>z%rotary_embedding_23.<locals>.<lambda>Z   s    L\M_M_L`a r$   c                  .    d  dj                   d    S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r6   )
batch_sizer,   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>^   s$    LZLXdeqewewxyezd{| r$   r
   c                  .    d d j                   d    S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r:   r
   r6   )r,   sequence_lengths   r"   r8   z%rotary_embedding_23.<locals>.<lambda>b   s=    QRaQbbno{  pB  pB  CD  pE  oF  G r$   c                  <    d j                    dj                    S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r6   r*   r+   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>f   &     ((1'88J9??J[] r$      c                  <    d j                    dj                    S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r?   r6   r@   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>l   rA   r$      )r   r4   r
   rB   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r"   r8   z%rotary_embedding_23.<locals>.<lambda>y   s    abmano r$   c                       y)Nzx should be a 4D tensor by nowr   r   r$   r"   r8   z%rotary_embedding_23.<locals>.<lambda>       r$   c                  0    dj                    d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r6   )r;   cosr=   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   $    .+@FSbRccij r$   c                  0    dj                    d  d dS )Nzsin has shape rJ   rK   rL   r6   )r;   r=   sins   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   rN   r$   c                  0    d j                   d    d dS )NzLast dimension of cos cache (rQ   ') should match rotary_embedding_dim/2 ().r6   )rM   rotary_embedding_dim_halfs   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   .    /		">ef  fA  AC  D r$   c                  0    dj                   d    d  dS )NzLast dimension of sin cache (rQ   rS   rT   r6   )rU   rP   s   r"   r8   z%rotary_embedding_23.<locals>.<lambda>   rV   r$   dim)
r7   lenr   _checkrY   permutereshape	unsqueezechunkcat)r)   r*   r+   r,   r&   r'   r(   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr;   rM   rF   rU   r=   rP   s    ```               @@@@@@r"   rotary_embedding_23rm   C   s    ''K[!JQJ!"oO !#a	
 	q!Z/|	
 	q!_4 G	
 	MMOq 9Y]]_%9]	
 	MMOq 9Y]]_%9]	
 Q MM!\*	qNo	
 "!n9,	)YG	MM!Y'	LLQWW"$LM
I q (Aq////0HQ12334L 4 9 
 
 	LL		!
"Fsyy|'Fj 
LL		!
"Fsyy|'Fj 
LL		"22 	D 
LL		"22 	D //QC //QC
 aAqt!tm$aAqt!tm$Xqb1B 8cBhD8cBhD  tR(tR())T4Lb9==(..A99dD\r2YY,/R8FQ}}V[11 ==..r$   scalerc   c                 :    | | S dt        j                  |      z  S )z/Get the scale factor for attention computation.g      ?)mathsqrt)rn   rc   s     r"   _get_scale_factorrr      s     %5GC$))I2F,FGr$   tensorr;   c                     | j                   d   | j                   d   }}||z  }| j                  ||||      j                  dd      j                         S )z1Reshape 3D tensor to 4D for multi-head attention.r
   r4   )r7   view	transpose
contiguous)rs   r;   r'   r=   rb   rc   s         r"   _reshape_3d_to_4drx      sP     $*<<?FLLO[Oy(IJIF	1a	r$   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk(  rt        | ||||      S t        j                  t        j                  | |j	                  dd                  S )z1Get QK output tensor based on the specified mode.r   r3   rQ   )_compute_qk_output_for_mode_0r   
zeros_likematmulrv   )ry   rz   r{   r|   rn   r}   s         r"   _get_qk_output_for_aten_spdar      sO     !,q%';U
 	

 QB0C DEEr$   c                 H     t        j                   z  dk(   fd       y)z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )r|   r{   s   r"   r8   z-_validate_gqa_configuration.<locals>.<lambda>   s    - 344YZnYooxy r$   N)r   r[   )r{   r|   s   ``r"   _validate_gqa_configurationr      s"     
LL22a7yr$   c                     |}||k7  r||z  }|j                  |d      }t        || j                  d         }t        j                  |      }| |z  }	||z  }
t        j                  |	|
j                  dd            S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r
   rX   rB   r3   rQ   )repeat_interleaverr   r7   rp   rq   r   r   rv   )ry   rz   r{   r|   rn   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r"   r   r      s     H22+/CC&&}!&<$UAGGAJ7L<(J:~H*$H<<("4"4R"<==r$           )	is_causalkv_num_headsq_num_headsr}   rn   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                   | j                   d   }t        | j                         dk(  r| j                   d   }| j                   }|4|||j                   d   |j                   d   z   |j                   d   |z  f}n#|||j                   d   |j                   d   |z  f}|}||||d   f}n| j                   d   }| j                   }|K|j                   d   |j                   d   |j                   d   |j                   d   z   |j                   d   f}n|j                   }|}| j                   d   | j                   d   | j                   d   |d   f}t        j                  || j                  | j
                        }t        j                  ||j                  |j
                        }t        j                  ||j                  |j
                        }t        j                  || j                  | j
                        }||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   rB   r
   r4   dtypedevice)r7   rZ   r   emptyr   r   )ry   rz   r   r   r   r   r   r   r   r}   rn   r   r   r;   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaperl   present_keypresent_value	qk_outputs                          r"   _attention_23_fake_implr     s   " J 177|qGGAJww q!AGGAJ.
l*	! 

l*	! 0 a 	
 GGAJww 

q!AGGAJ.
	! !"/ GGAJGGAJGGAJa 	
 [[QWWQXXFF++/qwwqxxPKKK 3177188TMO177188LI;y88r$   	Attentionc                	   d\  }}}t        | j                        }| j                  d   }t        | j                        dk(  rWt        j                  |dk7  xr |dk7  d        | j                  d   }t	        | ||      } t	        |||      }t	        |||      }t        j                  t        | j                        dk(  xr2 t        |j                        dk(  xr t        |j                        dk(  d        | j                  |   }t        |
|      }
|t        j                  ||g|	      n|j                         }|t        j                  ||g|	      n|j                         }||}}| j                  |   }|j                  |   }| j                  |   }|j                  |   }|d
k(  xr0 |	dk(  xr) |du xr# |du xs |j                  t        j                  k(  }t        ||       |rOt        j                  j                  j                  | |||d
||
t        ||k7              }t        | ||||
|	      }nY||k7  r+||z  }|j                  ||	      }|j                  ||	      }t        j                   ||| j                  | j"                        }|ryt        j                  |du d        t        j$                  t        j&                  ||t        j                  | j"                              }|j)                  | t+        d            }|?|j                  t        j                  k(  r|j)                  | t+        d            }n||z   }t        |
| j                  d         }t-        j.                  |      } | | z  }!|| z  }"t        j0                  |!|"j3                  dd            }#|#}|#|z   }$|	dk(  r|$}|d
kD  r|t        j4                  |$|z        z  }$|	dk(  r|$}|w|t6        v rW|$j                  }%|$j9                  t:        j<                  |         }$t        j>                  |$d	      }&|&j9                  |%      }&n/t        j>                  |$d	      }&nt        j>                  |$d	      }&|	dk(  r|&}t        j0                  |&|      }|dk(  r1|j3                  dd      jA                         jC                  ||d      }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r
   r4   rB   r   rB   c                       y)Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  rH   r$   r
   rD   c                       y)Nz'Q, K, and V should be 4D tensors by nowr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  rH   r$   NrX   r   )r   	dropout_pr   rn   
enable_gqar   c                       y)Nz'Cannot use both is_causal and attn_maskr   r   r$   r"   r8   zattention_23.<locals>.<lambda>  rH   r$   z-infr3   rQ   r4   )"rZ   r7   r   r[   rx   rr   r`   r.   r   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatrp   rq   r   rv   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrw   ru   )'ry   rz   r   r   r   r   r   r   r   r}   rn   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr;   r   q_head_sizer   r   r{   r|   kv_sequence_lengthcan_use_sdparl   r   r   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs'                                          r"   attention_23r   c  s   & ,3(L, !''lOJ 177|q12!2Q	
 GGAJa[9a\:a\:	LLAGGEc!''la/ECLA4E9 ''(#Ke[1E
  			8Q-\2WWY  ! 			:q/|4WWY  qA '',/77<0-. 	3 	A!Q&	A%	A $?)//UZZ"?	    35IJ$$AA#';; B 
 1 !
	 "66/3GGM##M|#DA##M|#DA KK1
	
 LLT!#T  **

%&**88	K "--{lE&MJI  %**,%119*eFmL	 &	1	 )
; YY|,
z>z> !<<(2D2DR2LM %	 ()3 A%$I S="UZZw0F%GGL A%$I ( $QQ!-!3!3+#==>OP  #]]<R@
']]>:
"]]<R@
|<J A%"I j!, ! Q"--/44ZARTVW 	 ;y88r$   )N)NNN)$__doc__rp   collections.abcr   typingr   typing_extensionsr   r   torch.onnx.opsr   r   r   r	   dict_ops
OpOverload__annotations__	frozensetr   strintr%   Tensorr   r/   rm   r   rr   rx   r   r   r   tupler   r   r   r$   r"   <module>r      s,    $  '  * t_T] AC UZZ22H<= B091 -!$19"b&1AxB (2r6"223. )-	  !|||| || ,,%	    \\ 

R!?@
 )-	D/  !D/||D/||D/ ||D/ ,,%	D/ D/ D/ D/ \\D/ AD/NHUT\ Hc He H

LL
&)
69

\\
F||F||F F 	F
 4<F F \\F$47	>||>||> > 	>
 4<> \\>4 &*$(&*Q9 !"$(Q9||Q9||Q9 ||Q9 ||d"	Q9
 llT!Q9 t#Q9 Q9 Q9 Q9 Q9 4<Q9 Q9 TzQ9 5<<u||U\\ABQ9h 
+r23
 &*$(&*9 !"$(9||9||9 ||9 ||d"	9
 llT!9 t#9 9 9 9 9 4<9 9 Tz9 5<<u||U\\AB9 49r$   