
    9j                     x   d dl mZ d dlmZmZ d dlZd dlZd dlmZm	Z	 d dl
mZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZ dd	lm Z  	 	 	 	 d4d
ddddddee!   dee!   dee!   de!de"dee!   dz  de#d   dz  ded   fdZ$	 	 	 d5d
dddddde#d   dz  ded   de"fdZ%d Z& G d de      Z' G d de      Z( G d de      Z) G d  d!e      Z* G d" d#e      Z+ G d$ d%e      Z, G d& d'e      Z- G d( d)e      Z. G d* d+e      Z/ G d, d-e      Z0 G d. d/e      Z1 G d0 d1e      Z2 G d2 d3e      Z3y)6    )Sequence)AnyOptionalN)make_channels_last_strides_for
StrideType
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensorIRNode is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlikeSUPPORTED_MKLDNN_DEVICES)Vxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                 j   d }dd}d }|j                          |j                          ||j                          t        j                  j                  5  t	        |      }t	        |      }t        |j                               dz
  }dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J t        ||      }t        ||      }t        ||      }|	t        dg|      }	n%dt        |	      cxk  r|k  sJ  J t        |	|      }	t        |t        t        j                  j                  j                  f      sJ |r( |||      }|j                         } |||||	|||      }n|t        |j                        }t        |j                        }t        |      t        |      k7  r/t        |      dk(  rt        |      dk(  sJ |j!                  d        ||||||      }dgt        t#        t%        d	t        |      d	z                     z   }t        |      g|z   }ddd       | j'                  |      }t)        d
 D               }|st+        |      dk(  r!t-        |      rt/        j0                  |      }nEt+        |      dk(  r,|j3                         d   dk(  rt/        j0                  |      }nt5        |      }t+        |      t+        |      k(  sJ t+        |      t6        v sJ |g}|
X|
\  }}}}|j                          |j                          |j                          |j                          |||gz   |gz   ||gz   }n||gz  }|*| j'                  ||      }t        |t8              sJ ||gz  }t;        |j=                         |j?                         tA        |      tA        |            }||||g} |r| jC                  d	|	       ||jE                  |       n| jC                  d|       || |||fS # 1 sw Y   xY w)a}  
    This function is a helper function to prepare inputs, layout and constant args
    for convolution post-op fusion's create function, including deciding the output
    layout (channels first or channels last), realizing inputs and make them etc. The
    function only supports the CPU/XPU device since conv post-op fusion kernel is only
    supported on CPU/XPU right now.
    c                    t        |       t        |      k(  sJ d       t        |       }|dkD  sJ d       d}d}	g }
|
j                  | |          |
j                  ||	   |z         t        d|      D ]P  }||   dz
  ||dz
     z  dz   }| |   dz
  ||dz
     z  ||dz
     dz  z
  |z   ||dz
     z   }|
j                  |       R t        t	        t
        |
            S )NzExpect input dim == weight dim   zExpect input dim > 2r   r
   )lenappendrangelistmapint)output_sizeweight_sizer    r%   r!   r"   r#   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 Y/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_sizez<_prepare_convolution_fusion_create.<locals>._conv_input_size6   s!    ;3{#33U5UU3+Qw...w	$%!
+i01+&?@6IJq# 	,A!!nq(HQUO;a?FQ!#va!e}41q5>A%' !Q'(  l+	, CZ())    Nc                 0   |d u}t        |       }g }|j                  | d          |j                  |d          t        d|      D ]M  }|r||dz
     nd}	|	||   dz
  z  dz   }
| |   d||dz
     z  z   |
z
  ||dz
     z  dz   }|j                  |       O |S )Nr   r*   r
   )r+   r,   r-   )r6   r2   r    r!   r"   has_dilationr3   r1   r7   	dilation_r8   output_size_ds               r:   _conv_output_sizez=_prepare_convolution_fusion_create.<locals>._conv_output_sizeN   s    t+*o:a=);q>*q# 	.A+7QQI+a.1"459F']a'!a%..@AFJvAP M }-	. r<   c                 L   | j                         t              }|dkD  sJ d       |dkD  rVg }|j                  d   |z         |j                  d   |z         |j                  fdt	        d|      D               |S | j                  dd      j                         }|S )Nr*   zExpect weight dim > 2r
   r   c              3   (   K   | ]	  }|     y wN ).0r7   prepacked_weight_sizes     r:   	<genexpr>z[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>m   s     OA4Q7Os   )sizer+   r,   extendr-   	transpose)prepacked_weightr#   r3   r2   rG   s       @r:   _original_deconv_weight_sizezH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_sizeb   s     !1 5 5 7'(Qw///wA:K4Q7&@A4Q7&@AOq#OO  +44Q:??AKr<   r*   r         r
   c              3   <   K   | ]  }t        |t                y wrD   )
isinstancer0   )rF   is     r:   rH   z5_prepare_convolution_fusion_create.<locals>.<genexpr>   s     GAZ3/Gs   xpurD   )#realizer   graph	fake_moder   r+   rI   r   rQ   r0   sympycorenumbersIntegerr.   shapepopreversedr-   require_stride_orderallr   r   r   contiguous_strides
get_strider   r   r   r   get_device_or_error	get_dtyper   insertr,   )!clsr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r;   rA   rM   x_fakeweight_fakedimsr2   r6   r1   x_shapeweight_shapereq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argss!                                    r:   "_prepare_convolution_fusion_createru      s9   .*0(  IIK
NN	
		 /F"1%'/6;;=!A%3w<'4'''''3x=(D(((((3v;&$&&&&&w-$/fd+!)1#t4Ns>*2d22222).$?N&3

(:(:(B(B"CDDD 7{FKKJ*K 6<<(G 1 12L7|s<007|q(S->!-CCC  #+K 3huQFa/H&I!JJ 0125EE_/Fb 	  $45A G+GGGN/!,5
*1
-$2$E$Ek$R 
	u	$):a)?&99+F6{C1!88881!9999SF 7D4w7L11VH<?VV6(((0@A%+++5'		!+.!-0	M fh7MQ/dQ%=-1A5HHW/F /Fs   G0P((P2
binary_sumc           
         |j                          |j                          ||j                          |j                         ^ }}|j                         \  }}	t        |      |	gz   }
t        t        t	        t        |j                                                 }| j                  ||      }t        |      t        |      k(  sJ t        |      t        v sJ |g}|X|\  }}}}|j                          |j                          |j                          |j                          |||gz   |gz   ||gz   }n||gz  }||r| j                  ||      }||gz   }t        j                  |
      }t        |j                         |j                         |
|      }g }||j                  |       n|j                  d|       |||||fS )z
    This function is a helper function to prepare inputs, layout and constant args
    for linear post-op fusion's create function. The function only supports the CPU device
    since linear post-op fusion kernel is only supported on CPU right now.
    r   )rT   get_sizer.   r]   r-   r+   r^   r   r   r   r`   r   
get_devicerc   r,   rd   )re   r   r   r   r&   r'   rv   m_ocr1   rk   rn   ro   rp   rq   rr   rm   rs   rt   s                       r:   _prepare_linear_fusion_creater}      s    IIK
NNJJLEQ OOEArq'RD.KHU3qzz|+<%=>?  $45A1!88881!9999SF 7D4w7L11VH<?VV6(,,U4DEE5'!"55kBM			M  "MdQ%=-1A5HHr<   c                     t        | j                         | g       }t        | j                               | _        |g| _        |S )Ndevice)r   
get_layoutr   ry   layoutoutputs)packed	output_irs     r:   _create_output_noder   #  sD    
I
 &V->->-@AFM[FNr<   c                        e Zd Z	 d	 d fdZ fdZedddddddee   d	ee   d
ee   dedee   dz  fd       Z	 xZ
S )ConvolutionUnaryNc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       y )Nr   aoti_torch__mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)	r   device_typesuper__init__torchopsmkldnn_convolution_pointwisedefaultselfr   rn   rt   	__class__s       r:   r   zConvolutionUnary.__init__/  sa     +6!95		((??GG)$*:*:);;YZ 	 	
r<   c                 b    |j                  d| j                   d       t        |   |       y Nz&torch/csrc/inductor/aoti_torch/c/shim_z.hinclude_extra_headerr   r   codegenr   wrapperr   s     r:   r   zConvolutionUnary.codegen?  2    $$4T5E5E4FbI	
 	 r<   r   r   r   r   padding_stride_r?   r#   scalarsc           
          t        | |||||||      \  }}}}}||t        |	      |
gz   }t        |||      }t        |      S )Nr   rn   rt   )ru   r   r   r   )re   r   r   r   r   r   r?   r#   attrr   	algorithmrn   rt   rs   r{   r   s                   r:   createzConvolutionUnary.createE  sr    ( /FD(GY
	
 &#G,)
 

 " '

 #6**r<   rE   returnN__name__
__module____qualname__r   r   classmethodr.   r0   r   r   __classcell__r   s   @r:   r   r   .  s    
 	

 

 !  + +  + 	 +
 s) + c + 9 +  + cT! +  +r<   r   c                        e Zd Z	 	 d	 d fdZ fdZeddddddddd	ee   d
ee   dee   dedede	dz  dedz  dee
   dz  dedz  fd       Z xZS )ConvolutionBinaryNc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       || _	        y )Nr   r   %_mkldnn__convolution_pointwise_binaryr   )
r   r   r   r   r   r   r   r   binarycpp_constant_args)r   r   rn   rt   r   r   s        r:   r   zConvolutionBinary.__init__j  sk     +6!95		((??FF)$*:*:);;`a 	 	
 "3r<   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r:   r   zConvolutionBinary.codegen|  r   r<   r   r   r'   r   r   r   r   r?   r#   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
          t        | |||||||      \  }}}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        |||      }t        |      S )Nr
   r   )ru   r^   rd   r   r   r   )re   r   r'   r   r   r   r   r?   r#   r   r   r   r   r   rn   rt   rs   rk   r{   r   s                       r:   r   zConvolutionBinary.create  s    . /FD(GY
	

 ((0@Aa%#M2)
 
 # '

 #6**r<   )rE   rE   r   )r   r   r   r   r   r   r.   r0   strfloatr   r   r   r   s   @r:   r   r   i  s    
 3 
3$! (+(+ (+ 	(+
 (+ s)(+ c(+ 9(+ (+ (+ dl(+ $J(+ Cy4'(+ t(+ (+r<   r   c                        e Zd Z	 d	 d fdZ fdZdeej                     fdZe	ddddd	dd
dde
e   de
e   de
e   dedededz  dedz  de
e   dz  dedz  fd       Z xZS )ConvolutionBinaryInplacer   Nc           
         t        |d         | _        |d   |d   g|dd  z   }t        |   |||d t        j
                  j                  j                  j                  d| j                   d       t        t        |d   j                               |d   |       t        t        |d   j                               |d   |       g| _        y )Nr   r
   r*   r   &_mkldnn__convolution_pointwise_binary_r   r   )r   r   r   r   r   r   r   _convolution_pointwise_r   r   r   ry   mutation_outputs)r   rs   rn   rt   reordered_inputsr   s        r:   r   z!ConvolutionBinaryInplace.__init__  s     +6!95"1Ivay1F12J>		((@@GG)$*:*:);;ab 	 	
 :VAY-A-A-CDfQiQUV:VAY-A-A-CDfQiQUV!
r<   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r:   r   z ConvolutionBinaryInplace.codegen  r   r<   c                     t               S rD   r   r   s    r:   get_unbacked_symbol_defsz1ConvolutionBinaryInplace.get_unbacked_symbol_defs  
    |r<   r   r   r'   r   r   r   r   r?   r#   r   r   r   r   r   c           
         t        | |||||||      \  }}}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        t        |d   j                               ||      }|j                  d   S )Nr
   r   )rs   rn   rt   r   )ru   r^   rd   r   r   r   ry   rn   )re   r   r'   r   r   r   r   r?   r#   r   r   r   r   r   rn   rt   r{   rk   r   s                      r:   r   zConvolutionBinaryInplace.create  s    . /FD(GY
	

 ((0@Aa%#M2)
 
 *$F1I,@,@,BC'
 }}Qr<   r   r   )r   r   r   r   r   r	   rW   Symbolr   r   r.   r0   r   r   r   r   r   r   s   @r:   r   r     s    
 	

 

0!*U\\*B  + +  +  	+ 
 +  s)+  c+  9+  +  +  dl+  $J+  Cy4'+  t+  + r<   r   c                        e Zd Z	 d	 d fdZ fdZedddddddee   d	ee   d
ee   dee   dedee   dz  fd       Z	 xZ
S )ConvolutionTransposeUnaryNc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       y )Nr   r   (_mkldnn__convolution_transpose_pointwiser   )	r   r   r   r   r   r   r    _convolution_transpose_pointwiser   r   s       r:   r   z"ConvolutionTransposeUnary.__init__   sa     +6!95		((IIQQ)$*:*:);;cd 	 	
r<   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r:   r   z!ConvolutionTransposeUnary.codegen  r   r<   r   r   r   r   r   output_padding_r   r?   groups_r   c                     d}t        | |||||||||
      \  }}}}}||	t        |
      |gz   }t        |||      }t        |      S )NTr   )ru   r   r   r   )re   r   r   r   r   r   r   r?   r   r   r   r   r$   rn   rt   rs   r{   r   s                     r:   r   z ConvolutionTransposeUnary.create  s     
 /
	
 &#G,)
 

 + '

 #6**r<   r   r   r   r   s   @r:   r   r     s    
 	

 

 ! ++++ ++ 	++
 s)++ c++ c++ 9++ ++ cT!++ ++r<   r   c                        e Zd Z	 d	 d fdZ fdZeddddddddddd	dd
ee   dee   dee   dededefd       Z	 xZ
S )QConvPointWisePT2Ec           
          t        |d         | _        t        |      dk(  | _        t        |   |||dt        j                  j                  j                  j                  d| j                   d       y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        r      Nr   __qconv_pointwise_tensorr   )r   r   r+   has_biasr   r   r   r   onednnqconv_pointwisetensorr   s       r:   r   zQConvPointWisePT2E.__init__F  sq      +6!95Fq(		((88??)$*:*:);;ST 	 	
r<   c                     |j                  d| j                   d       t        |   |       t	        | j
                  t              r| j                  |       y y r   r   r   r   r   rQ   r   r   codegen_size_assertsr   s     r:   r   zQConvPointWisePT2E.codegena  S    $$4T5E5E4FbI	
 	 dkk6*%%g. +r<   qxr   ro   rp   qwrq   r   r!   r    r"   r#   output_scaleoutput_zero_pointc                 0   d}d }t        | ||||	||
|||||||g      \  }}}}}||d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |||||t        |      |gz   }|J |t        j                  t        j                  fv r||_        t        |||      S )NFr*   r
   r   r   )ru   r   r   float32bfloat16dtyper   )re   r   ro   rp   r   rq   rr   r   r!   r    r"   r#   r   r   output_dtyper   r   r   r$   r%   rn   rt   rs   r{   s                           r:   r   zQConvPointWisePT2E.createi  s   * 
 /lG\:
	
 <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#G,)
 
 '''EMM5>>:: #/M! '
 	
r<   r   r   )r   r   r   r   r   r   r.   r0   r   r   r   r   s   @r:   r   r   E  s    
 	

 

6/ B
B
 B
 "	B

 B
 B
 B
 S	B
 cB
 s)B
 B
 B
 B
 B
r<   r   c                        e Zd Z	 d	 d fdZ fdZdee   fdZdee	j                     fdZeddddd	dd
ddddddee   dee   dee   deddddfd       Z xZS )QConvPointWiseBinaryPT2Er   c           
          t        |d         | _        t        |      dk(  | _        d| _        t
        |   |||dt        j                  j                  j                  j                  d| j                   d       y)ag  
        Needs input/weight/output qparams
        if bias is not None
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
            - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
            output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
            - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
             output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        r         Nr   !__qconv2d_pointwise_binary_tensorr   )r   r   r+   r   idx_for_inplace_sumr   r   r   r   r   qconv2d_pointwisebinary_tensorr   s       r:   r   z!QConvPointWiseBinaryPT2E.__init__  sy    " +6!95Fq(#$ 		((::HHd..//PQ 	 		
r<   c                     |j                  d| j                   d       t        |   |       t	        | j
                  t              r| j                  |       y y r   r   r   s     r:   r   z QConvPointWiseBinaryPT2E.codegen  r   r<   c                 :    | j                  | j                        gS rD   )
input_namer   r   s    r:   get_mutation_namesz+QConvPointWiseBinaryPT2E.get_mutation_names  s     8 89::r<   c                     t               S rD   r   r   s    r:   r   z1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r<   r   r   ro   rp   r   qaccumr   r!   r    r"   r#   r   r   c                    d}d }t        | ||||
|	||||||||g|      \  }}}}}||d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |||||||||t        |      |g
z   }|dk(  sJ d       t        j                  j	                  |j                                t        t        |j                               ||      }|j                  |j                     S )	NFr*   r
   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r   r   )ru   r   r   rU   mark_buffer_mutatedget_namer   r   ry   rn   r   )re   r   ro   rp   r   rq   rr   r   r   r!   r    r"   r#   r   r   r   accum_scaleaccum_zero_pointr   alphar   r   r   r$   r%   rn   rt   _kernel_layoutrk   r   s                                 r:   r   zQConvPointWiseBinaryPT2E.create  sA   4 
 /lG\:
	
" <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#M2)
 
 e# 	
Q	
# 	
##FOO$56)V%6%6%89'
 }}V7788r<   r   r   )r   r   r   r   r   r   r   r   r	   rW   r   r   r   r.   r0   r   r   r   s   @r:   r   r     s    
 	

 

>/;HSM ;*U\\*B  O9O9 O9 "	O9
 O9 O9 O9 S	O9 cO9 s)O9 O9 "O9 'O9 O9r<   r   c                   @     e Zd Z	 d	 d fdZ fdZed        Z xZS )MKLPackedLinearc                     t         |   |||d t        j                  j                  j
                  j                         y N)r   )r   r   r   r   mkl_mkl_linearr   r   s       r:   r   zMKLPackedLinear.__init__1  s:     			1199 	 	
r<   c                 F    |j                  d       t        | 	  |       y Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hr   r   r   r   s     r:   r   zMKLPackedLinear.codegen?  s    $$%RS r<   c                    | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}t        |      |gz   }	t	        j
                  |	      }
|||g}|g}|||gz  }n|j                  dd        |j                         }|J t        t        ||j                         |	|
      ||      S )Nr   r   )require_stride1realize_inputrx   r.   r   r`   rd   ry   r  r   rc   )re   r   packed_worig_wB
batch_sizerz   r{   r|   r1   rm   rn   rt   r   s                 r:   r   zMKLPackedLinear.createC  s     1 1! 45$$S%6%6v%>?

A!A1gn&99+FXv&#=qcMF  D)!!!vq{{}k=Q'
 	
r<   r   r   r   r   r   r   r   r   r   r   r   s   @r:   r  r  0  s0    
 	

 

! 
 
r<   r  c                   F     e Zd Z	 d	 d fdZ fdZed        Zd Z xZS )LinearUnaryc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       y )Nr   r   __linear_pointwiser   )	r   r   r   r   r   r   r   _linear_pointwiser   r   s       r:   r   zLinearUnary.__init__\  sa     +6!95		((::BB)$*:*:);;MN 	 	
r<   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r:   r   zLinearUnary.codegenl  r   r<   c                 "   | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }	}t        |      |	gz   }
||g}||r|ndg|g}|2| j                  | j                  |            }|j	                  |       n|j                  dd        |j                         }|J t        t        ||j                         |
      ||      }t        |      S )Nr   r   r   rI   r   )require_contiguousr  rx   r.   r,   rd   ry   r  r   rc   r   )re   r   wr  r   r   r   rz   _icr|   r1   rn   rt   r   r   s                  r:   r   zLinearUnary.creater  s	   ""3#4#4Q#78""3#4#4Q#78**,C**,C1gnQ'wtYG=&&s'8'8';<AMM!  D)!!!kkm 
 '
 #6**r<   c                      y rD   rE   r   s    r:   apply_constraintzLinearUnary.apply_constraint      r<   r   r   )	r   r   r   r   r   r   r   r%  r   r   s   @r:   r  r  [  s5    
 	

 

 ! + +:r<   r  c                   J     e Zd ZdZ	 d	 d fdZ fdZed        Zd Z xZ	S )LinearBinaryz)torch.ops.mkldnn._linear_pointwise.binaryc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       y )Nr   r   __linear_pointwise_binaryr   )	r   r   r   r   r   r   r   r  r   r   s       r:   r   zLinearBinary.__init__  sa     +6!95		((::AA)$*:*:);;TU 	 	
r<   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r:   r   zLinearBinary.codegen  r   r<   c                 V   | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}t        |      |gz   }	|||g}
|g}|2| j                  | j                  |            }|
j	                  |       n|j                  d|       |j                         }|J t        t        ||j                         |	      |
|      }t        |      S )Nr   r   r   )r!  r  rx   r.   r,   rd   ry   r(  r   rc   r   )re   r   yr"  r  r   rz   r#  r|   r1   rn   rt   r   r   s                 r:   r   zLinearBinary.create  s   ""3#4#4Q#78""3#4#4Q#78""3#4#4Q#78**,C**,C1gnQ=&&s'8'8';<AMM!  A&!!!kkm 
 '
 #6**r<   c                      y rD   rE   r   s    r:   r%  zLinearBinary.apply_constraint  r&  r<   r   r   )
r   r   r   r8   r   r   r   r   r%  r   r   s   @r:   r(  r(    s:    8F 	

 

 ! + +:r<   r(  c                   h     e Zd Z	 	 d	 d fdZ fdZeddddddddddd	dd
ddedefd       Z xZ	S )QLinearPointwisePT2Ec           
          t        |d         | _        || _        t        |   |||dt
        j                  j                  j                  j                  d| j                   d       y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        r   Nr   __qlinear_pointwise_tensorr   )
r   r   r   r   r   r   r   r   qlinear_pointwiser   r   r   rn   rt   r   r   s        r:   r   zQLinearPointwisePT2E.__init__  sh    " +6!95 ));;BBd..//IJ 	 		
r<   c                     |j                  d| j                   d       t        |   |       t	        | j
                  t              r| j                  |       y y r   r   r   s     r:   r   zQLinearPointwisePT2E.codegen  sS    $$4T5E5E4FbI	
 	 dkk6*%%g. +r<   r   r   ro   rp   r   rq   rr   r   r   r   c           
          t        | |||||||g      \  }}}}}|||	|
|t        |      |gz   }|
J |
t        j                  t        j                  fv r|
|_        t        ||||d u      S )Nr   rn   rt   r   )r}   r   r   r   r   r   r0  )re   r   ro   rp   r   rq   rr   r   r   r   r   post_op_namepost_op_argspost_op_algorithmrn   rt   rs   r{   s                     r:   r   zQLinearPointwisePT2E.create  s    " 8UlG\:8
4q! &#L1)
 
 '''EMM5>>:: #/M# '$&	
 	
r<   rE   Tr   )
r   r   r   r   r   r   r   r0   r   r   r   s   @r:   r0  r0    s    
 
 

</ ,
,
 ,
 "	,

 ,
 ,
 ",
 ,
 ,
 ,
 ,
r<   r0  c                   ~     e Zd Z	 	 d	 d fdZ fdZdee   fdZeddddddd	dd
dddddddde	de
fd       Z xZS )QLinearPointwiseBinaryPT2Er   c           
          t        |d         | _        || _        d| _        t        |   |||dt        j                  j                  j                  j                  d| j                   d       y)a  
        if bias is not None
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
            - const_args is: [o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
            - const_args is: [bias, o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        r   r   Nr   !__qlinear_pointwise_binary_tensorr   )r   r   r   r   r   r   r   r   r   r3  r   r4  s        r:   r   z#QLinearPointwiseBinaryPT2E.__init__(  sp    " +6!95 #$ ));;II)$*:*:);;\] 	 	
r<   c                     |j                  d| j                   d       t        |   |       t	        | j
                  t              r| j                  |       y y r   r   r   s     r:   r   z"QLinearPointwiseBinaryPT2E.codegenE  r   r<   c                     | j                   d   }|dk(  r<| j                  | j                     }t        |t              sJ |j                         gS g S )Nr   )rt   rn   r   rQ   r   r  )r   binary_post_opinputs      r:   r   z-QLinearPointwiseBinaryPT2E.get_mutation_namesM  sS    ++B/U"KK 8 89EeV,,,NN$%%Ir<   r   r   ro   rp   r   rq   rr   r'   r   r   r   c                    t        | |||||||g||dk(        \  }}}}}||	|
||||||t        |      |g
z   }|dk(  rot        j                  j	                  |j                                t        t        |j                               |||d u      }|j                  |j                     S |J |t        j                  t        j                  fv r||_        t        ||||d u      S )Nr   r   r7  )r}   r   r   rU   r  r  r=  r   ry   rn   r   r   r   r   r   )re   r   ro   rp   r   rq   rr   r'   r   r   r   r   other_scaleother_zprC  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrn   rt   rs   rk   r   s                           r:   r   z!QLinearPointwiseBinaryPT2E.createV  s$   8 *lG\:e#
	
 &#$67#)
 
 U"GG''(89/!)9)9);<+d*	F ==!;!;<<'''EMM5>>:: #/M) '$&	
 	
r<   r;  r   )r   r   r   r   r   r   r   r   r   r   r0   r   r   r   s   @r:   r=  r=  '  s    
 
 

:/HSM  H
H
 H
 "	H

 H
 H
 "H
 H
 H
 H
 H
 H
r<   r=  c            !            e Zd Z	 d	 d fdZeddddddddddddd	dd
edee   dededededededef d       Z fdZ	 xZ
S )MkldnnRnnLayerc                     t         |   |||d t        j                  j                  j
                  j                         y r
  )r   r   r   r   atenmkldnn_rnn_layerr   r   s       r:   r   zMkldnnRnnLayer.__init__  s:     			77?? 	 	
r<   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    | j                  | j                  |            }|j                          | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                          | j                  | j                  |            }|j                          |j                         }t	        |      dk(  sJ d       |\  }}}|||g}|j                         }|j                         }|||||||g}||	|
||||||g	}|j                         }|J t        t        |      ||      }d }|||dgg} |||      t        j                  |      t        j                  |      dgg}t        t        ||            D  cg c]D  \  }\  }} t        t        |j                         |j                         ||       |t        |fg      F }!}}} |!|_        |!S c c} }}w )NrN   zExpect lstm input to be 3Dr   )rn   rt   c                 V    t        |       dk(  sJ d       t        j                  |       S )NrN   zExpect output_shape to be 3D)r+   r   r`   )output_shaper]  s     r:   get_strides_of_lstm_outputz9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output  s,    |$)I+II)!44\BBr<   r
   )r  r  freeze_layoutrx   r+   ry   rL  r   r   r`   	enumeratezipr   r   rc   tupler   )"re   r   rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r^  r6   
seq_length
mini_batchra  hy_shapecy_shapern   rt   r   r   rb  output_sizesoutput_stridesrR   r1   rm   r   s"                                     r:   r   zMkldnnRnnLayer.create  sn   *  1 1! 45 	
  !2!22!67  !2!22!67  !2!22!67  !2!22!67  !2!22!67
  !2!22!67
ZZ\
:!#A%AA# .8*
J
"J<;;=;;=RRR,

 !!!V,'
	C %h1#>&|[A--h7--h7C	
" 4=L.14
 
 0/K LLNKKM!	 	
	 
 ##
s   8A	Ic                 D    |j                  d       t        | 	  |      S r  r  r   s     r:   r   zMkldnnRnnLayer.codegen  s!    $$%RSww''r<   r   r   )r   r   r   r   r   boolr.   r0   r   r   r   r   s   @r:   rL  rL    s    
 	

 

 dd d 	d
 d d d d d #Yd d d d d d  !d" #d dL( (r<   rL  c                   R     e Zd Z	 d	 d fdZ fdZe	 	 	 	 	 	 	 	 dd       Z xZS )WeightInt4PackMatmulc                     t        |      dk(  sJ t        |      dk(  sJ t        | 	  |||dt        j                  j
                  j                  j                  d       y)zY
        inputs = [x, w, qGroupSize, qScalesAndZeros]
        constant_args = ()
        rO   r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr   )r+   r   r   r   r   	quantizedint4mm_packed_weight_cpur   r   s       r:   r   zWeightInt4PackMatmul.__init__  sd     6{a=!Q&&&,,EEMML 	 	
r<   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r  )r   r   r   rQ   r   r   r   r   s     r:   r   zWeightInt4PackMatmul.codegen4  s?    $$%RS dkk6*%%g. +r<   c                    ||||g}|j                         ^ }}|j                         \  }}t        |      |gz   }	t        j                  |	      }
t	        |j                         |j                         |	|
      }t        ||      S )N)r   rn   )rx   r.   r   r`   r   ry   rc   rp  )re   r   r"  
qGroupSizeqScalesAndZerosrn   rz   r{   nr1   rm   rs   s               r:   r   zWeightInt4PackMatmul.create;  s     Q
O4

Azz|11gm&99+F#LLNKKM	
 $ 
 	
r<   r   r   )r   r   r"  r   rw  r   rx  r   r  r   s   @r:   rp  rp    sV    
 	

 

*/ 

 
  	

 %
 
r<   rp  )FNNN)NNF)4collections.abcr   typingr   r   rW   r   torch._prims_commonr   r   torch.utils._ordered_setr	   irr   r   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   virtualizedr   r0   rn  r.   ru   r}   r   r   r   r   r   r   r   r  r  r(  r0  r=  rL  rp  rE   r<   r:   <module>r     s   $     J /     U T  +/.2#'BIBI BI 	BI
 c]BI SMBI smBI BI BI SMD(BI $t+BI K BIT /3#'<I<I <I 	<I
 $t+<I K <I <I~8+( 8+vB+) B+JN 0 N bC+ 1 C+Lg
* g
T~90 ~9B(
' (
V6# 6r8$ 8vU
, U
px
!2 x
vx(& x(x3
, 3
r<   