
    9j                   J   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d d	lmZmZ d d
lm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dlm'Z'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4 d dl5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC d dlDmEZE d dlFmGZG ddlHmIZImJZJmKZK ddlLmMZMmNZNmOZOmPZPmQZQmRZRmSZSmTZT ddlUmVZVmWZWmXZXmYZY ddlZm[Z[ ddlJm\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZgmhZh ddlimjZjmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZtmuZumvZv dd lwmxZx dd!lymzZz dd"l{m|Z| dd#l}m~Z~mZmZmZmZmZmZmZmZmZmZmZmZ dd$lmZmZ er;d d%lmZmZmZmZ d d&lmZ d d'lmZ d d(lmZ d d)lmZ dd*lmZ dd+lmZ dd,lmZ eeOz  Zd d-lmZ  ejF                  e      ZejL                  jO                  ed.      ZejR                  jT                  Z ejV                         Z eIjZ                         rd d/lmZ d d0lmZ nd>d1Zd>d2Zd?d3Zd@d4Z	 	 	 	 	 	 dAd5ZdBd6Z	 	 	 	 dCd7Z	 	 	 	 	 	 dDd8ZdEd9Z G d: d;ejr                  jt                        Z G d< d=e      Zy)F    )annotationsN)defaultdict)contextmanager)AnyNoReturnTYPE_CHECKING)Expr)deviceTensor)get_decompositions)defakedynamo_timed)FakeScriptObject)is_opaque_reference_typeis_opaque_typeis_opaque_value_type)get_layout_constraint_tag)
LazyStringtrace_structured)compute_required_storage_lengthmake_channels_last_strides_for)
FakeTensor)full_aoti_runtime_assert)BackwardState)magic_methodsmethod_to_operator)_get_placeholder_exprfree_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Node)_is_view_op)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesFileBackedGraphModuleget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registrationWorkspaceArg)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)count_flops_fx)assign_origin_nodeConstantDonatedBufferFixedLayoutget_device_typeGraphPartitionSignatureInputBuffer	Pointwise	ReductionShapeAsConstantBuffer
StorageBox	TensorBoxTorchBindObject)constrain_to_fake_tensorsconstrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguoustag_to_layout_constraintunsupported_output_tensoruser_lowerings)autotune_cache)AutotuneCacheBundler)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeGraphPartitionMapis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_alignedshould_fallback_by_defaultSUPPORTED_MKLDNN_DEVICESValueWithLineMap)NullHandlerV)CallableIterableIteratorSequence)
ModuleType)_EffectType)GraphModule)Graph)PythonWrapperCodegen)Dep)BaseSchedulerNode)output_code_log
perf_hints) save_triton_kernel_perf_artifact)log_module_codec                      y N argskwargss     U/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/graph.pyru   ru              c                      y rw   rx   ry   s     r|   rt   rt      r}   r~   c                   t        | t        j                  t        j                  t        j                  j
                  j                  f      sJ d       t        | t        j                  j
                  j                        rt        j                  S t        | t        j                        rt        |       S | j                  rt        j                  S | j                  rt        j                  S y )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr	   corenumbersIntegertorchint64r\   
is_integeris_floatfloat32)constant_buffers    r|   may_get_constant_buffer_dtyper      s    %,,

EJJ4F4F4N4NO  	r 
 /5::#5#5#=#=>{{/5::.#O44!!{{		!	!}}r~   c                6    t        d t        D              }| |v S )Nc              3  2   K   | ]  }t        |        y wrw   )r   ).0ms     r|   	<genexpr>z"is_magic_method.<locals>.<genexpr>   s     HQ-a0Hs   )r(   r   )op	magic_opss     r|   is_magic_methodr      s    H-HHI?r~   c           	         |j                  d      }| }t        |      D ]=  \  }}t        ||      s t        ddj	                  |d |              t        ||      }? |S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)objtargettarget_atomsattr_itriatoms         r|   getattr_recursiver      sv     <<$LH\* +4x&5chh|BQ?O6P5QR  8T*+ Or~   c                b   i }| j                  d      d   }d|j                  vr|S t        |j                  d   t        j
                  j                        s|j                  d   }n|j                  }t        |      D ],  \  }}||j                  d   v s|j                  d   |   ||<   . |S )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   rz   r   fxr%   r   )gretoutput_nodeoutput_node_argsidxnodes         r|   get_user_visible_output_stridesr      s    ')C,,(,+A.K!)9)99
k&&q)588==9&++A.&++/0 I	T+""#=>>#(()BCCHCII Jr~   c                   i | }g |j                         }t        g |      }|r|j                         }t        |j                        r|j
                  rxt        |j
                  d   t        j                  j                        rG|j
                  d   }||vr4|j                  |d       |j                  |       |j                  |       |r|S )zc
    Extend user_visible_output_strides to include view ops that lead to user-visible outputs.
    r   N)keysr(   popr&   r   rz   r   r   r   r%   
setdefaultaddappend)user_visible_outputsresultqueuevisitedcurrentbases         r|   "extend_user_visible_output_stridesr      s     ":$8!9FfkkmE5"G
))+'7<<?EHHMM:<<?D7"!!$-D!T"  Mr~   c                   t         j                  syt        |      }t        t        j
                  t        j                  t        j                  g      }t        t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  t        j&                  g      }	 	 	 	 dd}t)        | j*                        D ]T  }t-        |j.                  t0        j2                  j4                  j6                        rd|j8                  d<   Lt-        |j.                  t0        j:                  j<                        rJt?        |j.                        t0        j@                  jB                  jD                  k(  rd|j8                  d<    ||      }|s||v rd|j8                  d<   |j8                  jG                  dd      r0|jH                  D ]!  } ||      }	|	s|	|vsd|j8                  d<   # t         jJ                  r@||v sFd|j8                  d<   W y)a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    Nc                    | j                   dk(  rZt        | j                  t        j                  j
                        r,t        | j                  d      r| j                  j                  S d S )Ncall_function_overloadpacket)r   r   r   r   _ops
OpOverloadr   r   )r   s    r|   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet   sU    
 ww/)4;;

(=(=>%67	 KK''	
 	
r~   Tdislike_paddingF)r   torch.fx.Nodereturnz"torch._ops.OpOverloadPacket | None)&r+   comprehensive_paddingr   r(   atenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducereversednodesr   r   r   _higher_order_opstriton_kernel_wrapTritonKernelWrapperMutationr   r   r   r   _CTagneeds_exact_stridesgetall_input_nodespad_outputs)
r   user_visible_output_stridesextended_user_visible_nodesops_dislike_paddingops_like_paddingr   curr   priorprior_ops
             r|   mark_nodes_dislike_paddingr      s    ''"D## %%%OO	
 "MMHHIIIIHHIIIIHHHHKKKK	
"



	+

    /JJ##66RR
 +/CHH&' szz5::#8#89)#**5xx||//0 +/CHH&'!#&$$*.CHH&'88<<)51,, 9/6#3348EJJ019 !!c-H&H*.CHH&'A /r~   c                   t        t        j                  dd       Rt        t        j                  j                  dd       ,t	        | j
                  t        j                        rt        | j
                  j                        dkD  rt        | j
                  j                  d   d      r| j
                  j                  d   j                  D ]  }|j                  d   t        j                  j                  j                  j                  t        j                  j                  j                  j                  t        j                  j                  j                   j                  fv s y y)Nmkldnn_convolution_pointwiser   targetsTF)r   r   opsr   r   r   	functoolspartiallenrz   r   r   fnsr   defaultbinary_convolution_pointwise_)r   r   s     r|   is_mkldnn_convr   O  s    			8T*6EII$$&>EQt{{I$5$56  !A%DKK$$Q'3kk&&q)11 	Fzz!}		  77??		  77>>		  88??! 
 	 r~   c                      e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dF	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dG fdZdHdZ	 	 	 	 dIdZ	 	 	 	 dJdZ	 	 	 	 dKd	Z	 	 	 	 dLd
Z		 	 	 	 	 	 dMdZ
dNdOdZdPdZej                  dQd       ZdRdZedSd       ZdTdZ	 	 	 	 	 	 	 	 dUdZdVdZdWdZdXdZedYd       Z	 	 	 	 dZdZd[dZ	 	 	 	 d\dZd]dZd^dZd_ fdZd`dZ dddadZ!dbd Z"dcd!Z#dWd"Z$ddd#Z%ded$Z&dfdgd%Z'dhd&Z(	 	 	 	 	 	 	 	 di fd'Z)e*jV                  dj fd(       Z,edkd)       Z-	 	 	 	 	 	 	 	 dld*Z.dmd+Z/dmd,Z0e*jV                  	 	 	 	 	 	 	 	 dn fd-       Z1dHd.Z2edod/       Z3edpd0       Z4	 	 	 	 	 	 	 	 	 	 	 	 dqd1Z5edrd2       Z6dsd3Z7dt fd4Z8	 	 	 	 	 	 dud5Z9dHd6Z:	 	 	 	 dv	 	 	 	 	 	 	 	 	 dwd7Z;	 	 	 	 dxd8Z<	 	 dyd9Z=dHd:Z>dyd;Z?dzd<Z@	 	 d{d=ZAdZBd>ed?<   d|d@ZCd|dAZD	 	 	 	 d}dBZEd~dCZFddDZGddEZH xZIS )GraphLoweringlist[ir.IRNode]graph_outputsNFc                z   t         |   |       || _        || _        ||n| j	                  ||	      | _        d| _        |	| _        |
| _        || _	        || _
        || _        || _        || _        d| _        d| _        |t!               }d| _        nd| _        || _        |j&                  j)                         | _        t-        t.        j0                            | _        t5        |      | _        g | _        i | _        i | _        d | _        t-               | _         |r|jB                  n	t-               | _!        |r|jD                  n	t-               | _"        d| _#        tI        t,              | _%        tI        t,              | _&        i | _'        g | _(        g | _)        |r|ni | _*        |rt-        |jW                               n	t-               | _,        |r|jZ                  ni | _-        |r|j\                  ni | _.        |j^                  ja                  dt-                     | _1        |r|jd                  ni | _2        i | _3        i | _4        i | _5        i | _6        t-               | _7        t-               | _8        t-               | _9        t-               | _:        i | _;        t-               | _<        t-               | _=        d | _>        d | _?        ddl@mA} t        j                         r|r|n|| _D        d | _E        i | _F        t-               | _G        g | _H        i | _I        tI        t              | _K        i | _L        t        j                         | _N        || _O        || _P        || _Q        || _R        i | _S        || _T        || _U        t        t              | _X        d | _Y        d | _Z        d | _[        d | _\        d | _]        | j
                  r| j                         n	t-               | __        t-        dg      | _`        t        |j                        | _c        t        |j                  | j                         d	| _e        d	| _f        g | _g        d | _h        i | _i        |j                         | _k        | j                  j]                         D ]  \  }}|| j\                  |<    | j                  je                         D ]  \  }}|| jd                  |<    | j                  j^                  ja                  d
i       | _m        ||j                  ni | _n        t                 t        j                  d       t              | _r        i | _s        t-               | _t        t-               | _u        i | _v        t-               | _w        t-               | _x        t-               | _y        t        j                         | _|        d| _}        t               | _        i | _        y )N)is_inferencer   FTcpumutated_named_buffers)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)super__init__get_decomp_fnexample_inputsdecide_layout_opt
layout_optnum_channels_last_convr  is_backwardis_const_graphconst_wrapper_codeconst_kernel_codeconst_moduleinputs_to_check_defers_input_alignmentextra_tracebackr"   reuse_shape_env
_shape_envdeferred_runtime_assertscopyras_by_symbolr(   r   r   bound_unbacked_symbolsrW   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalpartition_mapszero_dim_cpu_tensor_listdevice_typesdevice_idxsdevice_typer   additional_buffer_depsadditional_star_depsbuffer_to_padded_sizebuffers
operationsconst_output_indexr   folded_constants	constantsnamed_buffersr   r   r  named_parameterstorchbind_constantsopaque_value_type_classesseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_bufferssdpa_constraint_cachenever_reuse_buffersinplaced_to_remove
device_opswrapper_code&torch._inductor.extern_node_serializerr  r+   	is_fbcodeextern_node_serializercurrent_nodelistsmutated_inputsmutated_input_idxsname_to_bufferlistname_to_users
name_to_optimecreation_timenamecpp_wrapper
fx_wrapperrecord_multi_kernel_choicemulti_kernel_to_choiceaot_modegraph_idnext_post_grad_graph_counterpost_grad_graph_id	schedulerautotuning_inputsautotuning_mappingautotuning_gridscurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler  allocated_constant_namer4   r   	lru_cacher1   effectful_opsunaligned_buffersno_fuse_buffer_namesbuffer_layout_constraintslow_precision_codegen_opsinvoke_quant_opsall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxr[   bw_donated_idxsdep_size_hint_cache)selfgmr  	shape_envrP  rK  rO  r  r?  r  r  r  r+  r  r  r  rJ  r  rL  r
  r  kv	__class__s                          r|   r	  zGraphLowering.__init__g  se   . 	*, % '''F 	
 '(#(&,"4!2(.',$$ 
I#(D #'D # ..335 	 '1&>&@#(3,.QS=?">B9C%)5L%%:< 	 )5L$$*, 	 !BMC
# ALJ@W! <>"(*.0"4" 	
 " )..01 	 '3L"" 	 +7L&&B 	 @Bww{{#Z\@
" .:L))r 	
  	  ;=&68.03=<0:8B$0:=?"4>L 3=<-126V !&< #, 	# ,0+-
/9|-/46@KD@Q35!YY[	&$
 +6'68#  "&'?"@>B =ADH7; 48 7;ooD002:< 	' !+,G+H I+J288+T("288T-M-MN !  	 6:& GI -/[[]LL..0 	&DAq$%Dq!	&LL113 	)DAq'(D!!!$	)040@0@0D0D.1
- 5A4LL00RT 	$ 	"#$=I$7$7$=>R$S!;= 3=,5?\! EG&:D,&1; :D% &OO-  "/1 AC r~   c                8    | j                   j                          y rw   )r  freeze_runtime_assertsrt  s    r|   r{  z$GraphLowering.freeze_runtime_asserts=  s    ..0r~   c                2   | j                   r2t        |j                               t        |j                               fS ddlm}  |dt        | j                  j                               }| j                  j                  ||      \  }}}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }	}||	fS c c}w c c}w )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_)r  rX   sizestridetorch._dynamo.sourcer~  r   r  backed_var_to_val,create_symbolic_sizes_strides_storage_offsetr   r   SymIntr   expr)
rt  exr~  sourcer  r  _r   r_sizer_strides
             r|   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides@  s     ,RWWY79R		:   < $,S1R1R-S,TUF LL	 NRRAu||!<!&&++!CRROUV!:a#>AFFKKAEVVx SVs   9D9Dc                    |j                         D cg c]  }t        j                  |       }}|j                         D cg c]  }t        j                  |       }}||fS c c}w c c}w )z+
        Primarily used to weights
        )r  r   r   r  )rt  r  r   r  r  s        r|   static_sizes_stridesz"GraphLowering.static_sizes_stridese  sZ     +-'')4Qa 44,.IIK8q%--"88V| 58s   A%A*c                P   t        |t        j                        r|j                  }t        |t        j                        r|j                  }t        |t        j
                        r1|j                  | j                  v r| j                  |j                     S |j                         S rw   )	r   r,   rF   datarE   ComputedBufferrJ  r(  get_size)rt  r   s     r|   get_allocation_sizez!GraphLowering.get_allocation_sizeo  sw     dBLL)99DdBMM*99DtR../		T777 --dii88==?"r~   c                    |j                         }| j                  |      }|j                  }|j                  }t	        |||      S rw   )
get_layoutr  r  offsetr   )rt  r   layoutr  r  r  s         r|   get_allocation_storage_sizez)GraphLowering.get_allocation_storage_size  sA     "''-.tVVDDr~   c                h    t        |t              sJ |       || j                  t        |            v S rw   )r   r.   r1   r?   )rt  r
   features      r|   has_featurezGraphLowering.has_feature  s4    
 '>2;G;2$33OF4KLLLr~   c                   ||f| j                   vrd}| j                  j                  |j                        }t	        |t
        j                        rd| j                   ||f<   y	 |j                         r)| j                  j                  |j                               r#|r|j                         }n|j                         }|| j                   ||f<   | j                   ||f   S # t        $ r Y -w xY w)zc
        Get the size hint for a dependency with caching to avoid expensive recomputation.
        r   )rs  r  r   rJ  r   r,   NonTensorObjhas_unbacked_symbolsr  all_unbacked_explicitly_hinted	get_numelnumbytes_hint
numel_hintKeyError)rt  depcount_bytesresinps        r|   get_dep_size_hintzGraphLowering.get_dep_size_hint  s     T%=%==C ##''1C#r/?@((#{);<002}}CCCMMOT"!//1!nn. <?D$$c;%78''k(:;;   	s   %AC# #	C/.C/c                8    | j                   x}r|S t        d      )NzNo current device)rX  r   rt  r
   s     r|   get_current_device_or_throwz)GraphLowering.get_current_device_or_throw  s$    (((6(M233r~   c              #  b   K   | j                   }|| _         	 d  || _         y # || _         w xY wwrw   )rX  )rt  r
   r   s      r|   set_current_devicez GraphLowering.set_current_device  s1     ##$	("'D%Ds   /# /	,/c                8    | j                   ry| j                  ryy)N	inferencebackwardforward)r  r  r|  s    r|   get_training_phasez GraphLowering.get_training_phase  s    r~   c                  t         j                  syt         j                  ry| j                  j                  D cg c]?  }|j
                  t        j                  j                  j                  j                  u s>|A }}| j                  j                  D ]  }t        |      s|j                  |       ! t        |      }|dk(  ryt        j                  j                  j                   r;t        j                  j                  j#                         rt%        d |D              ryt        t'        | j                  j                              d|z  k\  rt(        j+                  d       yt-        d |D              rt(        j+                  d       ydd	}dd
}dd}|rt/        t0              }|D ]@  }	t3        |	      }
|
 ||	      rd}n ||	      rd}n ||	      rd}nd}||xx   |
z  cc<   B t(        j+                  d       d}d}d}d}t5        |j7                               }|d   |z  |d   |z  z   |d   |z  z   |d   |z  z   }||k  }|st(        j+                  d||       |S t-        t9        ||            rt(        j+                  d       yt-        t9        ||            rt(        j+                  d       yt%        t9        ||            rt(        j+                  d       yyc c}w )zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTr   c              3     K   | ]A  }d D ]:  }|j                   |   j                  d   j                  j                  t        v  < C yw)r   r*   valN)rz   r   r
   typerc   r   nr   s      r|   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sR      !  s  '..337OOOs   AA	i,  z*Skipped layout opt because only a few convc              3  t   K   | ]0  }d D ])  }t        |j                  |   j                  d          + 2 ywr  )r   rz   r   r  s      r|   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sE      

  QVVC[--e45
5
s   68zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670c                    | j                   d   j                  d   }t        |t        j                        sJ | j                   d   dkD  xr |j                  d      dkD  S )Nr*   r  r  )rz   r   r   r   r   r  )r  meta_vals     r|   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_grouped  sQ    vvay~~e,Hh55566":>:hmmA&6&::r~   c                   | j                   d   j                  d   j                  d      dz  | j                   d   j                  d   j                  d      k  xr. | j                   d   j                  d   j                  d      dkD  S )Nr*   r  r      rz   r   r  r  s    r|   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel  sv    q	u%**1-1QVVAY^^E5J5O5OPQ5RR 6FF1INN5)..q1A5r~   c                    | j                   d   j                  d   j                  d      dk  xr. | j                   d   j                  d   j                  d      dk  S )Nr*   r  r   @   r  r  s    r|   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channel	  sT    q	u%**1-3 8FF1INN5)..q1R7r~   groupedsmallin_outr   zConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)r  r   r   bool)r  r   r   r  )r+   layout_optimizationforce_layout_optimizationr\  r   r   r   r   r   r   r   r   r   r   backendsr   enabledis_availableallrE  logdebugr   r   floatr:   r   valuesmap)ru  r  r  
conv_nodesnconvr  r  r  flop_countsr   counted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                      r|   r  zGraphLowering.decide_layout_opt  s    ))++ xx~~
UYY^^5O5O5W5W)WA

 
  	%Aa !!!$	% JA: NN!!))%%224 #  
 tBHHNN#$e3IIBC 

 

 IIw 	;
		 ,7,>K" 8 .t 4 (d# )I%d+ 'I&t, (I )II&-7&8  		67
 "'!& %$k0023K I&);;g&)99:h'*;;< i(+==>  +k9M 		~"
 ! & s:z*+IIX  s$j12IIT  s#Z01IIVWg
s   ?K2;K2c                @    | j                   | j                    d| S |S )z2Prepend the given name with the graph name if any.r  )rJ  rt  rJ  s     r|   qualify_namezGraphLowering.qualify_namei  s&    99 ii[$((r~   c                    t        | ||| j                  | j                  | j                  | j                  | j
                  | j                  | j                  |      
      S )a  
        Make a subgraph of the current graph with all inherited parts, except
        the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
        separately and lifted into a separate function in the parent output
        wrapper code.  The subgraph name is qualified by the parent graph's
        name. Note that the lifting of subgraph is supported for python wrapper
        only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
        )
parentru  r  rv  rK  rO  r?  r  r  rJ  )SubgraphLoweringr  rK  rO  r?  r  r  r  )rt  ru  r  subgraph_names       r|   make_subgraphzGraphLowering.make_subgrapho  sZ      )oo((]]#'#>#>**((""=1
 	
r~   c                   d}t         j                  j                  j                  j                  g}t        t                  }t        | j                  j                  j                        D ]  }|j                  t         j                  j                  j                  j                  u r|j                  |       ||}S|j                  |v rbt        |      r|j                  |       |j                  D ]  }||v s|j                  |          | j                  j                  j                  D ]B  }|||k(  r |S ||v s|j                  D ]"  }|j                  |v r|j                  |       $ D |S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        N)r   r   r   bmmr   r(   r%   r   rd  r\  r   r   r   r   r   users)rt  	last_convnodes_cannot_propagate
output_setr  users         r|   rY  z-GraphLowering.find_nodes_prefer_channels_last  sZ   & 	"')).."4"4"<"<!=%'
$++++112 	Axx599>>55===q!$ !Ixx11a q! :%NN1%	< ""(( 	)A$i  JGG )D{{&<< NN4()	) r~   c                    || j                   vr2| j                   j                  |       t        j                  d|       y y )NzUsing FallbackKernel: %s)r[  r   perf_hint_loginfor  s     r|   warn_fallbackzGraphLowering.warn_fallback  s:    t,,,!!%%d+94@ -r~   c                R   | j                   j                  |j                         |j                  %| j                  j                  |j                         t
        j                  j                  r7|| j                  vr(t
        j                  j                  | j                  |<   y y y rw   )	r#  r   r  indexr$  rf   r\  r@  ra  r  s     r|   add_device_infozGraphLowering.add_device_info  sy    fkk*<<#  .77F$2J2J$J/0ww/C/CD$$V, %Kr~   c                "    t         j                  S rw   )rf   	fake_moder|  s    r|   r  zGraphLowering.fake_mode  s    {{r~   c           	        || j                   v r| j                   |   S || j                  v r| j                  |   S || j                  v rzt        j                  j                  |   }t        j                  |t        j                  |j                  |j                  gt        j                  j                  |             S y NrJ  r  )rD  r  r-  rf   r\  r,   ConstantBufferr>   r
   dtyper  )rt  buffer_namer  s      r|   try_get_bufferzGraphLowering.try_get_buffer  s     $---&&{33$+++$$[11$..(77$$[1D$$ ~~KK./gg.J.J4.P  r~   c                    t        d      )Nz'Should not be called for the main graph)r   )rt  symbols     r|   add_symbol_graph_inputz$GraphLowering.add_symbol_graph_input  s    DEEr~   c                H    | j                  |      }||S t        d|       )Nz$Failed to find buffer matching name )r  r   rt  r  bufs      r|   
get_bufferzGraphLowering.get_buffer  s1     !!+.?JA+OPPr~   c                   || j                   v r| j                   |   j                  S t        | j                  d      r|| j                  j                  v ro| j                  j                  |   }|| j
                  v r| j
                  |   j                         S || j                  v r| j                  |   j                         S || j
                  v r| j
                  |   j                         S || j                  v r| j                  |   j                         S t        j                  d|      }|r | j                  |j                  d            S t        d|       )Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r*   could not find )r-  r  r   rT  r  rD  	get_dtyper  rematchgroupr  )rt  r  mutated_bufr   s       r|   r
  zGraphLowering.get_dtype  s7   $..(>>+.444 DNN$89t~~@@@..;;KHKd111**;7AACCd///((5??AA$---&&{3==??$+++$$[1;;==HHI;W>>!''!*--677r~   c                F   || j                   v r| j                   |   j                         S || j                  v r0| j                  |   }|j                         sy|j	                         S || j
                  v r| j
                  |   j	                         S t        d|       )Nr*   r	  )r-  numelrD  has_tensor_outputr  r  r  r  s      r|   r  zGraphLowering.get_numel  s    $..(>>+.4466$---%%k2C((*==?"$+++$$[1;;==677r~   c                ^    t        d      5  t        |   | cd d d        S # 1 sw Y   y xY w)NzGraphLowering.run)r   r  run)rt  rz   ry  s     r|   r  zGraphLowering.run  s-    -. 	&7;%	& 	& 	&s   #,c                   |j                   
J d|        t        |t        j                        sJ | j	                  dt        | j                               }| j                  j                  |       || j                  |<   ||_         |S )NzOperation registered twice: r   )	operation_namer   r,   	Operationr  r   r*  r   rG  )rt  r   rJ  s      r|   register_operationz GraphLowering.register_operation  s      (M,H*MM("bll+++  2c$//&:%;!<=r" " r~   set_namec                  | j                  dt        | j                               }| j                  j                  |       || j                  |<   |j                         }|St        |t        j                        r(|j                         r|t        j                  d      k(  s| j                  |       |r||_        |S )Nr  r  )r  r   r)  r   rD  
get_devicer   r,   r  is_zero_elementsr   r
   r  rJ  )rt  bufferr  rJ  r
   s        r|   register_bufferzGraphLowering.register_buffer&  s      3s4<<'8&9!:;F#$*D!""$ 62#4#45++-ell511   (FKr~   c                j    | j                  ddj                  |      z         }|| j                  |<   |S )Nlist_r  )r  r   rA  )rt  operation_namesrJ  s      r|   register_operation_listz%GraphLowering.register_operation_list:  s3      388O+D!DE*

4r~   c                &     d fd |       y )Nc                    t        | t        t        f      r| D ]
  } |        t        | t        j                        r4| j                         D ]   }j                  |   j                  |        " y y rw   )r   rE  tupler,   rF   get_read_namesrF  r   )valuex	read_nameregisterrt  s      r|   r*  z1GraphLowering.register_users_of.<locals>.register@  sm    %$/  AQK %.!&!5!5!7 @I&&y188?@ /r~   )r'  Iterable[ir.IRNode] | ir.IRNoder   Nonerx   )rt  node_outputr*  s   ` @r|   register_users_ofzGraphLowering.register_users_of?  s    	@ 	r~   c                    t        |t              sJ | j                  j                  |       || j                  vry| j                  |   D ]  }|j                           y)z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)r   strr7  r   rF  realize)rt  rJ  r  s      r|   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutatedJ  s\    
 $$$$  &t)))&&t, 	DLLN	r~   c                    || j                   v r|| j                  v s
J d|z          t        | j                   |         }|| j                  j                  v r| j                  j                  |   S | j                  |   S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )re  r-  rZ   rd  r   )rt  rJ  	orig_names      r|   get_original_value_of_constantz,GraphLowering.get_original_value_of_constantX  s     t3338N 	
2T9	
N 5T5Q5QRV5WX	 DKK,,, KKY'	
 %	
r~   c                   t         j                  j                  s2| j                  j	                         D ]  \  }}t        ||      s|c S  |dt        | j                         }|}|d   j                         rd| }| j                  |      }t        |      }|}d}|| j                  v r| d| }|dz  }|| j                  v r|| j                  |<   |j                  d|j                  dt        |j                               dt        |j                               dt        |      d	| j                   |<   || j"                  |<   |S )Nconstantr   	constant_r  r*    r(  )r+   aot_inductoruse_runtime_constant_foldingr-  itemsr^   r   isdigitr  r`   r
   r  r%  r  r  hashr3  re  )rt  rJ  r  constant_namer'  r4  prefixcnts           r|   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_nameh  sX   ""??(,(<(<(> )$u!$.(() <c$..123D	7??tf%D  &  %dnn$XQse$D1HC dnn$  $t{{oQtzznATYY[!$AeDKKM&:%=QDz!n 	D!
 .7$$T*r~   c                    | j                  ||      }t        j                  t        j                  |t        |j                  |j                  g| j                  |                   S r  )	rB  rF   creater,   r  r>   r
   r  r  )rt  r  rJ  new_names       r|   add_tensor_constantz!GraphLowering.add_tensor_constant  s`    33D$?"KK.2.G.G.M
 	
r~   c                v   | j                      j                  |k(  s|S t        j                  j                  j                         5  | j                   d|j                   |j                  xs d | j                      j                  |            }|| j                   v s
J | d       t        fd| j                  D              r| j                   |   | j                  |<   t        fd| j                  D              r| j                   |   | j                  |<   |cddd       S # 1 sw Y   yxY w)z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        Nr  r   z' should be in V.graph.constants alreadyc              3  :   K   | ]  }t        |      k(    y wrw   r`   )r   r  rJ  s     r|   r   z.GraphLowering.constant_name.<locals>.<genexpr>  s#       {33   c              3  :   K   | ]  }t        |      k(    y wrw   rI  )r   
param_namerJ  s     r|   r   z.GraphLowering.constant_name.<locals>.<genexpr>  s#       z22rJ  )r-  r
   r   utils_python_dispatch_disable_current_modesrB  r  r  tor   r.  r/  )rt  rJ  device_overridenon_dup_const_names    `  r|   r?  zGraphLowering.constant_name  sE    >>$&&/9_=TK[[))@@B 	& "&!A!A&/../0E0E0J/KLt$''8"
 &7 %&&MN7  #'#5#5  :>&:""#56  "&"7"7  =ANN&=%%&89 &=	& 	& 	&s   
CD//D8c                	   | xj                   dz  c_         t        | 	  |||      }| j                  |      }t	        |t
              rrt        j                  j                  st        |j                        }n|j                  j                  }|| j                  |<   | j                  j                  |       |S t	        |t        t         t"        f      rAt%        j&                  |      }|| j                  |<   | j                  j                  |       |S t	        |t(              r9t+        ||      }|| j                  |<   | j                  j                  |       |S || j                  j                  |       y t	        |t,              r| j                  j                  |       y t	        |t.        j0                        rt3        t        j                  j4                  j6                        dk(  rt9        t;        t        j                  j4                  j6                              j<                  t.        j>                  j@                  jB                  t.        jD                  jF                  jH                  fv sJ tK        jL                  ||jN                        }|| j                  |<   | j                  j                  |       |S tQ        tS        |            rCtK        jT                  ||      }|| j                  |<   | j                  j                  |       |S t	        |t.        jV                        sJ |       |jX                  s| j[                  |      \  }	}
n| j]                  |      \  }	}
| j                  re| j^                  rY| j                   | j^                  v rAta        jb                  te        |tg        |jN                  |jh                  |	|
                  }n@ta        jb                  tk        |tg        |jN                  |jh                  |	|
                  }|| j                  |<   | j                  j                  |       |jl                  jl                  | jn                  |<   | j4                  j6                  r| jq                  |jN                         ts               5  tu        |      s| jv                  jy                  |       d d d        |S # 1 sw Y   |S xY w)Nr*   rJ  r'  )rJ  r
   r  )=rq  r  placeholderr  r   r$   rf   r\  r  r   r   r  r  r  r   intr  r  r   sympifyr   rG   r   r   	Generatorr   r@  r  rQ  iterr   _prims	rng_primsgraphsafe_run_with_rng_stater   higher_orderinvoke_subgraphr,   GeneratorStater
   r   r  OpaqueObjectStater   _has_symbolic_sizes_stridesr  r  rr  rF   rD  r=   r>   r  rA   r  r   r  r_   ra   rh  r   )rt  r   rz   r{   exampler  r   gen
opaque_objsizesstridestensorry  s               r|   rU  zGraphLowering.placeholder  s    	!'%fdF;""6*gx( 77&&,W\\:||(((,Df%""))&1K#tU!34==)D(,Df%""))&1K!12!vW=C(+Df%""))&1J_""))&1g}- ""))&11qww++112a7DQWW))//0=f&&CC		&&66=   ##GC(+Df%""))&1J%d7m4--6IJ(2Df%""))&1'5<<09'90
 22!66w?NE7!88ANE7 $$$$(<(<<%%&w~~w}}eWUF %%&w~~w}}eWUF %+&!%%f--3[[-=-=""6*""  0 12 	3.w7&&**62	3 	3 s   2'S##S-c                   t         j                  u r/t        |d   t        t        t
        f      rt        |   ||      S t        t        j                  j                        st        d      r |i |S t        vrct        t        j                  j                        s
J  d       j                         j                  d      d   }|t         v rt#        d| j$                  d       nt&        j(                  rt+        g      rt,        nt.        }t0        j3                  d|j5                  ||             t7        d	      }|<t        j8                  j:                  j=                        r| j>                  rt@        }nt7        d	      }tC        |      }t#        || j$                  
       n&t+        g      rt-        ||      t/        ||      	 t0        jE                  dt                  | jF                  }	tI              }
|
r||}}|
tJ        u rd|	jL                  v r|	jL                  d   \  }}t        t        j                  j                        sJ dfd} |||      \  }} |||      \  }} |||      \  }}tK        ||||      \  }}n |
|	g|i |\  }}d|	jL                  v r tO        d      |i |}nd }tP        v r_tR        jT                  vrMtR        jT                  jW                         	 tQ           |i |}tR        jT                  jY                         |*t        v rt           |i |}n tO        d      |i |}|
r| j[                  |	||       |S # tR        jT                  jY                         w xY w# t\        $ r}d }t        | d      r^| jF                  Rt        | jF                  d      r<| jF                  jL                  &| jF                  jL                  j_                  dd       }ta        ||||      jc                  |jd                        d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnr
  override_decompz"Creating implicit fallback for:
%s)with_default)layout_constraintr
  z  via %seager_input_valsc                v    t         j                  j                  j                  | |      }|J |d   |d   fS )Nr   r*   )r   r   operator_schemasnormalize_function)rz   r{   r   r   s      r|   	normalizez.GraphLowering.call_function.<locals>.normalize  sE    %*XX%>%>%Q%Q &f&F $*#55#5#)!9fQi#77r~   should_fallbackadd_to_fallback_setr@  r   stack_trace)rv  )rz   r   r{   r   r   ztuple[Any, Any])3operatorgetitemr   rE  r%  dictr  r   r   r   OpOverloadPacketr   rM   r   rJ  r   rJ   rN   r
  r+   implicit_fallbacksr   r8   r9   r  r  operator_strr   _libraryrM  
is_builtinr  rQ   rR   r  r@  rO   rH   r   rK   rT   rf   active_user_lowering_opsr   discardpropagate_mutation	Exceptionr   r7   with_traceback__traceback__)rt  r   rz   r{   	base_nameerrortagdecided_constraintdefault_tagr  layout_constraintsold_args
old_kwargs	fake_argsfake_kwargsrr  outerv  ry  s    `                 r|   r   zGraphLowering.call_function&  s^   X%%%*T!WtUD>Q*R7(v>> &%**"="=>71D
 4*6**"fejj&;&;< (/0< ++C03I//"&"4"4$(	 ** *6(3 .5 
 9&&vtV<
 ,E, K,,77?(( + ' 1JT1K *B+)N&&8"&"4"4 $VH- 0fEE264HHS	8IIj)F"34!!A!9&!A!'+V*%)BB
 *QVV3128J1K.	;  *&%**2G2GHHH8 2;9k1R.	;'0v'>f/8:/N,*'@ &)[(f $6a#I$#I&#ILD& AFF*I&v5I#  n,a&@&@@ ..226:C,V4dEfE22::6B ;*'/@@ R.v5Q!%+ " ''8ZvNJ% 22::6B&  	8Kn-%%1D--v6%%**6"//4488M#64[nQ__-48	8s3   )D0N0 N (A#N0 !N--N0 0	Q9BQQc                Z    t        | j                        dk(  xr | j                  d   dk  S )zM
        True if this is a small constant attr that will be inlined.
        r*   r      )r   shape)ts    r|   can_inline_constantz!GraphLowering.can_inline_constant  s(    
 177|q 4QWWQZ1_4r~   c                .   t        | j                  |      }t        |t        j                  j
                        rE|| j                  v r| j                  |   S t        j                  ||      }|| j                  |<   |S t        |t        j                  j                        r+|| j                  |<   d| j                  |<   t        ||      S t        |t              r+|| j                  |<   d| j                  |<   t        ||      S t        t!        |            r+|| j                  |<   d| j                  |<   t        ||      S t        |t        j"                        sJ t$        j&                  j(                  s)t$        j*                  st-        |      s|| j.                  v r| j1                  ||      S t3               5  |j4                  dk(  r9t7        |j9                         |j:                  |j<                        cd d d        S | j?                  |      rRt@        jC                  d|       ddl"m#}  ||jI                         |j:                  |j<                  	      cd d d        S 	 d d d        | j1                  ||      S # 1 sw Y   xY w)
N)rJ  graph_moduler  rT  rx   )r'  r  r
   zInlining constant: %s r*   )rg  )r  r
   )%r   rd  r   r   r   rm   r2  r,   Subgraphr   ScriptObjectr0  r3  rG   r   r   r  r   r+   r:  r;  always_keep_tensor_constantsrS   r  rF  r'   r  r<   itemr  r
   r  r  r  loweringrg  tolist)rt  r   rz   r{   r'  r  rg  s          r|   get_attrzGraphLowering.get_attr  s(    "$++v6eUXX112,,,**622++6>C*-D'JeUXX223/4D$$V,*,D'"e<</0/4D$$V,*,D'"e<<DK(/4D$$V,*,D'"e<<%...<<22(/333++E6::] 
	V{{b **,ekk%,,
	V 
	V
 ''.		2F;,ellnEKKU
	V 
	V
 /
	V ''v66
	V 
	Vs   ?JAJJc                    t         rw   AssertionErrorrt  r   rz   r{   s       r|   call_modulezGraphLowering.call_module      r~   c                    t         rw   r  r  s       r|   call_methodzGraphLowering.call_method  r  r~   c                	   t         |   |||      }t        |t        t        f      s|f}t        |t        t        f      sJ t        |             |D cg c].  }t        t        |            rt        j                  |      n|0 }}t        d |D              sJ |       t        j                  j                  j                  d   }t        |t        t        f      s|f}|D cg c]!  }t        j                  j                  |      # }}g }t!        |      t!        |      k(  sJ t#        ||      D ]C  \  }}	t        |t        j$                  t        j&                  f      s|j)                  |       Ct        |j+                         t        j,                        r/|j)                  t        j                  j/                  |             t0        j2                  j                  j5                  |      sJ |	j6                  d   j9                         D 
cg c]4  }
t        |
t0        j:                        r|
j<                  j>                  n|
6 }}
|j)                  t        j@                  ||             F || _!        | jD                  jG                         D ]r  \  }}t        |tH        tJ        jL                  t0        j2                  j                  jN                  t0        j2                  j                  jP                  f      rnt        |t$              sJ dt        |              |jS                          t        |t$              sJ |jT                  }t        |t        jV                        sJ |}|jT                  }t        |tX              r|j[                         |k7  st        j\                  j_                  || j`                  |          	 | jB                  jc                  |      }| j`                  |   | jB                  |<   u | jg                          th        jk                  d| jl                  | jn                  | jn                         y d       y c c}w c c}w c c}
w # td        $ r Y w xY w)Nr'  c              3    K   | ]  }t        |t        t        j                  t	        d       t        j
                  t        j                  t        j                  j                  j                  t        t        j                  t        j                  t        t        j                  t        j                   t        j"                  f        y wrw   )r   rF   r,   r<   r  r  r   r	   logicboolalgBooleanrV  EffectfulKernelrD   rG   OpaqueMultiOutputOpaqueValueTypeConstantr`  )r   r(  s     r|   r   z'GraphLowering.output.<locals>.<genexpr>  s      
& % KKJ%%JJKK''//&&,,#((..((
s   C
Cr   r  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr  )8r  r   r   r%  rE  r  r   r,   r  r  rf   r\  r@  rz   ExternKernelrealize_inputr   ziprF   BaseViewr   get_output_specCommBufferLayout
copy_inputr   	_inductoris_storage_and_layoutr   r  r  r   r  try_match_insignificant_stridesr   r  r<  rG   r   Basicr_  r`  r1  r  rE   rA   get_nameMutationLayoutSHOULDREMOVErealize_intor   r  
ValueErrorfinalizer  r  r  rP  )rt  r   rz   r{   r   r(  fx_node_argsresult_correct_stridesrfx_nodesmeta_stridesrJ  r'  value_storage_boxindry  s                   r|   r   zGraphLowering.output
  s    f5&5$-0YF&5$-0>$v,>0 
 4HQ3PB&&Q/VWW
 
  
& '
 
 	( )	 
, ww++003,6(?L<BCq"////2CC!#< CK///fl3 	JAwa",,!<=&--a0A--/1D1DE '--boo.H.H.KL ))??BBB %\\%0779   $.a#>AFFKKAE   
 '--66q,G%	, 4,,224 	KD%#KKOO&&55OO&&88	 eY/ 9$u+G/ MMOeY///JJEeR]]333 %JJEe[1U^^5E5M--::455d;,,223DEC.2.H.H.ND&&s+9	@ 			U''!]]6DMM	
 =?	
w
< D" P " s$   3Q$(&Q)9Q.7Q33	R ?R c                F    | j                   D ]  }|j                           y rw   )r)  decide_layout)rt  r  s     r|   r  zGraphLowering.finalizew  s!    << 	 C	 r~   c              #  b   K   | j                   }	 || _         d  || _         y # || _         w xY wwrw   )r@  )rt  r   olds      r|   set_current_nodezGraphLowering.set_current_node{  s1     	$ $D #DDs   /# /	,/c              #  T   K   | j                   }	 d  || _         y # || _         w xY wwrw   r<  )rt  r  s     r|   set_current_wrapper_codez&GraphLowering.set_current_wrapper_code  s)     	$ #DDs   ( (	%(c                    t        |      t        |      k(  sJ t        |      t        |      k(  sJ |j                  t        j                  j                  j
                  u r	|j                  d   }t        |t              sJ t        j                  j                  j                  |d   |d   |j                         D ci c];  \  }}|t        |t        j                  j                        r|j                  d   n|= c}}|d         }	|	D ]X  }
|d   |
   }|d   |
   }||u r j!                  t        j                  j"                  j$                  j&                  ||fi        Z yt        |j                  t        j(                  j*                        sJ 	 	 	 	 	 	 	 	 d fd}|j                  j,                  }t/        t1        ||            D ]!  \  }\  }}|j2                  |   } ||||       # |j2                  D ci c]  }|j4                  | }}|D ]  }||   }||   }||   } ||||        yc c}}w c c}w )	ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        r{   
kernel_idxconstant_args_idxr  tma_descriptor_metadataNc                L   ||u ry | j                   | j                   j                  r{t        |t        j                        r|f}|f}t        ||      D ]K  \  }}||u rj                  t        j                  j                  j                  j                  ||fi        M y y y rw   )
alias_infois_writer   r,   IRNoder  r   r   r   r   copy_r   )
schema_argold_argnew_argold_arg_itemnew_arg_itemrt  s        r|   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagate  s     '!$$0Z5J5J5S5S gryy1&jG&jG25gw2G .L,#|3 &&		,,44|\6RTV 6T0r~   )r  ztorch._C.Argumentr  	ir.IRNoder  r  r   r,  )r   r   r   r   r]  triton_kernel_wrapper_mutationr{   r   ry  r   r   get_mutated_tensorsr<  r   r%   r   r   r   r  r   r   r   _schemar   r  	argumentsrJ  )rt  r  r  r  new_args
new_kwargsr{   rw  rx  mutatedrJ  r  r  r  schemar   r  argschema_kwargskeys   `                   r|   r  z GraphLowering.propagate_mutation  sN     8}H---:#j/111>>UYY33RRR^^H-Ffd+++--@@TT<(./ !'1 
1ehhmm(Dqvve}!K 45G   Y$X.t4$X.t4g%""599>>#7#7#?#?'7ASUWXY '..%***?*?@@@	)	4=	HQ		( '''0Xx1H'I 	:#C#'7))#.JJ9	: 392B2BC33CC 	:C oG oG&s+JJ9		:YT Ds   A I 	Ic                X    | j                   j                  di       j                  d      S )z:Get the user-annotated stream index from FX node metadata.customstream)r   r   r  s    r|   _get_node_streamzGraphLowering._get_node_stream  s$     vvzz(B'++H55r~   c                   | j                  |      }|j                  D ]_  }| j                  |      }||k(  r| j                  j                  |      }t	        |t
        j                        sP|j                          a y)aV  Realize IR inputs that are on a different stream.

        Without this, pointwise ops across stream boundaries would be inlined
        into each other during lowering, making it impossible for the scheduler
        to split them into separate kernels.

        None means the default stream, so it is compared like any other value.
        N)r  r   envr   r   r,   rF   r1  )rt  r  node_stream
input_nodeinput_streamir_values         r|   $_realize_inputs_at_stream_boundariesz2GraphLowering._realize_inputs_at_stream_boundaries  sp     ++A.++ 	#J00<L{*xx||J/H(BLL1  "	#r~   c                &#   #$%&' d'fd}	 	 	 	 	 	 d(%& fd}ddl m} t         j                        $t         j                        't        g      }j                  dk(  }|r4 j                        \  }}|t        ||      z  } j                         t        j                  j                  |      5  t        j                  j                   j                              5   j                        5  t!        j                        5  j                  dk(  rj"                  rt%        j"                  t&        j(                  j*                        r{t&        j,                  j.                  j1                  j"                        rHt3              s|j5                  ddfd      r' |d	        t7        j"                  d
      i }	nBj                  dk(  rzt%        j"                  t&        j(                  j*                  t&        j(                  j8                  f      r2t;              r' |d	        t7        j"                  d
      i }	nj                  dk(  rj"                  t&        j<                  j>                  j@                  u rtB        jD                  dk7  r |d       tB        jD                  dk(  r}
}jF                  jI                  d      x}r|d   }|d   }tK        ||||      \  }}ntM        g|i |\  }} jO                  j"                  ||      }	 jQ                  |
|||       ntS        dtB        jD                         tU        j"                        r |d       t%        jF                  d   t&        jV                  t&        jX                  t&        jZ                  f      r$jF                  d   j\                  j^                  }	n't`        (         }	n |d       t`        (         }	t&        j<                  jd                  jf                  jh                  t&        j<                  jd                  jj                  jh                  t&        j<                  jd                  jl                  jh                  t&        j<                  jd                  jn                  jh                  t&        j<                  jd                  jp                  jh                  g#ts        d jt                  D              } jv                  v &ts        #fdjt                  D              %jF                  jI                  dd
      rt%        |	tx              r|	j{                          jF                  d   j}                         }t'        j~                  j.                  j                  | }|	j                         |k7  r7|s5t        j                  |      }t        j                  j                  |	|      }	|rDt%        |	tx              r4t%        |	j                  t        j                        r|	j{                          |s%rt%        jF                  jI                  d      t&        j                        r&r jv                  jI                        }njF                  d   j}                         }|t        |      dkD  rtB        j                  xs & xr % }t&        j                  j                  jF                  d         }t        t        |            dkD  }|sl|rjt        |	j                               dk(  rN j                  v r@&s>%s<t        j                  j                  |	j                         t&        j                        }|st        |      rŉjF                  d   j                         s$t%        |	j                  t        j                        r6t        j                  j                  |	t        j                  |      |      }	nNt        |	j                               dk(  rt        |      dkD  rg }t        j                  j                  |	||      }	t        t        jt                              }|dkD  r)t%        |	tx              rjt                  D ]  }|j"                  t        v rn|	j                          t&        j<                  jd                  j                  jh                  t&        j<                  jd                  j                  jh                  t&        j<                  jd                  j                  jh                  g}g } j                  s=|j                  t&        j<                  jd                  j                  jh                         t&        j                  j                  r|t&        j<                  j                  j                  jh                  t&        j<                  j                  j                  j                  t&        j<                  jd                  j                  jh                  t&        j<                  j                  j                  jh                  t&        j<                  j                  j                  j                  t&        j<                  j                  j                  j                  t&        j<                  j                  j                  j                  gz  }|t&        j<                  j                  j                  jh                  t&        j<                  j                  j                  j                  t&        j<                  j                  j                  j                  t&        j<                  j                  j                  jh                  t&        j<                  j                  j                  jh                  t&        j<                  j                  j                  j                  gz  }t&        j                  j                  r2|t&        j<                  j                  j                  jh                  gz  }|j"                  |v rPt        j                  j                  |	t        j                  jF                  d   j}                               d      }	|j"                  |v rd|j                  d   u rSt        j                  j                  |	t        j                  t        jF                  d   j                                    }	|j                  dk(  st%        |	j                  j                  t        t        f      stB        j                  r9|	j                  j                         dk(  r|	j                  j                         s|	j{                           |	j                  }t%        |t              sqt%        |t        j                  t        j                  f      rG|j                  }t%        |t              s+t%        |t        j                  t        j                  f      rGt%        |t              r-|j                  t        jt                              r	 ||	      }	|	j                  t        jt                               t%        |	tx              r)|	j                         r ||	      }	|	j                          t%        |	tx              rbt%        |	j                  t              rH|	j                  j                  }t%        |t              r"|j                  d      r|	j{                          ddd       ddd       ddd       ddd       t        	        j                  |	       t        t        j                            } j                  $d D ]  }||j                         z  }  j                  'd D ]  }||j                         z  } t         j                  j                   j                  }j                  dk(  r>t%        |	t        j                        r$|j                  |	      r|j                  |	       d)$' fd } t         j                  j                  rj                  dk(  r|	S t        t         j                  j                   j                  jF                  jI                  d!i             }!|!J t        d" |!D              }"||"k\  s'J d#| d$|" d%j                          d& |                 j                  |       |	S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y    xY w)*z4Lower and execute a single FX node into Inductor IR.c                Z    t         j                  dt        j                        |        y )Nzlowering %s %s)r  r  r   format_node)msgr  s    r|   r  z%GraphLowering.run_node.<locals>.debug  s    II&
1==(A3Gr~   c           	        t         j                  j                  |j                  d         }|j                  d   j	                         }t        t        |            dkD  }|s|rt        | j                               dk(  re|j                  v rWsUsSt        j                  j                  | t        j                  t        |j                  d   j                                    } | S )Nr  r      )r   _prims_common%is_non_overlapping_and_dense_or_falser   r  r   r   r  rZ  r,   r  require_stride_orderget_stride_orderr   r  )r   r  denserf  unbacked_symbols_in_stridesis_input_for_as_stridedis_user_visiblert  s        r|   &maybe_apply_channels_last_stride_orderzFGraphLowering.run_node.<locals>.maybe_apply_channels_last_stride_order  s     ''MMuE ffUm**,G*-.CG.L*MPQ*Q'/)*a/888'/==''6qvve}7J7JK Mr~   r   )CompilerBisectorr   inductorrM   c                     t               S rw   )reprr  s   r|   <lambda>z(GraphLowering.run_node.<locals>.<lambda>*  s    a r~   rK   Frt  flexible_layout-user_defined_triton_kernel_layout_constraintsneeds_fixed_stride_orderrn  r*   z1Unknown triton_kernel_default_layout_constraint: r   r  r  c              3  :   K   | ]  }|j                   d k(    yw)r   Nr   )r   r  s     r|   r   z)GraphLowering.run_node.<locals>.<genexpr>~  s     DDDGGx/Ds   c              3  :   K   | ]  }|j                   v   y wrw   )r   )r   r  as_strided_opss     r|   r   z)GraphLowering.run_node.<locals>.<genexpr>  s      *26~-*rJ  inductor_realize_to_stridesNr  )allow_paddingTr   d   )	thresholdrU  c                     j                   d  D  cg c]  } d| j                          d|  d }} |j                  d j                  d  D               dj	                  |      S c c} w )Nunbacked_symbol_defs= in:

c              3  J   K   | ]  }d |j                          d| d  yw)r  r  r  N)get_unbacked_symbol_defs)r   r   s     r|   r   zBGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>O  s2       ((C(C(E'FfRDPRSs   !#z***
)r)  r  extendr*  r   )r  r  buffer_watermarkoperation_watermarkrt  s     r|   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defsJ  s      <<(8(9: ((D(D(F'GvcURTUA  HH //*=*>?  <<?"s   A)unbacked_bindingsc              3     K   | ]8  }t         j                  j                  j                  j	                  ||       : y wrw   )rf   r  rv  unbacked_renamingsr   )r   r  s     r|   r   z)GraphLowering.run_node.<locals>.<genexpr>  s5      /
 KK!!4488A>/
s   >A zfailed  >= z (inductor >= fx)
fx node is: z
new operations are:

)r  r0  r   r,  )r   r  r  r   r   r  r   r0  )!torch._inductor.compiler_bisectorr  r   r)  r*  r(   r   fetch_args_kwargs_from_envrY   r  r,   r  current_originscurrent_stream_idxr  r  rf   r   r   r   r   r   r}  rM  r~  rL   disable_subsystemrK   HigherOrderOperatorrb   r   r]  r  r+   'triton_kernel_default_layout_constraintr   r   rH   rI   r   r  r   r   r  SymFloatSymBoolr   r  r  run_noder   
as_stridedr   as_strided_as_strided_scatterresize	resize_asr   r  r   rF   r1  r  r  any_is_symbolicmaybe_get_strider  r  r  r  r  r   r   r  r  r   r  rZ  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesrP   realize_hintr   mm_int_mmr  r   r   r   _has_mkldnnr   _linear_pointwiser   mkldnn_rnn_layeronednnqlinear_pointwiserg  binary_tensorr   r    _convolution_transpose_pointwiseqconv_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrz   r   r  rB   rC   delay_realize_cheap_outputs	num_readshas_large_inner_fnrE   
MutableBoxshould_realize_on_reuse
mark_reusehas_exceeded_max_readsr;   r.  r   r   r  r\  r  rv  is_unbacked_symintr   r  r    r  create_deferred_runtime_asserts))rt  r  r  r  r  originsis_call_functionrz   r{   r   r  r  rn  inp_args
inp_kwargs	is_outputrf  sym_stridesstride_orderr  r   r  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layout_datacurrnew_unbacked_defsr  r   rv  r  r  renamed_unbacked_bindingsr  r  r  r  r  ry  s)   ``                                 @@@@@r|   r,  zGraphLowering.run_node  s   	H		"/		0 	Gt||,!$//2 $.qc?44?2::1=LD&~dF33G55a8II%%g.Y	)II(()>)>q)ABY	) !!!$Y	) q!	Y	) 'HHqxx)>)>?NN((33AHH=9!<'99"K
 ()N)!((N
 'HHuzz44ejj6T6TU /q1 ()N)!((N
 'HH		 6 6 U UUBBFWWEFBB12  $H!'J+,66::6H+II'I#3A#6%5a%8
'@ "$&(f (?q'R4'R6'Rf!//$GF++AxT6R&KFLzLzK{|  !* '(FF5MELL%..%--#P VVE]//44F"W-a0Fb	)!, 		))11		**22		1199		%%--		((00N DAGGDDI4#C#CCO&) *:;''* '# vvzz7?J	E  &&-..0#oo33CCWM**,7#%#6#6w#?L__AA&,WFvy1v{{BKK8  4*

5!5<<; #">>BB1EGffUm224G&3w<!+;**A/.A%655 " "//UUuE 1':;a? 0 8! 12a7!@!@@ / 7"$"3"3"T"T"OO-u/B/B# 73w< 66%=113z"KKKK8 &(__%I%I & " 3 3G <.; &J &F  #6??#45:s7|a?O*,%'__%J%J &} &K &F Jqww/0I1}FI!>GG I-D{{&;;++- "IINN??GG!IINN--55!IINN22::-)
 ;=7#-44UYY^^5O5O5W5WX 88//- %		 0 0 B B J J %		 0 0 B B I I %		 ? ? G G %		 0 0 B B J J %		 0 0 B B I I %		 0 0 B B I I %		 0 0 B B P P2 - < %		 0 0 G G O O %		 0 0 G G N N %		 0 0 H H O O %		 0 0 Q Q Y Y %		 0 0 @ @ H H %		 0 0 B B I I@ ;  %xx// 1eiimm6O6O6W6W5X X 1;;*;;%'__%I%I & " 3 3AFF5M4H4H4J K.2 &J &F !KK+JJ !TYYq\ 1%'__%I%I & " 3 3$B166%=CVCV$W!"&F ww(*%fkk&6&6I8NO !' B B$*KK$9$9$;q$@(.(F(F(H ("NN,SI-V $UJ7JBKK7= "JJE %UJ7JBKK7=
 eZ0U5R5RL6 DFANF !!#agg,/ &),1N1N1P @J##% &),FKK1T{{''dI....=(sY	) Y	) Y	) Y	)v 	61%v&&u||46<< 0 12 	@C!=!=!??	@//"5"67 	?B!<!<!>>	? GG$$..	
 DDM!65<<0,,V4!!&)		# 77144=#8M. 6GG&&

3F(K
 !,,, %/ /
&/
 %
!
 !$== 	
'(-F,G H==?+ ,&&5&7%8:	
=
 	,,Q0ABaY	) Y	) Y	) Y	) Y	) Y	) Y	) Y	)s   =/AF,AE9>AE,p9AE	*AE	;C(AE	$DAE	0AE,8AE9 AFEAE)E$AE,E,AE6E1AE9E9AF	E>AFFAFc                @    t         j                  ry d	 fd}t               rv|j                  t        j
                  j                  j                  j                  u r< j                  r0 j                  |      \  }}|d   dk7  r ||d   |d    d       y y  xj                  |z  c_        t        j                  j                  j                  }|D ]G  } j                   j#                  |g       }|j$                  |   }	|j'                         j)                  |	      srd
d}
 |
|	j*                        r% |||	j*                  k\  | d|	j*                           |
|	j,                        r% |||	j,                  k  | d|	j,                          |D ]  }t/        |j0                        }| j                  z
  }|r=t3        |t4              } j                   j7                  |g       j9                  |       f ||j0                  |j0                           J y )Nc                z    t        j                  | |      }j                  |d       j                  |       y )NTr  )r,   AssertScalarr  r  )r  r  	assert_oprt  s      r|   make_assertzBGraphLowering.create_deferred_runtime_asserts.<locals>.make_assert  s4    c2I  T :##I.r~   r   Tz to be Truec                Z    | t         t          fv ry	 t        |        y# t        $ r Y yw xY w)NFT)r)   rV  	TypeError)r  s    r|   is_convertiblezEGraphLowering.create_deferred_runtime_asserts.<locals>.is_convertible  s5    & 11#()F#'( )#()s    	**r!  z <= )r  )r  r#   r  r0  r   r,  )r  r	   r   r  )r+   do_not_emit_runtime_assertionsr   r   r   r   r   _assert_scalarr   rO  r$  r  rf   r\  r  rv  r  r   var_to_range _default_unspecified_value_rangeissubsetlowerupperr   r  r   r0  r   r   )rt  r  r]  rc  	node_argsr  rv  i0rasvrrf  rafvsmissingi1s   `              r|   rP  z-GraphLowering.create_deferred_runtime_asserts  s    00@	/ %&EIINN99AAA::1=LIq|t#IaLYq\N+*FG $ ''+<<'((22I ( ;((,,R4++B/ AACLLRP) &bhh/#B"((Nrd$rxxj4IJ%bhh/#B"((Nrd$rxxj4IJ ;B/8C!D$?$??G c2**55b"=DDRH#BGGy:;);r~   c                    t         j                  rt        d      t        j                  dvrt        dt        j                         y )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform )r+   disable_cpp_codegenr6   sysplatformr|  s    r|   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapper  s@    %%()BCC<<;;(+@)OPP <r~   c                   | j                   j                         }|j                  d       |j                  d       t        |      dk  s%J dj	                  dj                  |                   t        |      dk(  }|rdn|j                         | _        | j                  r| j                          t        | j                        | _        t        | j                  | j                  | j                        }|J d| j                   d       |j                  ||||      | _        | j                   r0| j                   j                  j"                  | j                  _        y y )	Nr  r   r*   zDoes not support mixing {}+r   zDevice z not supported)r#  r  r  r   formatr   r   r%  rK  r}  r2   r;  r3   rL  rD  r<  r  _names_iter)rt  is_subgraphr  parent_wrapper_codepartition_signaturesr#  only_cpuwrapper_code_gen_clss           r|   init_wrapper_codezGraphLowering.init_wrapper_code  sP    ((--/U#V$< A% 	
'C'J'JHH\"(
 	
% |$)$,5,2B2B2D2241$2B2BC=d.. 
 $/ 	
d&&'~6	
/ 177 	
 ,0,=,=,J,J,V,VD) r~   c                @   dd l }|j                  | j                        }|j                  |      }g }|j                  j                  D ]T  }|j
                  dk(  s|j                  t        j                  j                  j                  u sD|j                  |       V g }i }i }g }	i }
|D ]  }|j                  d   D ]R  }|D ]K  }||v rt        |t        j                  j                        s-t!        |      ||<   |j                  |       M T |j                  d   }t        j"                  j$                  j'                  |j                  d   |j                  d   |j)                         D ci c];  \  }}|t        |t        j                  j                        r|j*                  d   n|= c}}|j                  d         }i }|j                  j-                  |      5  |j)                         D ]  \  }}||v rL|j                  j/                  t        j0                  |f	      }t!        |	      ||<   |	j                  |       V||
v r	|
|   ||<   ct!        |	      |
|<   |	j                  |       |
|   ||<    	 d d d        |||j2                  <    |	|z   }|j                  j                  D ]$  }|j
                  d
k(  st5        |      f|_         n |j9                          t        j                  j;                  |      }|j=                  |      }t!        |      dkD  r|t!        |	      d  }i | _        |D ]  }d}g }|j                  d   D ]t  }g }|D ]Q  }t        |t        j                  j                        s|j                  |       9d}|j                  |||             S |j                  t5        |             v |s|| j>                  |j2                  <    |d t!        |	       | _         || _!        y c c}}w # 1 sw Y   xY w)Nr   r   gridr{   r  r  r  r  )rz   r   FT)"r  deepcopyrc  r\  r   r   r   r   r   r]  r  r   r{   r   r   r%   r   r   r   r  r<  r   inserting_beforer   clonerJ  r%  rz   	recompileInterpreterr  rW  rU  rV  )rt  r  r  	cloned_gmtriton_nodesr   grid_inputsvisited_gridstriton_inputskwargs_inputsvisited_kwargsr  r  r{   rw  rx  r  r  new_nodenew_outputsrunnerreturned_outputsgrid_outputsdynamic_grid	new_gridsnew_grids                             r|   extract_autotune_inputsz%GraphLowering.extract_autotune_inputs	  s    	MM$,,/	~6OO)) 	*D?*KK599#9#9#X#XX##D)	* ,.24(*-/)+  &	2DF+ 0 0Cm+ !#uxx}}5-0-=c*#**3/00 [[*F--@@TTL)/0 !'1 
1ehhmm(Dqvve}!K 56G *,J11$7 6"LLN 6DAqG|#,??#@#@TUSW#@#X(+M(:
1%,,X6 N*(6q(9
1 (+M(:N1%!((+$21$5JqM66 (2M$))$M&	2P $k1OO)) 	Dww("";/1		
 	%%i0!::n5{a+C,>,@AL$&D!$ A$.0	 KK/ 	6D!H# J)#uxx}}=$OOC0$'+ ]35G(HIJ $$U8_5	6  7@D))$))4A" "22FC4F!G"/o6 6s   5A P(BPP	c                    t         fddD              rd fd}t        j                  j                  rt        j                  j                  rVd} j
                  D ]   }t        |t        j                        sd} n |r# |       } j                  |       t                 j                         S  j                  s j                         S d _         j                         j                  } |       }t         j"                  j$                  j'                         5   ||       ddd       ~d _         j(                  j+                           j,                  j+                           j.                  j+                          t0        j2                  j4                  j6                  j+                          t0        j2                  j4                  j8                  j+                          t;        j<                          t        j>                  ddi      5   j                         cddd       S  j                         S # 1 sw Y   xY w# 1 sw Y   yxY w)	zQ
        For GPU, Triton kernels are autotuned and stored as cubin files
        c              3  :   K   | ]  }|j                   v   y wrw   )r#  )r   r
   rt  s     r|   r   z9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>u	  s     Ivv***IrJ  )cudaxpuc                    	 	 	 	 dd} t         j                  j                  j                         }|t	        t
        j                  t              sy|j                  r|j                  j                          |j                  D cg c]  }|| }}t        j                  |t
        j                        D cg c]
  } | |       }}nMt	        t
        j                  t              rj                  nt
        j                  D cg c]
  } | |       }}j                  rddlm} t#        j$                        D cg c]2  \  }}|j                  v rt	        ||   t         j&                        r|4 }	}}|	D ]/  }||   }
t	        |
t         j&                        sJ  ||
      ||<   ~
1 |S c c}w c c}w c c}w c c}}w )Nc                .   | y t        | t        j                  t        j                  f      r| j                  j
                  S t        | t              rt        |       S t        | t        j                        sJ dt        t        |             z          | S )Nz&Unknown type when creating real inputs)r   r   r  r*  r   hintr   r   r   r0  r  )r(  s    r|   materializezXGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.materializex	  sx     y##Aenn'EF vv{{*#Az2%ay()!U\\: Ds4PQ7|S:  !r~   r*   )clone_preserve_strides)r(  z,torch.SymInt | torch.SymFloat | torch.Tensorr   zint | float | torch.Tensor)r   _guardsTracingContexttry_getr   rf   real_inputsre   output_stridesclearparams_flatrn  chainr  rB  
compile_fxr  r   r  r   )r  tracing_contextparamr  r(  r  r  r   rJ  rC  mutated_inprt  s              r|   extract_real_inputszCGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputsw	  s   !C!/!" #(--">">"F"F"H".zMM;8 '55'66<<> &5%@%@#! , #K # "+amm!L# $A#K #  *!--E !//!"# $A#K # &&B *343D3D)E*%C4#6#66&{3'7F *& *  2 ( '2#&6)+u||DDD+A++NC('( #"W#
##*s   F29F7F<7GFTNztriton.autotune_at_compile_time)r    list[int | float | torch.Tensor]) r   r+   tritonautotune_at_compile_timeautotune_with_sample_inputsr*  r   r,   UserDefinedTritonKernelr  rt   codegenrO  rK  compile_to_modulecallr   rM  rN  rO  r5  r  r4  r:  rf   r\  r  precomputed_replacementsinv_precomputed_replacementsr-   resetpatch)rt  r  user_defined_kernelsr   r  compileds   `     r|   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrappero	  s    IIID#L }}55 ==<<+0("oo "%b"*D*DE370!" ,&9&;44[A8>||~%}}  <<>) $) 1138813[[11HHJ *[)* $( $$**,''--/''--/  99??A  ==CCE\\#De"LM *<<>* * <<>!!* ** *s   #	IIIIc                    ddl m} t        j                  dd      5   || j                        | _         ddd       y# 1 sw Y   yxY w)z
        (Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
        files should be generated (to avoid biasing any benchmarks and pessimizing
        fusion decisions).
        r*   )	Schedulerztriton.store_cubinFN)rT  r  r+   r  r*  )rt  r  s     r|   _update_schedulerzGraphLowering._update_scheduler	  s;     	)\\.6 	8&t7DN	8 	8 	8s	   >Ac                L   t        dd      5  | j                          | j                          t        j                  j                  | j                  | j                  j                         | j                  j                  |        | j                  j                          t        j	                  dt        j                  j                         | j                  j                  | j                         }| j                  j#                          |cd d d        S # 1 sw Y   y xY w)NzGraphLowering.codegenTlog_pt2_compile_eventzFFinished codegen for all nodes. The list of kernel names available: %s)r   r  r  rf   r  draw_orig_fx_graphrc  rT  r   r<  push_codegened_graphr  r  r\  rm  generater  pop_codegened_graph)rt  r   s     r|   r  zGraphLowering.codegen	  s    1N 	""$""$GG&&t||T^^5I5IJ2248NN""$IIX00
 &&//0A0ABF113!	 	 	s   DDD#c                h   t        dd      5  |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        | j                          | j                  j                          ddd       y# 1 sw Y   yxY w)a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kernels). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        zGraphLowering.codegen_subgraphTr  N)
r   r<  r;  rK  r#  r$  r%  r  rT  r  )rt  parent_graphs     r|   codegen_subgraphzGraphLowering.codegen_subgraph
  s     :RVW 		% , 9 9D*55DO+77D , 9 9D+77D+77D""$NN""$		% 		% 		%s   BB((B1c                    d}g }g }| j                   j                  D ]N  }|j                         }||z  }|j                  ||dz  f       |j                  ||j	                         f       P |||fS )Nr   r  )rT  r   get_read_write_buffers_sizesr   get_estimated_runtime)rt  total_bytesnode_countsnode_runtimesr   	num_bytess         r|   r  zGraphLowering.count_bytes
  s    
 NN(( 	GD99;I9$Ki1n56  $(B(B(D!EF		G K66r~   zCallable[[str], None] | Nonesave_output_codec                l    t        dddd      5  | j                         cd d d        S # 1 sw Y   y xY w)NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namer  dynamo_compile_column_us)r   _compile_to_moduler|  s    r|   r  zGraphLowering.compile_to_module.
  s9    -!"&%S	
 	- **,	- 	- 	-s   *3c                <   | j                   r| j                         n| j                         \  }}t        |t              r| j                  |      }n*t        |t              r|}nt        dt        |             |j                  J t        |j                         t        j                  d|j                         t        j                  d|j                         t        j                   r(t#        d|j                   t$        j&                         t        |t              rut(        j                  j+                  |j                         t(        j                  j-                  t.        j0                  j3                  |j                        d   dz          |S )Nz Unrecognized wrapper code type: Output code written to: %szCompiled module path: )filer   .debug)rK  r  r  r   rd   _compile_to_module_linesr0   NotImplementedErrorr  __file__ru   r  r  rr   r  r+   benchmark_kernelprintr{  stderrrf   output_coder  ospathsplitext)rt  r<  r  mods       r|   r  z GraphLowering._compile_to_module7
  s/    04/?/?D))+T\\^ 	a l$45//=C&;<C%243E2FG  ||'''%		.=93<<H""*3<<.9

Kl$9:GG-GGLL))#,,7:XEF
r~   c                   ddl m} t        j                  j                  rw| j
                  j                  j                         }|j                  dd      }d|z   | j
                  j                  j                         z   dz   }|j                  z   _
        t        j                  t        j                  j                         t        j                  dj                         t        j                          }t#        j$                  |j                         	 j&                  D cg c]  \  }}||j(                  f }}}|j+                  j                        \  }	t        j                  d	       t,        j                  j/                         t,        j                  j1                  t2        j4                  j7                        d
   dz          t9        dfdfd       t=        dd      5  |j?                  |	|i | j@                  | jB                  | jD                        }
d d d        |	| _#        | _$        || _%        t        jL                  r4t        jN                  r$
jQ                         }|
jS                  |dd       
S c c}}w # t:        $ r t9        dfd        w xY w# 1 sw Y   xY w)Nr*   )PyCodeCachez"""z\"\"\"z&r"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%s)coder  r   r  inductor_output_codec                 H     t         j                  j                         dS )N)filename	file_path)r  r  abspath)r  s   r|   r	  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  s     $!#!6 r~   c                      j                   S rw   r  r  s   r|   r	  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
      <#5#5 r~   )
payload_fnc                      j                   S rw   r  r  s   r|   r	  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>}
  r  r~   zPyCodeCache.load_by_key_pathTr  )linemapattrs)timesrepeat)*	codecacher  r+   r  r  r<  kernel_autotune_defsgetvaluereplacekernel_autotune_callsr'  r   r  rr   r  rU   inductor_meta_from_configrV   begin_compileline_maprv  writerf   r  r  r  r  r  r   r  r   load_by_key_pathr-  r0  r1  r]  r^  r_  benchmark_harnessprofile_bandwidth_outputget_argsbenchmark_compiled_module)rt  r<  r  r  tuning_codeinductor_metaline_nor   r  r  r  rz   r  s    `          @r|   r  z&GraphLowering._compile_to_module_linesV
  s    	+==11#'#4#4#I#I#R#R#T #7#?#?{#S 7&' ##99BBDE 	  "-|/A/A!AL))5**<+=+=>1<3E3EF&@@B**=|?Q?QR	 &2%:%:!GT $**+G  $)),*<*<=IC!!">EGG%GGLL))$/2X=> & 6 8PTU 
	..nn.. 44	 / 	C
	 $##(G(G<<>D))$a)B
[  	&5
 	 
	 
	s+   J" .JBJ" ?:K J" "J= K	c                   g }t        j                  d      }t        j                  d      }|D ]  }t        |t        j                        r*|j                  | j                   dt        |              Gt        |t        j                        r*|j                  | j                   dt        |              |j                  |j                                 |S )Nr   _none_shape)
rn  ro  r   r,   NoneAsConstantBufferr   rJ  rQ  rD   r  )rt  r   namesshape_counternone_counterr   s         r|   _get_output_nameszGraphLowering._get_output_names
  s    !* q)! 	.D$ 7 78		{%\0B/CDED"":":;		{&m1D0EFGT]]_-	. r~   c                8    | j                  | j                        S rw   )r  r   r|  s    r|   get_output_nameszGraphLowering.get_output_names
  s    %%d&8&899r~   c                   || j                   v xrh | j                   |   j                         dk(  xrF t        | j                   |   j                               dk(  xr t	        | j                   |         dk(  xs || j
                  v S )Nr*   r   r  )r  r  r   r  r?   r"  r  s     r|   is_unspec_argzGraphLowering.is_unspec_arg
  s     D%%% B!!$'113q8BD%%d+44671<B   1 1$ 78EA	3
 T222	3r~   )NNNFFNNFFFNNNNNNFN)(ru  torch.fx.GraphModuler  zSequence[object] | Nonerv  zShapeEnv | NonerP  
int | NonerK  r  rO  r  r  zbool | Noner?  z1Callable[[list[ir.ExternKernelNode]], Any] | Noner  r  r  r  r  r  r+  zdict[str, int] | Noner  
str | Noner  r  r  zGraphLowering | NonerJ  r  r  zSequence[int] | NonerL  r  r
  z3Callable[..., dict[Any, Callable[..., Any]]] | Noner   r,  )r   r,  )r  torch.Tensorr   z1tuple[Sequence[int | Expr], Sequence[int | Expr]])r  r  r   z)tuple[list[sympy.Expr], list[sympy.Expr]])r   zLir.TensorBox | ir.StorageBox | ir.Buffer | WorkspaceArg | ir.TorchBindObjectr   zSequence[Expr])r   z-ir.Buffer | WorkspaceArg | ir.TorchBindObjectr   r	   )r
   z)torch._inductor.ir.IRNode | device | Noner  r.   r   r  )T)r  rp   r  r  r   rV  )r   torch.device)r
   r  r   Iterator[None]r"  )ru  rm   r  r  r   r  )rJ  r0  r   r0  )ru  r  r  zlist[torch.Tensor]r  r0  r   r  )r   zOrderedSet[Node])rJ  r0  r   r,  )r
   r  r   r,  )r   z,torch._subclasses.fake_tensor.FakeTensorMode)r  r0  r   z4ir.TensorBox | ir.Buffer | ir.TorchBindObject | None)r  
sympy.Exprr   r,  )r  r0  r   z-ir.TensorBox | ir.Buffer | ir.TorchBindObject)r  r0  r   ztorch.dtype)r  r0  r   z
int | Expr)rz   r   r   r   )r   zir.Operationr   r0  )r  z	ir.Bufferr  r  r   r0  )r!  	list[str]r   r0  )r-  r+  r   r,  )rJ  r0  r   r  )rJ  r  r  r   r   r0  rw   )r  r   rJ  r  r   rF   )rJ  r0  rQ  ztorch.device | Noner   r0  )r   r0  rz   ztuple[object]r{   dict[str, object]r   zExpr | TensorBox | None)r   rg   rz   r   r{   dict[str, Any]r   r   )r  r  r   r  )r   r0  rz   z	tuple[()]r{   r#  r   zLConstant | TensorBox | ShapeAsConstantBuffer | ir.Subgraph | TorchBindObject)r   r   rz   r   r{   r   r   r   )r   ztorch.fx.node.Targetrz   z"tuple[torch.fx.node.Argument, ...]r{   r#  r   r,  )r   r   )r   r   )r  r   r  
tuple[Any]r  r$  r  r%  r  r$  r   r,  )r  r   r   r  )r  r   r   r,  )r  r   r   object)r  r   r]  zOrderedSet[sympy.Symbol]r   r,  FNNN
r  r  r  r  r  zPythonWrapperCodegen | Noner  zGraphPartitionSignature | Noner   r,  )r  r  r   r,  )r   z)tuple[ValueWithLineMap, ValueWithLineMap])r  r   r   r,  )r   zVtuple[int, list[tuple[BaseSchedulerNode, int]], list[tuple[BaseSchedulerNode, float]]])r   CompiledModule)r<  rd   r   r)  )r   r   r   r"  )r   r"  )rJ  r0  r   r  )J__name__
__module____qualname____annotations__r	  r{  r  r  r  r  r  r  r  
contextlibr   r  r  staticmethodr  r  r  rY  r  r  propertyr  r  r  r  r
  r  r  r  r  r"  r.  r2  r5  rB  rF  r?  rU  typing_extensionsoverrider   r  r  r  r  r   r  r  r  r  r  r  r,  rP  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  __classcell__ry  s   @r|   r   r   d  s8   ""
 37%)#!"&"!$48)-(,-104 MQ+TC TC 0TC #	TC
 TC TC TC  TC!TC TC TC TC 2TC 'TC  &!TC" +#TC$ %TC& .'TC( )TC* K+TC, 
-TCl1# # 	:# J	2## 
#*EAE	EM9M  M 
	M<:4 ( ( ^ ^@
 
 +
 	

 

6>@A
D  	=$FQQ	6Q8,
8& FK (
	
 :	
&&Rkk k "	k
 
!kZ ]8  ]8~ 5 54747 47 "	47
 
V47l j
$j
 1j
 "	j

 
j
  j
X  $ $ $ $I:I: I: #	I:
 I: #I: 
I:V 6 6#$]~U;U;3KU;	U;nQ "$(;??C#W#W "#W 9	#W
 =#W 
#WJ]0>]0	]0~w"	2w"r	8&%*7
7" 6:29->G,G	GR:3r~   r   c                  L     e Zd ZdZd fdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 d fdZ xZS )r  z
    Mostly a helper class for the subgraph lowering. The main goal is to call
    init_wrapper_code with the subgraph related arguments.
    c                2    || _         t        |   |i | y rw   )r  r  r	  )rt  r  rz   r{   ry  s       r|   r	  zSubgraphLowering.__init__
  s    $)&)r~   c                f    t         |   d| j                  | j                  j                         y )NT)r  r  r  )r  r  rJ  r  r<  )rt  r  r  r  r  ry  s        r|   r  z"SubgraphLowering.init_wrapper_code
  s.     	!)) $ 8 8 	" 	
r~   )r  r   rz   r   r{   r   r   r,  r'  r(  )r*  r+  r,  __doc__r	  r  r3  r4  s   @r|   r  r  
  sU    
* "$(;??C

 "
 9	

 =
 

 
r~   r  )rz   r   r{   r   r   r,  )r   r!  r   ztorch.dtype | None)r   r   r   r  )r   rm   r   r0  r   z,Tensor | torch._C.ScriptObject | GraphModule)r   rn   r   dict[Node, tuple[int, ...]])r   r9  r   zdict[Node, object])r   rn   r   r9  r   r,  )r   r%   r   r  )
__future__r   r.  r   rn  loggingrw  r  r  r{  rH  r1  collectionsr   r   typingr   r   r   r   r	   r   torch._loggingtorch.fxr
   r   torch._decompr   torch._dynamo.utilsr   r   "torch._library.fake_class_registryr   torch._library.opaque_objectr   r   r   torch._library.utilsr   r   r   torch._prims_commonr   r   torch._subclasses.fake_tensorr   torch._utils_internalr   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   r#   r$   torch.fx.noder%   torch.fx.passes.reinplacer&   torch.utils._mode_utilsr'   torch.utils._ordered_setr(   torch.utils._sympy.numbersr)   r  r+   r,   r-   codegen.commonr.   r/   r0   r1   r2   r3   r4   r5   excr6   r7   r8   r9   fx_utilsr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   runtimerU   runtime.autotune_cacherV   r  rW   rM  rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   virtualizedre   rf   collections.abcrg   rh   ri   rj   typesrk   torch._higher_order_ops.effectsrl   rm   torch.fx.graphrn   codegen.wrapperro   dependenciesrp   rT  rq   r)  torch._inductor.codecacherr   	getLoggerr*  r  _logginggetArtifactLoggerr  r   r   ro  rR  r>  )torch._inductor.fb.triton_kernel_metadatart   torch._inductor.fb.utilsru   r   r   r   r   r   r   r   r   r  r   r  rx   r~   r|   <module>rb     s&   "      	 	 
   # % / /        , 4 ? 
 ; 7 5 : ? L	 	 	  1 / / - ! !	 	 	  %       $ 8 &    ( FF ;$$5!,"77N 5 g!00<Hyy~~*9??, 6 9(
	!1$50V/V/+FV/	V/r*S%3EHH(( S%3lJ
} 
r~   