
    9j                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(Z)d dl*Z)d dl+m,c m-Z. d dl/m0Z0 d dl)m1Z1 d dl2m3Z3 d dl4m5Z5m6Z7m	Z8m,Z9 d dl:m;Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZL d dlMmNZNm6ZO d dlPmQZQ d dlRmSZSmTZTmUZUmVZV d dlWmXZXmYZYmZZZ d dl[m\Z\m]Z]m^Z^m_Z_m`Z` d dlambZb d dlcmdZdmeZe d dlfmgZgmhZhmiZimjZjmkZkmlZl d dlmmnZn d dlompZpmqZqmrZrmsZsmtZtmuZumvZvmwZwmxZxmyZy d dlzm{Z{ d d l|m}Z} d d!l~mZ d d"lmZ d d#l*mZ d d$lmZmZ d d%lmZ d d&lmZ d d'lmZ d(d)lmZmZ d(d*lmZ d(d+lmZ d(d,lmZ d-d.lm6Z6mZmZ d-d/lmZmZ d-d0lmZ d-d1lmZ d-d2lmZ d-d3lmZ d-d4lmZmZ d-d5lmZ d-d6lmZ d-d7lmZmZ d-d8lmZ d-d9lmZ d-d:l,mZmZmZmZmZmZmZmZmZ d-d;lmZ er"d d<lmZmZmZ d d=lfmZ d d>lmZ d d?lmZ d-d@lmZ  e"dA      Z edB      Zes e6j                         s	ddCZddDZnd dElmZmZ er/d dFlmZmZmZ eee   gee)j                     f   ez  ee   z  ez  Z G dG dHej                        Ze G dI dJ             ZddKZddLZ eث       Zeڐj                  Zeڐj                  Zeڐj                  Z e	j                  e      Ze)j                  j                  edM      Ze)j                  j                  edN      Ze)j                  j                  edO      Ze)j                  j                  edP      Ze)j                  j                  edQ      ZddRZddSZddTZddUZ ej                  d      ddV       Zej                  ddW       ZddXZ	 	 	 	 	 	 	 	 ddYZ	 d	 	 	 	 	 dd[Z	 	 	 	 	 	 dd\Z	 	 d	 	 	 	 	 	 	 dd]Zddd^Z	 	 	 d	 	 	 	 	 	 	 	 	 dd`ZddaZ	 	 	 	 ddbZ	 	 	 	 	 	 ddcZ	 d	 	 	 	 	 	 	 dddZ	 d	 	 	 ddeZej                  ddf       Z  G dg dhe$dZi      Z G dj dke#      Z	 d	 	 	 	 	 	 	 	 	 ddlZ edmn      	 d	 	 	 	 	 	 	 	 	 ddo       Z G dp dq      Z G dr dse      Z G dt due      Z	 d	 	 	 	 	 	 	 	 	 	 	 ddvZ	 	 	 	 	 	 ddwZ		 ddxdxdxdy	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddzZ
dd{Z	 	 	 	 	 	 	 	 dd|Z	 d	 	 	 	 	 	 	 dd}Zedf	 	 	 	 	 	 	 	 	 dd~Z ed       Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZ	 	 	 	 	 	 	 	 ddZddZ	 	 	 	 ddZ ed_       G d d             Z	 	 	 	 ddZedZf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZef	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZedddZdf	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZedd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 	 	 	 	 	 	 	 ddZ 	 	 	 	 	 	 	 	 ddZ!ddZ"	 ddd	 	 	 	 	 	 	 	 	 ddZ#	 d	 ddZ$y)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)	dataclass)currentframe)count)
attrgetter)AnyTYPE_CHECKINGTypeVar)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)common)get_interface_for_device)wrap_compiler_debug)chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_inputs_devicesget_metrics_context	GmWrapperlazy_format_graph_codeset_feature_use)aot_autogradr   )!unwrap_tensor_subclass_parameters)aot_export_moduleGraphOutputNamemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexcudagraphs_logformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)CustomPartitionerFn)"create_mapping_pre_post_grad_nodessave_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)
	BoxedBoolcount_tangentsfresh_cacheget_all_devicesget_static_bw_input_idxs	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)is_opaque_type)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   distributed_autotunemetrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)Callable	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    t         j                  S N)dynamo_utilsidentityattrs    Z/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                      y r    )argskwargss     r   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                      e Zd ZdZdZdZy)FxCompileModer   r]   rW   N)__name__
__module____qualname__NORMAL	SERIALIZE
SUBPROCESSr   r   r   r   r      s    F IJr   r   c                  ,    e Zd ZU ded<   ded<   ded<   y)FxCompileConfigr   modebool	use_asyncuse_progressiveNr   r   r   __annotations__r   r   r   r   r      s    
Or   r   c                    d} t         j                  j                  |       }|t        t        j
                  dd      S d}d}|j                         j                  d      rd}|dd  }|j                         j                  d      rd}|dd  }	 |j                         }t        t        |   ||      S # t        $ r dd l
} |j                  t              }|j                  d	|| d
j                  t        d t        j                   D                           t         j                  j#                  |        t        t        j
                  dd      cY S w xY w)NTORCHINDUCTOR_FX_COMPILE_MODEFzprogressive+T   zasync+   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  2   K   | ]  }t        |        y wr   )repr.0xs     r   	<genexpr>z+_fx_compile_mode_default.<locals>.<genexpr>   s     HT!WHs   )osenvirongetr   r   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__pop)namevaluer   r   r   logs         r   _fx_compile_mode_defaultr      s)   *DJJNN4 E}}33UEBBIO{{}/bc
{{})	ab	C}U3YPP Cg)		LIIfHm.G.GHHI		
 	

t}33UEBBCs   #B4 4BEEc                     ddigS )Nmax_autotuneTr   r   r   r   _get_progression_configsr      s     
 r   
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    t         j                  j                  j                         }t	        t        |             }|r|j                  s|S |j                  j                  S r   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                $   | j                   j                  d      d   }g }t        |j                  d   t        j
                  j                        s|j                  d   }n|j                  }|D ]  }t        |t        j
                  j                        rW|j                  j                  d      x}:t        |t        j                        r |j                  |j                                ~|j                  d         ||j                  d<   y )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrt   output_stridesoutput_node_argsr   r   s         r   record_original_output_stridesr     s    ((%%%215KNk&&q)588==9&++A.&++" 	(vuxx}}-..;3-!!#**,/ !!$'	( 3AK./r   c                    | j                   j                  dt        j                  j                  j
                        D ]0  }t        | |j                  d   j                        }t        |       2 t        |        y )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r   r   r     sh    ##599#9#9#I#I $  < 2tyy|2231(;	< #2&r   c           	        | j                   j                  dt        j                  j                  j
                        D ]  }t        | |j                  d   j                        }|j                   j                  d      D ]r  }t        t        |j                  d               D cg c]8  }t        |j                  d   |   t        j                  j                        r|: c}|j                  d<   t t        |        y c c}w )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r   r   r   $  s    ##599#9#9#I#I $  = 2tyy|223NN---: 	D !TYYq\!235diil3/? 5DII01	 	38<=5s   (=Dc                 4    t        j                  t              S r   )dynamo_loggingget_step_loggerr   r   r   r   _step_loggerr   4  s    ))#..r   c                    t         j                  j                         rjt         j                  j                  j                  j
                  dk7  r8t         j                  j                         dk\  rt        j                  d       y y y y )Ntf32)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmulfp32_precisionget_device_capabilitywarningswarnr   r   r   _warn_tf32_disabledr  9  si     	

!NN&&55?JJ,,.&8d	
 9 @ 	"r   c           
        t        | j                  d      D cg c]  \  }}|	 c}}      j                  t        | j                  d      D cg c]  \  }}|	 c}}             dfd}|j                  j
                  D ]F  }|j                  dk(  s|j                  }|j                  d      s|j                  d      sCt        | |      sP t        |      |      } t        |      |       }t        |t              r)t        |t              rb|j                  |j                  u rJ|j                  |j                  k(  r0|j                  |j                  k(  rt!        j"                  ||      r|j                  d      rdnd}	 ||j                  |	      }
|	 |
 }||_        t%        |||       j'                  |       I yc c}}w c c}}w )	a  
    In aot_export_module (make_fx), we create get_attr nodes with name prefix
    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
    torch/fx/_symbolic_trace.py

    However, this might result in name collision if the original mod already
    has a different buffer with the same name.

    We resolve this potential name collision here by changing the target name
    with a new number post fix.
    Fremove_duplicatec                .   d}| j                   D ]  }|j                  dk(  s|j                  j                  |      s/t	        |j                        t	        |      kD  sQ|j                  j                  |      d   }|j                         st        |t        |            } D ]f  }|j                  |      st	        |      t	        |      kD  s-|j                  |      d   }|j                         sRt        |t        |            }h |dz   S )Nr   get_attrr]   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r   find_smallest_iz0_resolve_name_collision.<locals>.find_smallest_iZ  s    KK 	2Dww*$)?)?)Gt{{#c&k1#{{008<H'')3x=1	2 ! 	2C~~f%s8c&k)"yy04H'')3x=1	2 1ur   r  _tensor_constant_torchbind_objN)r   zfx.Graphr  strreturnr  )rV   named_parametersupdatenamed_buffersr   r
  r   r   r   hasattrr   r   rM   real_objdevicedtyper   equalsetattradd)modr   r   r   r  r   target_name	gm_targetmodel_targetr  new_idnew_target_namer  s               @r   _resolve_name_collisionr)  F  s    "33U3KL)$LM #*;*;U*;*STYT3DTU   $/77j ++K))"!,,-=>3,/
;/3I2:k237L)%56|-=>!**l.C.CC  L$7$77OO|'9'99KK	<8  ))*<= #% 
 %RXXv6F!'1O)DKB3o.I$/- 	M Us   G
Gc                   ddl m}m} t        | |       i }| j	                  d      D ]   \  }}|||<    |||||j
                         " | j                  d      D ]   \  }}|||<    |||||j                         " |j                  j                  d      }	g }
|	D ]  }|j                  }||j                  v r!|j                  |   }|
j                  |       >||j                  v rE|j                  |   }|
j                  |       t        ||         |j                  t!        |      <   ||j"                  v sJ |
j                  d         ddlm} t)        |j                  j+                         j,                  d         }g }|j.                  }|j0                  }|j2                  }t5        |      D ]f  \  }}d }|t7        |      t7        |      z   t7        |      z   k  r(t9        |j                        }||v r||   }n	||v r||   }|j                  |       h  |||
|t;        j<                         d       }t?        |jA                               |j                  d	<   |S )
Nr   )_assign_attr	_AttrKindFr  )	attr_kindplaceholderr   )_unliftmutated_named_buffers)!torch.export.unflattenr+  r,  r)  r  	PARAMETERr  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersro   r   rq   user_inputstorch.export._unliftr/  tuplert   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   r.   pytreetreespec_leafrV   values)r#  r   graph_signaturer+  r,  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer/  outputsmutated_outputsbuffer_mutationsuser_input_mutationsr;  r   outr   unlifted_gms                            r   _unlift_graphrO    s    ?C$IKJ++U+C 
e 
4))		

 ))5)A 
f!
4&&		

 ++}+=&(M " 'II	<<<,AA)LN  0/;;;);;IFK  -&z+'>? GG4[AB  ; ;;;;  &' -).rxx/C/C/E/J/J1/M)NGO&88*@@#11Mg& 
&S-1%&-A)BBSEWWW"388,D''(.--,T2u%
& 
K 1;;K;R;R;T0UK,-r   Fc              #    K   t        d | j                  j                  d      D              }t               }| j                         D ]@  \  }}||v st	        |t
        j                  j                        s0|j                  |       B |rl| j                  j                  dt
        j                  j                  j                        D ]*  }|j                  |j                  d   j                         , |E d {    y 7 w)Nc              3  4   K   | ]  }|j                     y wr   )r   r   s     r   r   z&_get_subgraph_names.<locals>.<genexpr>  s      55s   r  r   r   r   r   )rV   r   r   named_childrenr   r   r   rQ   r"  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r   _get_subgraph_namesrY    s      +5 5((---<5 + *4$&$5$5$7 . 
L ++
%((..1
 !!*-. HH''uyy'='='M'M ( 
 	;D %%diil&9&9:	;
 !  s   AD$D9BD?D Dc                F   t        ddd      5  t        j                  s| cd d d        S t        j                  }t        j                  }t        |       D ]'  }t        | |      }t        |d      }t        | ||       ) t        | |||      cd d d        S # 1 sw Y   y xY w)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )
r$   r   use_pre_grad_passesadd_pre_grad_passesremove_pre_grad_passesrY  r   r[  r!  rh   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r   r[  r[    s     
$"!8
 N
 ))N N //
5504 	5Mr=1H5hCLB|4		5
 r>:}MN N Ns   BA"BB c                8    d fd}t        ddd      5  t        j                  s cd d d        S t        t	                     }|D ]
  } ||        t               }t	        |d      D ]  }||vs ||        |cd d d        S # 1 sw Y   y xY w)	Nc                R    t        |       }t        |      }t        | |       y r   )r   _recursive_joint_graph_passesr!  )rf  r   rg  r   input_devicerT  s      r   _run_on_sub_graph_modulez?_recursive_joint_graph_passes.<locals>._run_on_sub_graph_module  s/    2}-4*L
 	M<0r   rj  Tjoint_graph_pass_time_usr]  F)rT  )rf  r  r  None)r$   r   use_joint_graph_passesrV   rY  re   )r   rT  rk  rl  old_subgraph_namesrf  out_gms   ```    r   rj  rj    s    
1 
'"!;
 
 ,,  ((;B@T(UV/ 	4M$]3	4 $B5 1eT 	8M$66(7	8 7  s   BAB;BBc                    t        ddd      5  t        j                  s
	 d d d        y t        |       D ]  }t	        | |      }t        ||        t        | |       d d d        y # 1 sw Y   y xY w)N_recursive_post_grad_passesTpost_grad_pass_time_usr]  )r$   r   use_post_grad_passesrY  r   rs  rf   )r   is_inferencerf  r   s       r   rs  rs  9  sz    	%"!9
 +
 **+ + 14 	@Mr=1H',?	@ 	\*+ + +s   A'4A''A0Tc                f   ddl m}m}m}m}m}  || |||      }	| |	       nd}
t        t        |	j                  j                        d   j                  d         D ci c]  \  }}|j                  | }}}g }g }i }| j                  j                  D ]V  }|j                  |v r|j                  |       #|j                  |   |k(  s6|j                  dk7  sF|j                  |       X |D ]B  }d|j                  z   } || |||
||j                        nd|       ||j                     ||<   D |ddd   D ]X  }|j                  r/|j                  D ]  }|j                  |   |k(  rJ d| d        >| j                  j!                  |       Z | j#                          |	|fS c c}}w )	a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr	  r.  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingrx  ry  rz  r{  r|  r<  r8  r   r
  r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnrx  ry  rz  r{  r|  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r   split_const_gmr  H  s   ,  *
35IH "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"TQM  MO '99%""4(YYx $44M9Q  &	'   F)DII5" )0 ]49956		
 .;499-E>*F dd# &::ZZ Wvvh':5VvEU7VV5W HH%& LLN'''Es    F-c                Z   t         j                  j                  }t        |j                  j
                  |j                  j
                  |j                  j
                  |j                  j
                  g      }|D ]  }| j                  j                  d|      D ]  }t        |j                  j                  dd       t         j                        s8|j                  d   j                  t         j                   k(  sc|j                  d   j"                  j$                  dk(  s  y  y)Nr   r   r   r   TF)r   r   atenrV   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r  float32r  type)r   r  tf32_opsr   r   s        r   is_tf32_warning_applicabler    s    99>>DGGOOJJHHLL  		
H  HH''?6'J 	D499==5u||DIIe$**emm;IIe$++00F:	 r   c                r   t        d | D              }t        j                  r=t        j                  r-|s+t        j                  d       t        j                  d      S t        j                  j                  r+t        j                  d       t        j                  d      S t        j                         S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3     K   | ]>  }t        |t        j                        st        |j                  j
                         @ y wr   )r   r   r   rI   r  r  )r   ts     r   r   z6maybe_disable_comprehensive_padding.<locals>.<genexpr>  s/      "#Au||9Tqxx}}s
   A$Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)rc  has_gpus     r   #maybe_disable_comprehensive_paddingr    s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                ^    | s|rt        j                  d      S t        j                         S )zH
    graph partition does not support cpp_wrapper and aot_mode yet.
    F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r   maybe_disable_graph_partitionr    s'     h||E22%%''r   c                   t               5  t        |      }|s;t        j                  j	                  d      } t        | |      j                  |  n\|st        j                         n t        j                  j                  |dd      }|5   t        | |      j                  |  ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   S xY w)z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)r   r  N)r   r#   r   _subclassesFakeTensorModerT   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   rc  force_allow_non_fake_inputs	fake_modectxs        r   fake_tensor_propr    s     
"	# $^4	))88t8TI8N2I.88.I 3 &&(ZZ&&y2I4P 
  Pr	2PP#     s$   BCB:(C:C	?CCc                    t        j                  |       5  t        j                         cd d d        S # 1 sw Y   y xY wr   )r   r  get_config_copy)config_patchess    r   get_patched_config_dictr    s1     
n	% (%%'( ( (s   4=c               #     K   t         j                  r#t        t               d      5  d  d d d        y d  y # 1 sw Y   y xY ww)NF)dirdelete)r   force_disable_cachesrE   rB   r   r   r   with_fresh_cache_if_configr    s>     "" Y[7 		 	 		 	s   &A;AA Ac                      e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   ded<   y)_CompileFxKwargszBoxedBool | None
cudagraphsSequence[int]static_input_idxsr   is_backwardz
int | Nonegraph_idr  r  rv  bool | None
layout_optz.Callable[[list[ExternKernelNode]], Any] | Noneextern_node_serializerzBoxedDeviceIndex | Noneboxed_forward_device_index
fx_wrapper,Callable[..., dict[Any, Callable[..., Any]]]get_decomp_fnNr   r   r   r   r  r    sI      $$NJJ 77??r   r  )totalc                  *    e Zd Z	 d	 	 	 	 	 	 	 	 	 ddZy)_CompileFxCallableNc                     y r   r   )selfr   rc  compile_region_namer   s        r   __call__z_CompileFxCallable.__call__
  s     r   r   
r   rQ   rc  Sequence[InputType]r  
str | Noner   Unpack[_CompileFxKwargs]r  rA   )r   r   r   r  r   r   r   r  r  	  s<    
 +/	 , (	
 + 
r   r  c                   |j                  dd        |j                  dd       |j                  dd       |j                  dd        |j                  dd       |j                  dd       |j                  d	d       |j                  d
d        |j                  dd        |j                  dd        t        j                         5 }|j                  t        j
                  j                  j                                |j                  t        t        j                               |j                  t        j                  dddddd             |j                  t                      |j                  t                      t        j                   d|d           t#        t$        d      | |fd|i|cd d d        S # 1 sw Y   y xY w)Nr  r  r   r  Fr  r  r  rv  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namer^  log_waitcounterwaitcounter_name_overrider_  )r  inductor)compiler_namer  )
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrZ   dynamo_configuse_lazy_graph_moduler   r$   r  rb   r!   pt2_compiler   _compile_fx_inner)r   rc  r  r   stacks        r   r  r    s    lD)
)2.
mU+
j$'
mU+
lE*
ne,
2D9
lD)
.5 
			 
5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 P"#4JO
 !4
 	
'
 
 
s   	C0GGzcompilation time (in seconds)r   c                  ,- t         j                  }ddlm}  |       r&ddlm} |j                         }|j                          t        j                  j                  j                  j                          t        j                  | j                        dk(  r|st        j                  j                   j"                  s~ddlm} ddlm}	 |	j-                  |        t        j.                  j0                  j3                         }
t5        j6                  dd|
i|j8                         t;        | j<                        S |j?                  d	d
      }t@        jC                  d|       tE        ||      }tG        tI        tK        tM        | j                  jN                                    jP                  d   tR        tT        f      sJ d| j                          |jW                  d      &tY        t         jZ                  j\                        |d<   t         j^                  rta        | |fd|i| tc        jb                         }te               }tg                ti        d d tk        |       D        D              }tm        ddd      5  t         jn                   xrD t         jp                  xs |xr. | xr) |xr% t        j                  j                   j"                   }t         jp                  }|}ts        d|       tt        jC                  d||||t         jn                         tw        |      D ]L  \  }}tG        |t        jx                        s!t{        |j|                  j~                        sA||v sFd|_@        N d}d}d,d}t        |       }tc        j                         }|rt        j                  | ||||      \  },|v|\  }}tt        jC                  d|       |r)t        j                         }tt        jC                  d       t        j                  ||||||jW                  dd      |      \  },ntt        jC                  d       t        j                  j                   j"                  rp|J ,J t        j                          	 t        | ||fd|i|}|J t        j                         \  }}|j                  |       	 t        j                          n,,d   dk(  rA|J tt        jC                  d,,jW                  d d!      nd"       	 t        | ||fd|i|}nE,d   d#k(  r|J |J tt        jC                  d$       t        j                          	 t        | ||fd|i|}|J tc        j                         |z
  |_T        |\  }}||_U        ||_V        t        j                         \  }}|j                  |       	 t        j                          |t        |      ,d%<   |j                  ,d&<   tt        jC                  d'|       t        j                  |||||       n;,d   d(k(  sJ |J |J |\  }}tt        jC                  d)|       ||_U        ||_V        |J |} tG        | t              r|| _Z        ,,d   nd*-t5        j                  d+- ,xs i |,       t5        j                  d--|,r,jW                  d.      nd,r,jW                  d/      nd,r,jW                  d       nd0||1       ,t        d2-fd3,fd45       | j                  |||       t         j                  }!|!|!j                  |       } ddd       tt        jC                  d6tc        jb                         |z
         tt        j                  t        j                        rg }"t        d7   j                         D ]  \  }}#|j                  d8      }$t        |$      d9k  r|"j                  |d:d;d;d;|#g       =t        |$      d<k\  rd8j                  |$dd=       nd8j                  |$dd>       }%|%j                  d?      }&|&rDt        |$      d<k\  r6|$d=d \  }'}(})}*d8j                  |$dd=       }%|"j                  |%|'|(|)|*|#g       |$d>d \  }(})}*d8j                  |$dd>       }%|"j                  |%d:|(|)|*|#g        tt        j                  d@       tt        j                  dAj                  dBdCdDdEdFdG             tt        j                  dH       |"D ]9  }+tt        j                   dAj                  |+        tt        j                  dH       ; t        j                  j                  j                  j                           t               t        j                  dI|d   rdJndK dL|dM            S # t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# 1 sw Y   sxY w)Nz
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )use_pipelined_autotuning)AutotuneProcessPool)CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nr  c              3  8   K   | ]  }||j                     y wr   )supports_caching)r   backends     r   r   z$_compile_fx_inner.<locals>.<genexpr>  s&      	#  	  	#s   c              3     K   | ]7  }t        |j                  t        j                  t        j                         9 y wr   )r`   r  r   r  r  r   r  s     r   r   z$_compile_fx_inner.<locals>.<genexpr>  s6      
  +V//1B1B
s   =?fx_codegen_and_compileT)r^  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r  
componentszcache not enabled)r  cache_event_timer  r
  r  remote_cache_enabledlocal_cache_enabledartifactc                     d  ddS )Nr  jsonr   encodingr   )r  s   r   <lambda>z#_compile_fx_inner.<locals>.<lambda>t  s    -k]; &% r   c                 .    t        j                         S r   )r  dumps)
cache_infos   r   r  z#_compile_fx_inner.<locals>.<lambda>x  s    4::j#9 r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsaten_mm_info_   -?   )r  r  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )nrw   aot_compilation torch._inductor.autotune_processr  r  get_instancewarm_upr   	_inductorasync_compileCompiledTritonKernelscache_clearr   count_callsr   
_functorchr   bundled_autograd_cachetorch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr!   log_instant_eventPT2_COMPILEr/   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr
  r   r8  r   r   rC   tritonr  	save_argsr;   timerK   ra   allrF   r$   r  fx_graph_cacher*   r   r<  r   rI   r  r  _is_inductor_staticr>   r	  r2   prepare_keyget_remote_cacheload_with_keyrm   begin_compiler  collectset_triton_bundlerX   rY   	Exceptionrd   r	   with_traceback__traceback__end_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linesr  _save_graphr=   r  instanttry_add_pt2_compilerO   post_compilecudagraph_policywrap_outputisEnabledForr   INFOr"   itemsr  r   r   r   endswithr  formatr   ).r   rc  r  graph_kwargsr  r  r  pool_instancer  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter  inputmb_compiled_graphkey_inforemote_cacher  
start_timer  debug_linestriton_bundler  e	cache_keycompiled_graphpolicymm_table_datar   partsr   
is_batchedbatchmr  krowr  r  s.                                               @@r   r  r  C  s
    &&HI!H+88: 
OO!!77CCE 	  *a/  ''>> 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-n>OPOd4 89:??BUDMR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 !4	
 		
 IIKE<> " 	#
 *"-	
	# 	  
 d
 g@ +++ C&&?*?CC )C $$++BBB 	 %%&
I.		f''	
 ".1 	1HAu5%,,/5<<,,-**,0)	1 04
226	 \\^
%1%=%=NL/6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<""99$,,,%%%
 ''),$:"#% )<	%
 #%! )444 "))+!'!33MB ))+
 :m#<#H$,,,II, "- NN#8)DE$:"#% )<	%
 #%! &&0$,,,'''IIDE''),$:"#% )<	%
 #%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,*no61DN. *4)?J}%Z 	 	""k]+%2	
 	..#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|L((#//?NOg@R II5tyy{U7JK %">288: 	BJCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A)	B, 	78AHHS#sG	

 	  	 CHHQJQQSVWXHHY	  
OO!!77CCELN'&}5;:
F Gj)*	, C %i0  #A|~6EEOO
 ))+.  #A|~6EEOO: %i0  #A|~6EEOO
 ))+ug@ g@s   C	kk0k5Dk<f?Akh&-<k*A+i%Fk?h
.hh

hh##k&	i"/.ii""k%j0=.j++j00j33k		kkc                  $    e Zd ZU dZded<   ddZy)_FxCompileStatr   r  codegen_and_compilec                     d| j                    S )Nzcodegen_and_compile: )r  )r  s    r   __repr__z_FxCompileStat.__repr__  s    &t'?'?&@AAr   N)r  r  )r   r   r   r  r   r  r   r   r   r~  r~    s      Br   r~  c                  r    e Zd ZU dZ ee      Zded<   dZded<   e		 	 	 	 	 	 	 	 	 	 d	d       Z
ed
d       Zy)	FxCompileza
    An FxCompile represents a mechanism that can turn a GraphModule into an
    OutputCode.
    z%dict[type[FxCompile], _FxCompileStat]_compile_statsNr  r  c                     y r   r   )r  r   rc  rd  rb  s        r   r  zFxCompile.codegen_and_compile  s     r   c                8    | j                   j                          y r   )r  clear)clss    r   _reset_statszFxCompile._reset_stats  s      "r   
r   rQ   rc  r  rd  r  rb  r  r  rA   r  rn  )r   r   r   __doc__r   r~  r  r   r  r   r  classmethodr  r   r   r   r  r    s}     =H<WN9W&**
  , '	
 ' 
  # #r   r  c                  2    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_InProcessFxCompilec                !  45678 d|v r|d   J |d   }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }	|j                  d	d      }
t        j                  }|j                  d
d      }|j                  dd      }|j                  dt              }t	        d      j                         5  t        j                         5  t        j                  x},ddl
}t        j                  d|        |j                  |       t              r
t                t         d   j#                         }t%        j&                  t)        t%        j*                         d              t-               t.        j0                  d|rdnd d|        t3        j4                         }t6        j8                  j:                  j<                  j?                  ||dd       |jA                         8tC        dd 8fd       t        jD                  jG                  |       jH                  }|tK        |      }tM               tO        dd      5  t7        jP                         5  tS        |      }ddd       ddd       tU               tC        dd fd       t        jV                        5  tY              }|5  t[        |        ddd       t        jD                  j]                  |       t^        jE                  d!ta        d"ddd#             jc                  dddd$      5tC        dd% 5fd&       t        jd                  jf                  dk7  r~t6        jh                  jj                  jm                  jn                        }tq        t6        jr                  jD                  jt                  |      t6        jr                  jD                  _;        ty               }|j{                         r.t         d'   j}                         }t        j                  d|(       t        j                         r 	 t        d)t        t                     i*       ddd       t        jV                  |      5  t        |      5  t        |	|      5  d}d}d}d}|rt        j                  j                  rt        d, -      \  }}t        |g |||	||||d|
|.      }t        j                  |      5  t        j                  g       5  |	sJ d/       |j                          |j                         \  }}ddd       ddd       t        f||||	||||||r|j                  nd|r|j                  nd|||
|d0}t        j                         }|j                          t        j                  |      5  t        j                  g       5  t        j                         5   |j                  |  g } |j                  t               7|j                  D ]  }!t        |!t              rq|!j                         rat        t        |!j                                     dk(  r<| j                  t        7fd1|!j                         j                  D                     | j                  d        t        |       d}"tO        d2d      5  |j                  r_|j                  rS|j                  rJ |j                         d   j                  }#t        jD                  d3|#jc                  d4             n|j                  rOd5d6limj}$ |j                  sJ d/       |j                         \  }%}&t        jD                  d7|%j                         |&j                  r t        jD                  d8|&j                         d}'t        j                  r5|j                  t        j                        }'t        jD                  d9|'       tO        d:d      5  |$j                  ||%j                  |&j                  |'|j                  g t        j                  |j                  j                  |r|j                  j                  ng z         ;      }#ddd       n)|j                         }(|(j                  }#t        |(d<d      }"ddd       d6d4t        jd                  jf                  dk7  rt        j                  t6        jr                  jD                  j                               6t        j                  t6        jr                  jD                  j                        4tC        dd= 6fd>       tC        dd? 4fd@       4r,ty               }|j{                         r|j                  dA4       d})t        j                  t.        j0                        r{|j                         \  }*}+})t        xj                  |*z  c_~        t        xj                  |)z  c_        t        xj                   |+z  c_        t        j                  dB|*|+|)dC       t        j                  r>|j                         \  },},})t6        jr                  jD                  j                  |)       t6        jr                  jD                  j	                  |j
                  j                         |rMt        j                  j                  r0t        j                  st        jn                  j                  st7        jr                  j                  j                  | rd}-jn                  j                  D ]  }.|.j                  j                  dDd      }/|.j                  dEk(  sFt        |/t6        j                        r+t6        jr                  j                  j                  |/      sw|.j                  j                  dFd      x}-s n dG}0|-r	|0 dH|- dI}0n|0 dI}0|0t        jn                  _        |rt        j                  sut        jn                  j                  sZt!              }1|1rLdJ|1j"                   }0|1j                  j                  dFd      x}-r|0 dH|- dI}0|0t        jn                  _        t        j                  rt        #t        t$        t6        jh                  j&                  f      sJ t)        |#             t+        |#|j                  K      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S |rUt        jn                  j                  s:ddLlm}2  |2t        jn                  j0                        t        jn                  _        | j2                  t)        |          xj4                  d5z  c_        t6        jr                  jD                  j6                  rt6        jr                  jD                  j8                  ot        t6        j:                  j<                  j?                               }3|j                  d      }|(|3t6        jr                  jD                  j8                  |<   tA        #|| t        jn                  j                  |jC                         t         d   |z
  |||| jD                  ||85|"64      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# t        $ r t        j                  d+       Y )w xY w# 1 sw Y   /xY w# 1 sw Y   
axY w# 1 sw Y   
fxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)MzS
        Generates the OutputCode from the GraphModule and example_inputs.
        r  Nr  r   r  Fr  r  r  rv  r  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r(  r)  r*  )save_dirr  c                     dddS )Nfx_graph_runnablestringr  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (% r   c                      S r   r   )runnable_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    #5 r   r  additional_fake_tensor_propTr^  c                     dddS )Nbefore_post_grad_graphr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>M  s    4 (% r   c                 ,     j                  ddd      S NFTprint_outputinclude_strideinclude_deviceprint_readabler   s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>Q  s    2#4#4!&tD $5 $ r   rv  %szAFTER POST GRADr  r  colored)r  r  r  fast_sympy_printc                     dddS )Ninductor_post_grad_graphr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>s  s     :$,) r   c                      S r   r   )inductor_post_grad_graph_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>w  s    'C r   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                    | j                   dk(  xrc t        | j                  t              xrG | j                  j	                  d      xs* t        | j
                  j                  dd       t              S )Nr  r  r   )r   r   r   r  r   r   r   rM   )r   s    r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	 r   )r  )rc  	shape_envr  r  r  r  rv  r  is_const_graphr  r  z"AOT mode only supports C++ wrapper)rc  r  r  r  r  r  rv  r  r  const_wrapper_codeconst_kernel_codeconst_modulerd  r  r  c              3  @   K   | ]  }j                  |        y wr   )doprint)r   sps     r   r   z:_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>  s     )X1!))A,)Xs   zGraphLowering.compile_to_fnzOutput graph module: 
%s)r  r]   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerc                     dddS )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>O  s    (T,21 r   c                      S r   r   )r  s   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>S  s    /Y r   c                     dddS )N0inductor_provenance_tracking_kernel_stack_tracesr  r  r   r   r   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>W  s    (Z,21 r   c                      S r   r   )inductor_kernel_stack_trace_strs   r   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>[  s    /N r   inductor_provenancezGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r.  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op )filenamer  ) check_lowering_disable_cudagraph)r   rw   r+  rc   rU   guardr   preserve_rng_stater   sleep_sec_TESTING_ONLYrF  r   warningsleepr  r  r"   copysyssetrecursionlimitr  getrecursionlimitr   r   r^  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerO   r?  fx_graphr  rv   rg   r$   no_gradr  r   set_fake_modeget_cuda_device_contextrs  fx_graph_transformedpost_grad_graphs_logr)   r  traceprovenance_tracking_levelr   	tracebackget_graph_provenance_jsonr   r:   r/  _pre_grad_graph_id _inductor_post_to_pre_grad_nodesr'   in_progressr  r!   compilation_metric	is_fbcoder   r  r  rP  r  r  r  r  r  ri   set_graph_handlerset_extern_kernel_nodesruncodegen_with_cpp_wrapperr   r_   CachedMetricsHelperfreeze_runtime_assertsr^   graph_contextgraph_outputsrS   r   rk   has_tensor_outputr   rR   
get_strider   r8  
get_layoutr   _check_triton_bf16_supportr  r  r  codegenr   r3   	codecacher  extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   r  r  dump_inductor_provenance_info_inductor_kernel_stack_trace
add_to_setinductor_metrics_logr]  count_bytesr  r  r  r  log_tlparselog_runtime_and_tensor_metalog_collective_schedule	schedulerr
  rD  cudagraph_skip_dynamic_graphsr  disable_cudagraphs_reasonr   any_is_symbolicr   r   r   rr   r   r   rQ   r  r<   torch._inductor.cudagraph_utilsr  device_node_mappingr  r  RECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSr   r9  r:  r=   
get_deltasr  )9r  r   rc  rd  rb  r  r  r  r  r  r  r  rv  r  r  	sleep_secrF  inductor_countersfdr  r  cuda_contextprovenance_tracking_jsonmetrics_contextr  r  const_graphr  r  r  r   metrics_helperr   rM  compiled_fn_runnercompiled_fnr  r   kernel_codeserialized_extern_kernel_nodescompiled_moduler  	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  r  r  r  s9    `                                                  @@@@@r   r  z'_InProcessFxCompile.codegen_and_compile  s     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB+//
DA(,,]EB'++L%@
**)--neD5t< 	 GSFVFV0G
 JKQQSw	++-w	 $:::	GI9 

9%)"-#% ( 4 9 9 ; !!#c&;&;&=t"DELN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 I 1.A	$ B-T E ]]_ E 0^ DIEE 6b9
 	 + ?A6r:! O/NO,,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<99Q>**DDRXXN - ;!OO11DD4 OO))J #6"7"..0'/'>'D'D'F$&99"&9I ##%	A, -s3J3L/M+o?AD 	*W3NCW .k8DW
 &*""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#-&3#K ++K811"5  +P,PP{#)'@@B >*,=  &
 $2'% +%+A!- +'94F*00D 4E)//$!,$3)"//2 ")!<!<!> ,,.''.J--b1J )668J
 EII~.NPN**6 +,#(#6#6 <C *3 7$'$9$9$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ;< /u5 *.&%5T > !>>e.>.>','8'88#8*/--/!*<*?*?K+11 ; + : : : N
 #^^B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B: 44$)$@$@AVAV$W !? !0 5 5$J$B!"
 ". 9QU" " />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.y>B BF>6:3||==BEIZZ!OO11OOQFB ;?**!OO11NN;7 )&) (Z )&) (O ;.A.CO.::< / : :$9$C!"
 %)M+88FCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> )).3.?.?.A+1m--II-X OO))AA%//BWBWX #"MMGG & 6 6 ! A A!OO11AA>R&*$&HHNN 
&D'+yy}}UD'AH $= 8'1(ELL'I','<'<'L'LX'V (.2iimmM4.PP{P %
& #|&)0	k]"&MG)0	nG<C9
 # & 6 6 ! A A.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG= (()' $(<(<=  -  ,	-  
  ,%0e>O>O aJ J J J[W W W WAw	 w	 w	F "!''*K*K = ! ; ; 9 ''T
3GG1LG --DD!OO11CCO%(!MM88KKM&
 $0#3#3J#?#/ !+ "OO11CCHM +#&99&113 ,/@@"&)00$'*4*B7'mJ J J J[W W W WAw	 w	 w	ZE EE E8O Or % A $?@Ay?A ?AL   X" "I> >AJ J J J J J J J[W W W W W W W W WAw	 w	 w	 w	 w	 w	s1  AA8)FAA#-}%}	}%:AA#~$}2	-E*~$}?7AA#AA A@9	-A"A@$~>%-~1~>A7A@$A@':<C%%!E&A: 2M%D%/	:8	A@	A@$
	A@9		AA	AA#%	AA88F%	:	A@!	A@$*	A@9	3	AA<	AA#	AA8}"}%%}/*AA#2}<7~$?~!	~$ ~!	!~$$~.)AA#1~;6~>>A@$"%%.*:1	A@:A@?A@@	A@$@A@@A@$@	A@9	@$A@-@)A@9	@0	AA@9AA@>AAA	AA#AAAAAA#A	AA8A#AA,	A(AA8A8ABNr  )r   r   r   r   r  r   r   r   r  r    sK    UU ,U '	U
 'U 
U Ur   r  c                @   t         t        j                  k(  rt               }nIt         t        j                  k(  rddlm}  |       }n$t         t        j                  k(  rddlm	}  |       }t        r8ddlm} ddlm}	 t        |	      sJ d        ||      }||j                  _        t"        rUddlm}
 ddlm}	 t        |	      sJ d       t'               }t               }||_         |
|||      }||j(                  _        |_        |j+                  | |||      S )	Nr]   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)_ProgressiveFxCompilez=progressive is only valid with an out-of-process compile mode)fx_compile_moder   r   r  r   compile_fx_extr&  r   compile_fx_subprocr'  fx_compile_asynccompile_fx_asyncr(  r)  r   _compiler  fx_compile_progressiver*  r   _optimized_compiler  )r   rc  rd  r  rb  schemer&  r'  r(  r)  r*  progression_configsfast_schemes                r   r  r    s    -...$&	M33	38%'	M44	4<%'5: &"89 	
E	
9 !( 	+ ;: &"89 	
K	
9 78 *+*=' '{F<OP 	!!5 "5F %%b./<XXr   c                d   g }t        |       D ]  \  }}t        |t        j                        s!t	        |j
                  j                        sAt               5  ||v rt        |      r
	 ddd       et        |      s
	 ddd       z	 ddd       |j                  |        |S # 1 sw Y   xY w)z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)r<  r   r   r   rI   r  r  rs   rL   rJ   r   )inputsr  ids_to_checkr  rk  s        r   r@  r@  /  s     Lf% 5%.ell''(02 	 %%*;E*B		 	
 /u5	 	
 6	 	A), 	 	s   B&3B&&B/	r   )r  placeholdersmutated_input_idxsc                    ddl m}	 t        j                  j                  rEt        j                  |	|||||||t        j                  j                  j                         	      nt        d d fd}
|
S )Nr   )cudagraphify_impl)device_indexstack_tracesr  rv  r  r9  r:  r  c                ~    't        j                         5   |       d d d         |       S # 1 sw Y   xY wr   )r   r  )
new_inputsr  cudagraphify_fnmodelr  s    r   r  zcudagraphify.<locals>.runt  sH    002 T-eZARST:&&T Ts   3<)r@  r  r  r   )torch._inductor.cudagraph_treesr<  r   rD  cudagraph_trees	functoolspartialr   r   r9  r:  )rB  r  r=  r>  r  rv  r  r9  r:  new_cudagraphify_implr  r  rA  s   ``         @@r   cudagraphifyrH  R  sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    t        j                  | j                         | j                         | j                  | j
                        S )z1
    Copy and input while preserving strides
    )r  r  )r   empty_stridedsizer   r  r  )r   s    r   static_inputrL  ~  s/     qvvx177188TTr   c                V    t        | |      } t        ||      }| j                  |       y)z=Index into expanded dimensions of both dst and src then copy_N)r@   copy_)dstsrcexpanded_dimss      r   index_expanded_dims_and_copy_rR    s'     c=
1C
c=
1CIIcNr   c                *  	
 t        |      }t        t        |            t        ||       t	        |t
              sJ t        |      D cg c]  \  }}|vrt        |      ng  c}}t        |      D cg c]@  \  }}t	        |t        j                        s|n|vrt        |      n|j                         B c}}t        t        |            D ]8  \  }\  }}t	        |t        j                        s$|vs)t        |   ||       : t        j                  j                          t        j                  j!                         }|j#                  t        j                  j%                                t        j                  j'                  |      5   | t                     ddd       |j                          t        j                  j%                         j#                  |       t        j                  j                          t        j                  j)                         
t        j                  j+                  
|d      5   | t                    ddd       t	        t
        t,        f      sft.        j0                  rd
fd}n1t3        t5                    D cg c]	  }|vs| c}	d	
fd}t7        ||t                     S c c}}w c c}}w # 1 sw Y   ExY w# 1 sw Y   xY wc c}w )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    Nthread_local)streamcapture_error_modec                   t              t        |       k(  sJ t        t        |             D ]u  \  }\  }}}t        |t        j
                        s%t        |t        j
                        sJ |v r$|j                         |j                         k(  rgJ t        |||       w | j                          j                          	S r   )
r   r<  zipr   r   r   data_ptrrR  r  replay)
r@  r   rO  rP  rQ  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r   r  zcudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3 K..c3 "#u||4!#u||444++<<>S\\^;;;
 2#sMJK LLN!!r   c                    D ]8  }|   }| |   }t        |t        j                        sJ t        |   ||       : | j	                          j                          S r   )r   r   r   rR  r  rZ  )	r@  r   rQ  rP  copy_indicesr   r[  r\  r]  s	       r   r  zcudagraphify_impl.<locals>.run  si    # V 23 7 o!#u||444-mC.@#}U	V
 LLN!!r   )r@  list[InputType]r   Callable[[list[InputType]], Any])r@  rV   ru   rp   r   r   r<  r?   r   r   rL  detachrX  rR  r   synchronizeStreamwait_streamcurrent_streamrU  	CUDAGraphr   r8  r   size_assertsr   r   rn   )rB  r7  r  check_input_idxsr   r   rQ  rU  r  r_  r   r[  r\  r]  s     `      @@@@@r   r<  r<    s    /v7HI)3#F,=>* 6#34fd###  'C !$+< <!"D  '	 C a.  ++ a		M $-S9K-L#M Paa&36G+G)-*<aOP
 
JJZZ F
uzz0023			6	" #d=!"#
	JJ++F3	JJ JJ  "E			%>		R 4tM234ntUm4(*	" 	", !]!34
CT8TC
		" 		" (-=z|LL]	*# #4 48
s1   K+AK1"K7L;	LL7LLc                <   t        | t              sJ |        t        |        t        j                  |xs i       }|j                  dd      st        j                  sd|d<   |j                  dt        j                  j                        }|r|j                  d      r"J d       i |dt        | j                        i}dd	lm}  ||      }|j                  d
d       }| j                   j                  dd       }t"        j$                  j'                  |      }t)        j*                  d      5  t"        j$                  j-                  |      5  t/        ddd      5  t1               5  t3        | |t5        j6                  ||      |      }	t        |	t8              sJ |	j:                  cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr  FTr  zaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.r]   )maybe_aoti_standalone_configr  dynamo_compile_idcompile_fx_aot)r^  reset_event_log_on_exit)r  )inner_compiler  )r   rQ   r,   r  deepcopyr   r   r  r  output_pathr`  r1   coder   rk  r   r   r   r   r9  rw   set_aot_compilationcompile_contextr    r'   
compile_fxrE  rF  r<   r  )
model_example_inputs_ro  r  rq  rk  r  saved_compile_idsaved_compile_contextcompiled_artifactss
             r   rm  rm    s    fk*2F2* &f- &*]]>3GR%HN|U3v7H7H(,}% $$"F$7$7$C$CK ''/ 	
R	
/

&	&++(>

 41.AN+//0H$O{{':DA!MM889IJ	d#+%%&;<+ 	"&$(	
+ 	+ (#++'= *
 ,l;;;!**-+ + + + + + + + + + + + + + +sa   2 HG=!G(,AG	.	G(7	G= 	HGG(	G=(G1-G=4	H=H	HHc                   ddl m}m}	 t        ||       }
t	        | t        t        |
                  } t        j                  | d      }|rt        | |d        ||         |	|| |      \  }D cg c]  }||   	 }}t        |      }|j                  j                  ^ }}|j                  d   }t        |      D cg c],  \  }}t        |t         j"                  j$                        s+|. c}}|j&                  d<   g }t         j(                  j*                  j-                         }dgd|,|j.                  J |j.                  }t1        dt3        |      dz
        t5        t6                  }|j8                  }|J d}t3        |      dkD  rg t;        t3        |            D ]I  }|vrd ||<   |dkD  r(||   ||dz
     k(  r|dz  }n|j=                  ||          j?                  |       K |j@                  J t;        t3        |j@                              D ]  }||vsd |j@                  |<    |jB                  r|jB                  jD                  }tF        jH                  jK                  |dd      5   ||||||d||	      d d d        tL        jN                  rS dfd
}d|_(        |S c c}w c c}}w # 1 sw Y   8xY w)Nr   )%convert_conv_weights_to_channels_lastfreezerk  Tr  r   r]   r  )r  r  r  rv  r  r  c           
         D cg c]  }| |t        |         z
      }}| j                           |      S c c}w r   )minr  )r   r  args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r   wrapperz%fw_compiler_freezing.<locals>.wrapper  sT     +
 +C>,BCCD
 
 	

!(++
s   <)r   zlist[object]r  zSequence[torch.Tensor]))torch._inductor.freezingr|  r}  r&   rj  rA  rB  ri   decide_layout_optr  r#   r   r
  r   r<  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr  r   rV   r  params_unwrapped_to_flat_indexr   r"  r   params_flatr   r   r   r  r  rw   r+  _boxed_call) aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsro  r  r  forward_devicer|  r}  inputs_devicesr  	opt_modelindr  r  model_outputs_nodemodel_outputsr   r  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr  r  r  r  r  r  s                                @@@@r   fw_compiler_freezingr  8  s    W ((:<NON6$~./
 001CRVWJ+-?F-.@A'-($I$ >SSc,S1SS !34I '__22Q&++A.M#M2;QjEHHMM6R;67 $&mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./ 	:A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9	: **666s?6678 	6A5515++A.	6 && / ; ; P P			9&=t	D 

*/!'5!	


 	!!, , GNQ T;L

 

s   7K,K	3K	KKc                    t         j                  j                  r$t         j                  rt	        t        d             t         j                  j                  t         j                  j                  nt               xr t        j                  } | |  t         j                  j                  xr$ t        j                   xr t         j                   ddS )Nz0cpp-wrapper does not support graph partition yetT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLttriton.cudagraphsztriton.store_cubin)
r   rD  r  r  r7   r6   autotune_at_compile_timer\   rw   r+  )r  s    r   get_cpp_wrapper_configr    s    }}F$:$:+'B	
 ==11= 	.. \/a//	  ,D(@$@MM$$ +%%%+***"	 	r   c                B   t         j                  j                         st        j                         S t        d t        |       D              }t        |      dk(  r1t         j                  j                  t        t        |                  S t        j                         S )zX
    Returns a cuda device context manager if there is a single device in the graph
    c              3  @   K   | ]  }|j                   d k(  s|  yw)r   N)r  r  s     r   r   z*get_cuda_device_context.<locals>.<genexpr>  s       8FKK64I8s   r]   )r   r   r   r  r  rV   rF   r   r  rA  rB  )r   cuda_devicess     r   r  r    s     ::""$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c           
     n   t        |       }|5  t        ||       }t        | dt        t	        |                  } d d d        |j                  dd       }t        j                  1t        j                  dd      5  t        | |fd|d|cd d d        S t        t        j                  t              sJ t        j                  t        j                  j                  j                  d      5  t        j                  | |fd|d|cd d d        S # 1 sw Y   xY w# 1 sw Y   y xY w# 1 sw Y   y xY w)NT)rT  rk  static_lifetime_input_indicesr   r  r  )compilerr  )r  r&   rj  rA  rB  r   r   custom_partitioner_fnr   r$   r   r   r9   	__class__r   )r   joint_inputsr   r  r  r  s         r   partition_fnr    sO   
 +2.L	 	
 ,L"=*!%d>23
	
 7=jj'7! ##+&&1
 
	 7 $.K	 
	 
	 &668KLLL&&((22;;"&
 
	 // $.K	
 	
	 
	;	
 	
 
	 
	
	 
	s#   -D<D/D+DD(+D4c                f    t        |       }t        j                  |j                   }t	        |      S r   )rt   r=  arg_tree_leavesr   r   )rB  r  r  s      r   get_num_model_outputsr    s/    $U+**,>,C,CDM}r   c                    | j                   r1t        j                  j                  st        j                  ddi      S t        j                         S )Nr  T)r   r   rD  r  r  r  r  )r  s    r   cudagraph_annotation_contextr  	  s=      8 8||0$788!!##r   )frozenc                  D    e Zd ZU ded<   ded<   ded<   ded<   dZd	ed
<   y)CompilerConfigExtrarC   r  r  r  r4   r  forward_is_partitionedNr  cudagraphs_bwd_override)r   r   r   r   r  r   r   r   r  r  	  s$    M$$%%+/[/r   r  c                |   t        | t              r| j                  nd }t        t        j
                  j                        }d }||j                  d      x}|j                  i|j                  t        j
                  j                  k7  rBt        |j                        }|j                  rt        j                  d       nt        d       |j                  r/|j                  #|j                  s|j                  }t        d       t        t              }t!        d       }t        d      }t#        |||||      S )Ncudagraph_annotationz9enabling cudagraphs due to override_cudagraphs annotationz:disabling cudagraphs due to override_cudagraphs annotationzGdisabling cudagraphs for backward due to override_cudagraphs annotationF)r  r  r  r  r  )r   rQ   r   rC   r   rD  r  r   fwdr5   r  r7   r   bwdrA  _graph_counterr4   r  )r   gm_metar  r  
annotationr  r  r  s           r   create_compiler_config_extrar  	  s    $B4bgg$G 6==334J+/ 	";;'=>>ZK>>%*..FMM<T<T*T":>>2J~~##O 4P 
 ::>>&0nn#/Y N#H &d+N
 'u-% 75 r   c           
         |rt        dd  fd       t               }|j                  d   D cg c]M  }t        |t        j
                  j                  j                        r|j                  j                  d      ndO c}|j                  d<   t        |       }	t         t        t        |	            	       t        dd
  fd       t        j                  j                  j!                  |t#        |            }
t               }t$        j&                  rt)        j*                  |j                   }t#        |      }t        j,                  j.                  j1                         }|%|j2                  r|s|j2                  j4                  }nd}||k  sJ ||z   }||k  sJ t7        ||      D cg c]+  }t        ||   t        j
                  j                        r|- c}|j                  d<   ng |j                  d<   t9                t;        |j<                        5   | |t?        |
      |j<                  |j@                  ||jB                        }|sEt        |tD              r5|jF                  r)t#        |jF                        dkD  rd|jH                  _%        |cddd       S c c}w c c}w # 1 sw Y   yxY w)a#  
    Compile the forward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        num_orig_model_outputs: The number of model outputs from the original dynamo graph.
        num_example_inputs: The number of example inputs from the original dynamo graph.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
        is_inference: Whether this is an inference graph.
    r  c                     dddS )Nbefore_joint_graphr  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>q	  s    ,$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>u	      r00"4  1   r   r  r   r  Noutput_stack_tracesr~  c                     dddS )Nafter_joint_graphr  r  r   r   r   r   r  z$compile_fx_forward.<locals>.<lambda>	  s    +$! r   c                 ,     j                  ddd      S r  r  r  s   r   r  z$compile_fx_forward.<locals>.<lambda>	  r  r   r   )r  r  r  rv  r  r]   T)&rO   rt   r   r   r   r   r   r   r   r   r&   rj  rA  rB  r/  r   num_fw_fixed_argumentsr   r   keep_output_strider=  r  r   r   r   r   num_mutated_inp_runtime_indicesr   r   r  r  r   r  r  r=   partition_mapsr  r   )r   rc  num_orig_model_outputsr  compiler_config_extraro  rv  r   argr  r   r  r  num_model_outputsr   original_output_start_indexorig_output_end_idxr   results   `                  r   compile_fx_forwardr  W	  s   , 		
 R {{1~.
  c588==#5#56 ]+.
)* ,NB?*2DnAU<VW		
 OO!!88C/E %R  ..0B0G0GH.--..6687#6#6|##CC ( +,'%):::: :<RR #&7777 8:MN?
-,ehhmm< ?
 :; ?A :;
 /r2	%&;&F&F	G 3E:,77*33%'<'K'K
 6?3%%F))*Q.AE!88>' U.
z?
 s   AJ180J6(A?J;;Kc                   ddl m} |5  t        |       }t        j                  rlt        j                  |j                   }t        |      D cg c]+  \  }}t        |t        j                  j                        r|- c}}|j                  d<   ng |j                  d<   t        |       }	|j                  }
|j                   t#        |j                         }
|j$                  j&                  rt)        |       }nt+        t-        |	            }t        j.                  rt        j0                  t3                     nt5        j6                         5  t9        |
      5   || |||
d|j:                  |j<                        cddd       cddd       cddd       S c c}}w # 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)a5  
    Compile the backward graph of the given graph module.

    Args:
        gm: The graph module to compile.
        example_inputs: The example inputs to use for compilation.
        compiler_config_extra: Extra configuration for the compiler.
        inner_compile: The inner compile function to use.
    r   )compile_lockr   NT)r  r  r  r  r  )torch._dynamo.convert_framer  rt   r   bw_outputs_user_visibler=  r  r   r<  r   r   r   r   r   rD   r  r  rC   r  r   rG   r   r   r  r  r  r  r  r  r  r  )r   rc  r  ro  r  r  r  r   r  r   r  r  s               r   compile_fx_backwardr  	  s    9	 *(_))"224F4K4KLM (6CCa/ C##$>? CE##$>?r" +55
 88D"#8#P#PQJ
 !77==/G/K $U5\ 2 %% 356++-		 )4	 !"3% .77+@+O+O	 	 	5* *C,	 	 	 	 	5* * *sU   AG
0F?CG
F5#F >	F5	G
G
 F)%F5,	G
5F>	:G

Gc           
         t        dd  fd       t        j                  dt        d ddd             t	         j
                        t        j                  j                  _        t        j                  j                  d	k(  rc j
                  j                  D ]J  }|j                  s|j                  t        j                  j                  j                  |j                  <   L t!         |       t        dd
  fd        S )Nr  c                     dddS )Nbefore_pre_grad_graphr  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>"
  s    + 
 r   c                 ^     j                  ddd      dt         j                         z   S NFTr  z

 # graph id: r  idr   rv  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>&
  9    600tD 1 
 b./
01 r   r  r  zBEFORE PRE GRADTr  r]   c                     dddS )Nafter_pre_grad_graphr  r  r   r   r   r   r  z%run_pre_grad_passes.<locals>.<lambda>A
  s    * 
 r   c                 ^     j                  ddd      dt         j                         z   S r  r  r  s   r   r  z%run_pre_grad_passes.<locals>.<lambda>E
  r  r   )rO   pre_grad_graphs_logr?  r)   r  r   r   r/  r  r   r  r  r
  r  #_inductor_pre_grad_node_stack_tracer   r[  )rv  rw  r   s   `  r   run_pre_grad_passesr  
  s    
 
1
 	
	 02&,,/?EOO,||--2LL&& 	D$$ %%II$))T	 (@F
1
 Mr   c                :   dfd}nt         }ddlm} |j                  dd      r| S |rIt	        j
                  |      5  t        | | t	        j
                  |      |      ||      cddd       S t        j                  ||      }t        d	 |D              r2t        j                  j                  j                  j                          t        j                  st        j                   rdd
lm}	 t        j                  }
t        j                   }t	        j
                  t'                     5  t)        j*                  |      5  t-        | t.              rt1        | |      n|}t3        |      } |	| |i |      5 \  }}}}}t5        ||t        j                  ||
|      |||      cddd       cddd       cddd       S t5        | |||||      S # 1 sw Y   mxY w# 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wddd       I# 1 sw Y   RxY w)a@  
    Main entry point for compiling given FX graph.  Despite the fact that this
    lives in :mod:`torch._inductor`, this function is responsible for calling
    into AOT Autograd (and we will eventually get a callback to
    ``inner_compile`` to perform actual compilation.  In other words, this
    function orchestrates end-to-end compilation for the inductor backend when
    you use :func:`torch.compile`.

    NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
    mutate it!  Make a copy if you need to preserve the original GraphModule.
    Nc                      S r   r   )decompositionss   r   r  z!compile_fx.<locals>.get_decomp_fnc
  s	    !!r   r   )CompilerBisectorr  pre_grad_graph)ro  r  ignore_shape_envr  )r  c              3     K   | ]8  }t        |t        j                        xr |j                  j                  d v  : yw))r   xpuN)r   r   r   r  r  )r   rr  s     r   r   zcompile_fx.<locals>.<genexpr>
  s8       	1ell#H(HHs   >A )_fakify_script_objects)r  r  ro  r  r  r  r  r  )r  zdict[Any, Callable[..., Any]])rc   !torch._inductor.compiler_bisectorr  disable_subsystemr   r  ru  rE  rF  r  r   r/  r0  AsyncCompilewakeupr  r  torch._export.non_strict_utilsr  r  rw   set_real_inputsr   rQ    _extract_inputs_from_exported_gmr#   _maybe_wrap_and_compile_fx_main)rv  rw  ro  r  r  r  r  r  r  r  cpp_wrapper_configfx_wrapper_configinputs_r  patched_mod	fake_argsr  s       `            r   ru  ru  M
  s   ( !	" ,
 C))*6FG\\.) 		:fll>:=I-!1$7		 		 %%/M     	%%2299;V..I#//"-- LL/12	o.	 fk2 1I$ 
 )1I'YG  L6"+"3"3%$6#4#
 &6"/(; 	 	 	< +#/ y		 		P  	 	 	 	 	 	sN   *G4H
5G<?-G&,	G<5	HG#&G/+G<3	H<H	HHc           
        | j                   j                  D cg c]-  }|j                  dk(  s|j                  j	                  d      / }}t
        j                  s+|D cg c]   }t        |t        j                        r|nd " }}t        d |D              rt        t               ||      D ]  \  }}}|
t        |t        j                        s%t        |t        j                        sJ |j                  |j                  k7  s[t        d| d|j                   d|j                   d       |S |S c c}w c c}w )Nr.  r   c              3  $   K   | ]  }|d u 
 y wr   r   )r   vs     r   r   z3_extract_inputs_from_exported_gm.<locals>.<genexpr>
  s     
.Q1D=
.s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.)r   r
  r   r   r   r   r  r   r   r   r  rX  r
   r  
ValueError)r   rw  r   fake_inputsinpr   fir  s           r   r  r  
  s/    *,!%477m;S		eK  
 GR
?B:c5<<0Cd:
 
 
.+
..eg{OD 	JCQ~*R">!!U\\22299($\]`\aac99+T!(( 4cc 		 3
s   D8D8!%D=r  c               P   t        j                  t        ||||      }t        |       st	        | ||      S t        | t              r1t        | j                  j                  t              rt        | ||      S t        d |D              rt        | ||      S t        | |||||      S )z
    Part of compile_fx, called after patching configs.

    Ultimately we want to call _compile_fx_main, where the actual work happens.
    But under various conditions, various forms of wrapping might be needed
    around _compile_fx_main.
    r  c              3  R   K   | ]  }t        |t        t        t        f       ! y wr   r   r   r8  r  r   s     r   r   z2_maybe_wrap_and_compile_fx_main.<locals>.<genexpr>
  s     
G!:a$t,-
G   %'r  )rE  rF  r  graph_returns_tuplemake_graph_return_tupler   rQ   r   _codegenr[   handle_dynamo_export_graphr  r%   _compile_fx_main)rv  rw  ro  r  r  r  
compile_gms          r   r  r  
  s    $ ""'#)#/J v&&v
KK&+&:~, *&/:NN

G
GG $FOZHH #/ r   c               <    t        t        j                        5  t               5  t        j
                  j                  j                  t        j                  j                  dk(        5  t        j                  j                  j                         5  t        j                  rJ t        |      t!                |       }t#        j$                  |      	 	 	 	 	 	 	 	 d fd}t#        j$                  |d      }t'        t(        |      }t        j*                  rSt	        j,                         s?t#        j$                  t.         j0                  j2                  j4                        }	n't#        j$                  |d      }	t'        t(        |	      }	t7        d	      	 	 	 	 	 	 dfd
       }
t'        t(        |
      }
t9        |      xs  t        j:                  j=                  d      }t        j>                  j@                  jC                         xs t        j>                  jA                  |      }tD        jF                  rt        jH                  sddl%m&}  |        tO         tP              rtS         |       tU        jV                  dt        jX                        5  t[         |d|      \  }}tO        |tP              sJ ddl.m/}  ||      }|j`                  jb                  D ]  }|jd                  dk(  sd|jf                  vs# ti        |jj                        |      }tO        |t        jl                        r%|J |jo                  |d      |jf                  d<   }tO        |t        jp                        sts        tu        |            r8t        jv                  jx                  j{                  ||      |jf                  d<   tO        |t|              s||jf                  d<    	 ddd       t               }d jf                  v r jf                  d   |jf                  d<   d jf                  v r jf                  d   |jf                  d<   t        j                  j                         }|rt        j                  j                  nt        j                  }tE        j                  |      5  t        j                         5   |       5   |	||      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S tE        j                  |      5  t        j>                  j                  |      5  t        j                         5  tU        jV                  dt        jX                        5  	  t        j                  ||
|	|t        d|tR        |
       |      cddd       cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd        # 1 sw Y   *xY w# t        $ r}|j                         dd}~ww xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)aQ  
    Main part of compile_fx, called after wrapping is done.

    Roughly speaking, here the steps will be:
    (1) apply pre-grad passes
    (2) create `fw_compiler` and `bw_compiler` functions out of `inner_compile`
    (3) call aot_autograd, which:
    - (3a) creates a joint graph with `decompositions`,
    - (3b) partitions it with `partition_fn` into fw and bw graphs (applying joint-graph passes),
    - (3c) calls `fw_compiler` and `bw_compiler` on those graphs (applying post-grad passes)
    - (3d) finally, assembles the fw and bw compiled functions back together and returns.
    r]   )r  c           
         t        j                  d      5  t        t              rt	              }nt	        |       }t        | |||      cd d d        S # 1 sw Y   y xY w)Nz$compile_fx.<locals>.fw_compiler_base)r  r  r  ro  rv  )r   r$   r   rQ   r  r  )r   rc  rv  r  r  ro  rv  r  s       r   fw_compiler_basez*_compile_fx_main.<locals>.fw_compiler_base2  sf    
 **+QR fk2-B6-J*-B2-F*)"+A'9*?"/!-  s   9AA#Fr  )r  r  ro  r  r  r  Tbackward)r  c                x    t        j                  d      5  t        | |      cd d d        S # 1 sw Y   y xY w)Nzcompile_fx.<locals>.bw_compiler)r  ro  )r   r$   r  )r   rc  r  ro  s     r   bw_compilerz%_compile_fx_main.<locals>.bw_compiler[  s?    
 ))*KL +"*?"/	  s   09r  )is_valid_aoti_model_nameunlift_effect_tokensselective_decompose)trace_jointr  r   )_detect_fake_mode_from_gmr  r   N)static_shapes dynamo_flat_name_to_original_fqnrl  )
fw_compilerr  inference_compilerr  r  keep_inference_input_mutationsr  r  rh   r  )r   rQ   rc  r  rv  r   r  rA   )r   rQ   rc  r  r  rA   )NrZ   r  r  r   r   r   r  preserve_node_metar   r  r  r/  r?  reset_provenance_globals_raise_error_for_testingr   r  rE  rF  r0   rA   freezingis_grad_enabledr  r  r  r  rP   r#   r  r  r   r   r   rw   r+  enable_autograd_for_aotr   r  r   rQ   r  functorch_configr  r  r-   torch._export.utilsr  r   r
  r   r   r   r   r   from_tensorScriptObjectrN   r  _libraryfake_class_registrymaybe_to_fake_objrM   rO  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingdynamo_commonr+   r  rX   remove_dynamo_frames)rv  rw  ro  r  r  r  r  r  r  r  r  r  r  r  r   r@  r  r   r   rN  disable_ampr   rr  r  r  s   ` `                    @@r   r  r  	  sd   , 	}BBCv9 "v9 	--LL22a7	
v9 	668v9 2222 1 <V D&!))-}U		/	 	 		 	* .UC 	 6j+N??5#8#8#:5>5F5F$##5+0;;.774CC6 "+!2!23CRV!W!@." 
'*	=		-@		 
>	 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 V%C%C7$&
 &+.,V_E!''%)$*$>$> %6 '8# %#1	'#O ""k222I5b9	 HHNN 6Dww*,dii1G!8DKK!8!<%fell;#,#88#8/8/D/D &d 0E 0DIIe, (0B0BC~ LH !& B B T T$-v!" !IIe,
 (0@A/5DIIe,#6)%6N (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  + H->-G-G-I H79 H)+GH H H Huv9 v9 v9 v9 v9| OOI&	9MM!!/2	9 &&(	9 ""%)$*$>$>		99}11 + +'9#1!-37*?%5$7(; /+	9 	9 	9 	9 	9{v9 v9 v9 v9 v9@%6 %6tH H H H H H H H2 $ 9 ,,.D89-	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9{v9 v9 v9 v9 v9 v9 v9 v9 v9 v9 v9 v9s  \A[=.)[(H[	2AXXCX/XC[	
YX:'	X%0	X:9	Y	[		[(	[=	\0[	 Z>%Z):&Z Y?"-Y	Z	Z)!	Z>*	[	3	[(<	[=	\X"[	%X.*X:1	Y:Y?Y
[	Y[		Y<&Y77Y<<Y??ZZ	Z)ZZ) 	Z>)Z2.Z>5	[	>[[	
	[([[(	[=([1-[=4	\=\	\\c                   t        | t              syt        |       j                  \  }t        |t        t
        f      ryt        |t        j                  j                  j                        rst        |j                  d      r]t        |j                  j                  j                        dkD  r1t        d |j                  j                  j                  D              ryy)z"True if a FX graph returns a tupleT_schemar]   c              3  L   K   | ]  }t        |j                        d k(    yw)r   N)r  r  )r   rets     r   r   z&graph_returns_tuple.<locals>.<genexpr>  s     OcCHH)Os   "$F)r   rQ   rt   r   r   r8  r   r   r   r   r  r   r   r6  returnsrG  )r   rvs     r   r	  r	    s    b+&O  ER"tUm$2uxx}}))*BIIy)		!!))*Q.ORYY5F5F5N5NOO r   c                   t        |       }|j                  \  }t        j                  |      \  }| j                  j                  |      5  | j                  j                  |       ddd       | j                  j                  |       t        |       sJ  || |      t        j                        dfd       }|S # 1 sw Y   [xY w)z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nc                 <    t        j                   | i |      S r   )r=  tree_unflatten)r   r   r  specs     r   r  z(make_graph_return_tuple.<locals>.wrapper  s     $$[$%A&%A4HHr   )r   r   r   r   r  r   )rt   r   r=  tree_flattenr   inserting_beforer   r  r	  rE  wraps)r   r7  r  r   r:  r  r  r>  s         @@r   r
  r
    s     r?DIIER""2&HB		"	"4	( 
HHr"""R(K__[!I "I N s   CCc                .   | j                   j                  t        j                  j                   j	                         | j                   _        | j                           ||  j                  |       t        j                        dfd       }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    c                 F    j                    j                  |         S r   )process_outputsprocess_inputs)r   r  r  s    r   r  z+handle_dynamo_export_graph.<locals>.wrapper  s'    &&{4JG4J4JD4Q'RSSr   )r   r   r  r   )	r   r  r   r   CodeGenr  rE  rE  rA  )r   r7  r  r  r  r  s       @@r   r  r    sx     hhG..0BHHLLNR!7!7!7!@AK__[!T "T Nr   c                   dd}t        j                  | j                  j                         | j                        D ]  }t        |t              st        |      }|r,t        |      r!|j                         t        j                  k7  rNt        |      }|j                  d      r y  ||j                                 y )Nc                    ddl m} | J t        | j                        }|j	                  |       }t        j                  |j                   d        |d      )Nr   )rY   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrY   r   r  get_device_propertiesr  r  r   )r  rY   device_interfacedevice_propss       r   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skip  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)r  torch.device | Noner  r   )	itertoolschaingraph_inputsr?  r  r   rk   rj   rI   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   rM  r   r  rK  s        r   r  r    s    
1  2 2 9 9 ;U=P=PQ )$'%d++&~~5>>1 4K@--%-Hdoo'()r   )optionsc               B   ddl m}  ||       sJ d       d}d}t        | j                  j                  t
        j                  j                  j                        r| j                  j                  }t
        j                  j                  j                         | j                  _        | j                          |j                  j                  |j                  j                  }|j                  j                  G|j                  j                  }n0t        | d      r| j                  }t        | d      r| j                  }|t!        j"                  |      nd}|t!        j"                  |      nd}	t!        j$                  ||xs i f      \  }
}t'        d |
D              rd	d
lm}m}  ||j.                  d      |
D cg c]&  }t        |d   t
        j0                        r|d   nd( }}|||k7  rt3        d| d|       |||	dni |||	d}||fS c c}w )z
    Flatten the inputs to the graph module and return the flat inputs and options.
    Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
    r]   )r	  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  V   K   | ]!  }t        |d    t        j                         # yw)r]   N)r   r   r)  r   s     r   r   z'_aoti_flatten_inputs.<locals>.<genexpr>h  s!     
MA:adE../
Ms   ')r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)ru  r	  r   r   r  r   r   r[   rF  r  pytree_infoin_specout_specr  rY  rZ  r=  treespec_dumpstree_flatten_with_pathr  rI  r]  r^  INVALID_INPUTr   r  )r   r   r   rW  r	  r`  ra  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr]  r^  r   flat_example_inputss                   r   _aoti_flatten_inputsrj  8  s(    0r" 	" GH"((##UXX^^%B%BC((##!HHNN224
&&2))11G''3**33H 2z"kkG2{#||H;B;N..w7TV+3+?h'R  *0)F)F	v|*& 
M9L
MM>''8
 	
 CV=>
1Q4.!D8  }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   +Hc                   t         j                  st         j                  rt        d      ||n	t	               }t        | t              rt        |       st        d      t        d |D              rt        d      t        |       }t        |      xs  t        j                  j                  d      }t        j                  j                   j#                         xs t        j                  j!                  |      }t%        j&                  dt         j(                        5  t+        t,        j.                        5  t1               5  t        j2                  j4                  j7                  t         j8                  j:                  dk(        5  t        j<                  j>                  jA                         5  tC        jD                  |      5  t        j                  jG                  |      5  tI        jJ                         5  tM        jN                  | ||||d	      cd d d        cd d d        cd d d        cd d d        cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)
NzBautograd_cache_key is not supported with cpp_wrapper or fx_wrapperzDautograd_cache_key does not support graphs that don't return a tuplec              3  R   K   | ]  }t        |t        t        t        f       ! y wr   r  r   s     r   r   z%autograd_cache_key.<locals>.<genexpr>  s     
F!:a$t,-
Fr  z;autograd_cache_key does not support nested container inputsTr  r  r]   )r  r  r  r  )(r   r  r  RuntimeErrorrc   r   rQ   r	  NotImplementedErrorr  r  r#   r   r  r  r   r   r   r&  r  r  rZ   r  r  r   r   r  r   r  r  r/  r?  r!  rw   r  r1  r   r0  r+   autograd_cache_key)r   rc  r  r  r  r  r  s          r   ro  ro    s    V..P
 	

 )4:M:O  %%.A%.H!R
 	
 
F~
FF!I
 	
 9? !0 E4E4E4T4T" 5U 5I 	$$,,. 	3==''	2  	!%6;U;U	

 	}BBC	

 	!"
 	--LL22a7	

 	668
 	
	"
 	o.
 	""$
 ..-)"7+/

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   #K8=K#AK)J9	6J$ J,I:I%	I:%	J.	J$7	J9	 	K		K#	K8%I.*I:1	J:J?J	J$JJ$	J9	$J-)J9	0	K9K>K	K#KK#	K8#K,	(K88L)r   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r  rn  )r  r   )r  zlist[dict[str, Any]])r   r  r  	list[int])r   rQ   r  rn  )r  zCallable[..., None]r  )r#  rQ   r   rQ   r  rn  )r#  rQ   r   rQ   r@  r   r  rQ   )F)r   rQ   rT  r   r  zGenerator[str, None, None])r   rQ   rc  r  r  rQ   )FN)r   rQ   rT  r   rk  rO  r  rQ   )r   rQ   rv  r   r  rn  )TNN)
r   rQ   r  r   r  zlist[str] | Noner  z&Callable[[torch.fx.Node], bool] | Noner  z"tuple[GraphModule, dict[str, int]])r   rQ   r  r   )rc  r  r  "AbstractContextManager[None, None])r  r   r  r   r  rq  )r   rQ   rc  r  r  r   r  z torch._subclasses.FakeTensorModer   )r  zstr | dict[str, Any] | Noner  zdict[str, Any])r  zGenerator[None, None, None]r  )
r   rQ   rc  r  r  r  rb  r  r  rA   )r   rQ   rc  r  rd  r  r  r  rb  r  r  rA   )r7  r  r  r  r  r  )r   )rB  Callable[..., Any]r  r  r=  r  r>  zlist[str | None]r  r   rv  r   r  ztuple[torch.Tensor, ...]r9  zSequence[PlaceholderInfo]r:  ztuple[int, ...]r  rr  )r   torch.Tensorr  rs  )rO  rs  rP  rs  rQ  rp  r  rn  )rB  rr  r7  zlist[torch.Tensor]r  r  r  ra  )
rv  rQ   rw  r`  ro  r  r  dict[str, Any] | Noner  z'list[str | Weights] | str | GraphModule)r  rQ   r  r  r  rQ   r  r  ro  rr  r  rC   r  r  r  r4   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r   rQ   r  zSequence[object]r   r  r  ztuple[GraphModule, GraphModule])rB  rQ   r  r  )r  rC   r  z'contextlib.AbstractContextManager[None])r   zGraphModule | GmWrapperr  r  )r   rQ   rc  r  r  r  r  r  r  r  ro  Callable[..., OutputCode]rv  r   r  rA   )
r   rQ   rc  r  r  r  ro  rv  r  rA   )rv  rQ   rw  r  r  rQ   )rv  rQ   rw  r  ro  rv  r  rt  r  z+dict[OpOverload, Callable[..., Any]] | Noner  r   r  r  r  CompileFxOutput)r   rQ   rw  r  r  r  )rv  rQ   rw  r  ro  rv  r  r   r  r  r  r  r  rw  )r   rQ   r7  r  r  rr  r  rr  )r   ri   r  rn  )
r   ru  r   zlist[Any] | tuple[Any, ...]r   rt  rW  rt  r  z tuple[list[Any], dict[str, Any]])r  r   (%  
__future__r   r  r  enumrE  r  rP  r  r   r   r  rF  r  abcr   r   collectionsr   r   dataclassesr   inspectr	   r
   operatorr   typingr   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer=  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   r   torch._dynamo.backendsr   r2  torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr   r6  r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   torch._functorchr+   r&  7torch._functorch._aot_autograd.subclass_parametrizationr,   torch._functorch.aot_autogradr-   r.   r/   r0   torch._inductor.codecacher1   r2   r3   r  r4   r5   r6   r7   r8   !torch._inductor.custom_graph_passr9   torch._inductor.debugr:   r;   torch._inductor.output_coder<   r=   r>   r?   r@   rA   'torch._inductor.runtime.cache_dir_utilsrB   torch._inductor.utilsrC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   "torch._library.fake_class_registryrM   torch._library.opaque_objectrN   torch._loggingrO   torch._utils_internalrP   rQ   %torch.fx.experimental.symbolic_shapesrR   rS    torch.fx.passes.fake_tensor_proprT   torch.monitorrU   torch.utils._ordered_setrV   _dynamo.excrX   rY   fx._lazy_graph_modulerZ   fx.graphr[   utils._tritonr\   r[  r^   r_   codegen.commonr`   ra   r?  rb   decompositionrc   excrd   fx_passes.joint_graphre   fx_passes.post_gradrf   rg   fx_passes.pre_gradrh   r   ri   irrj   rk   output_coderl   triton_bundlerrm   rn   ro   rp   rq   rr   rs   rt   ru   rv   virtualizedrw   collections.abcrx   ry   rz   r{   
torch._opsr|   )torch.export.pt2_archive._package_weightsr}   r~   r   r   r  r   r   torch._inductor.fb.utils&torch._functorch._aot_autograd.schemasr   r   r   r   r  r   r  rw  Enumr   r   r   r   _fx_compile_configr   r+  r   r.  r   r1  r   r   r   _logginggetArtifactLoggerr  r  r  r>  r  r   r   r   r   	lru_cacher   cacher  r)  rO  rY  r[  rj  rs  r  r  r  r  r  r  contextmanagerr  r  r  r  r  r~  r  r  r  r@  rH  rL  rR  r<  rm  r  r  r  r  r  r  r  r  r  r  r  r  ru  r  r  r  r	  r
  r  r  rj  ro  r   r   r   <module>r     s
   "     	    	 
   # # - !     . . U U  $  $ $ A  ;  ; C =    F  O N  B  >   @ 7 + ?   W ; & / 5 : % & 3 3 U  .  5 B /   ' / )
 
 
  ==:%A$ t_T](((*% L  	$v,%,,!7783>cJWT 
DII    CD ./ $))%// +;; g!00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A,'=  T/ / 	
 	
H/VM	M%M8FMMb 38!!+/!!0NN'N N0 "'(,''' &' 	'T+" ".2CG	E(E(E( ,E( A	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ 37(/((  @y @  '+-
-
'-
 $-
 '	-

 -
` 23 '+ll'l $l -	l
 l 4l^B B# #6W) W@ '+<Y<Y'<Y
 #<Y $<Y -<Y <Y~  $   J (*) +-.0*,))$) 	)
 #) ) ) () ,) () )XU		  
	 (*_M_M_M %_M &	_MJ )9,0	@+@+$@+ &@+ *	@+
 -@+F qh#h+h h 	h
 &h h h %h 6hV4$--"- - %	-`$$,$ $0 0 0;;;H 0@CC'C  C 	C
 /C -C C CT 0@	;;'; /; -	;
 ;|//*=//j 0@,0BF"&*hh(h -h *	h
 @h h $h hV&9J CV&*//(/ -/ 	/ @/ $/ /p CV&*K9K9(K9 -K9 	K9 @K9 $K9 K9\$ # 	4 # 	,)D %)S(
 &*S(S(
%S( "S(
 #S( &S(t 	C
 C
r   