
    9j                     2   U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlZd dlmZmZmZ d dlmZmZ d dlmZ d dlZd dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d d	l#m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 ddl8m9Z9m:Z: ddl:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@mAZA ddlBmCZC  ej                  eE      ZFdaGeHeIeJeKf      dz  eLd<   daMeNeLd<   daOeIePeJdz  f   dz  eLd<    e&eEd      ZQ e&eEd      ZReHe   ZS e j                  ddd g      ZUg d!ZVej                  d"eNfd#       ZX	 	 d`d$eHe=   d%eNd&eJdz  d"dfd'ZYd(eHe=   d"ej                  fd)Z[	 	 dad$eSdz  d*eIeJeJf   d+eJdz  d,ePd"df
d-Z\d*eIeJeJf   d"eIeJeUf   fd.Z]d/ej<                  jX                  d(eSd"dfd0Z^ej                  d"ed   fd1       Z`i aaeIeJeIeJeHeJ   f   f   eLd2<   i abeIeJeHeJ   f   eLd3<   dacePdz  eLd4<   i adeIeJeJf   eLd5<   i aeeIeJeHeJ   f   eLd6<   d afePeLd7<   dbd8Zgej                  d"ed   fd9       Zh G d: d;      Zi G d< d=      Zjd$eSd"dfd>Zkd$eSd"dfd?Zld@eHeJdz     d"dfdAZmd$ee=   d"dfdBZndCeeoeepf      d"dfdDZqdbdEZrej                  d"ed   fdF       Zsej                   G dG dH             Zu ej                         ZwdIePdz  dJeIeJef   d"eIeJeIeJeHeJ   f   f   fdKZxdLeIeJef   d"eIeJeIeJef   f   fdMZyd"eIeJef   fdNZzd"eIeJeIeJeHeJ   f   f   fdOZ{	 dcdPee=   e;z  dQeJdReNd"ePdz  fdSZ|dTedUed"dfdVZ}dWeJd"efdXZ~ddYdZed[eJf   d\ej                  j                   d]eIeJef   d^e3dz  d"eJf
d_Zy)d    N)CallableIteratorSequence)AnyIO)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)utils)getArtifactLogger)trace_structured)signpost_event)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)FileLike)
OrderedSet)tree_map   )configir)ExternKernel)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VGRAPH_EXECUTION_ORDERFRECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSir_pre_fusionir_post_fusionBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                  0    t        j                  d      d uS )Nr,   )shutilwhich     U/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/debug.pyhas_dotr4   ?   s    <<d**r2   nodesprint_graphfnamec           
         t               st        j                  d       y|
t               }t	        |       }|j
                  D ]  }d|j                  vr|j                  d   j                  }t        |t              r t        |d   t              r|d   f}nj|d   }ndt        |t              rT	 |j                  d   j                  }|j                         d   j                  j                         }|rt        |      nd}d}t        |t"        j$                        r|j&                  j(                  }t+        ||ddddd      }	|	|j                  d<    |rt-        |       t/        i |      }
t1        |
       |
j2                  j5                          t7        |
|dt8        j:                  j<                  	       y# t         $ r d}Y w xY w)
z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   r   r1   tensor_metaF)
clear_metadot_graph_shape)r4   logwarningr   create_fx_from_snodesr5   metagroup
isinstancetupleintstrsnodeget_outputsnodemaybe_get_size	Exceptionr   ComputedBufferdatadtyper   printr   r   graphlintr	   r   tracer<   )r5   r6   r7   rO   rH   rA   rF   sizerM   metadatagms              r3   draw_buffersrU   D   s    9@A}(*!%(E ,		)		-(..eU#%(C(qas#		-066((*1-22AAC'+d
 dB--.IIOOE!%dD$M#+		- 7,: e	R	B2HHMMO
EeV\\5Q5Q%  s   1AF??GGsnodesc                    dt         dt        dt        f   fd}t        j                  dg d      }i }i }t
        j                  j                         }d}g }d}| D ]  }	|	j                         rd}
|
}ns|	j                         rd	}
|
}n^t        |	t              rd
}
|
}nIt        |	t              rd}
|	j                  }n*t        |	t              rd}
|	j                  }nt        d      t
        j                   j"                  j%                  |	j'                         d      }|
 d| } ||      }i }t)        |	d      rd|	j+                         i}|j-                  |d|      }dt.        t        z  dt0        ffd |	      r|j3                  |       |	j5                         }||_         |||	|
      |j8                  d<   |||<   |	j;                         D ]  }|||j5                         <    ||} | D ]  }	|	j5                         }|	j<                  j>                  }||   }g }|D ]{  }|j6                  |v r||j6                     }nD|jA                  |      5  |jC                  |j6                        }|||j6                  <   ddd       |k(  rk|j3                  |       } tE        |      |_#         |jI                  tK        |      dk(  r|d          |S tE        |             |S # 1 sw Y   ixY w)B
    Creates a FX Graph from a list of SchedulerNode objects.
    r*   r-   .c                 4    dt         dt        fd}| |_        |S )Nargsr-   c                       yNr   r1   )rZ   s    r3   func1z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1   s    r2   )r   rD   __name__)r*   r]   s     r3   get_fake_funcz,create_fx_from_snodes.<locals>.get_fake_func   s#    	 	 	 r2   
FusionMeta)rA   rF   typeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer1   rZ   kwargsrF   c                     t        | t              rt        fd| j                  D              S t        d | j	                         D              S )Nc              3   .   K   | ]  } |        y wNr1   ).0x	in_outputs     r3   	<genexpr>z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>   s     >A9Q<>s   c              3   r   K   | ]/  }|j                   D ]  }t        |j                  t                 1 y wrn   )usersrB   rH   r!   )ro   bufusers      r3   rr   z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>   s<      II  499j11s   57)rB   r   anyrV   rG   )rF   rq   s    r3   rq   z(create_fx_from_snodes.<locals>.in_output   sF    %!34>>>>  ,,.  r2   r9   r   r   )&rE   r   rD   collections
namedtupletorchr   Graph	is_externis_templaterB   r    r"   rA   r   RuntimeError	_inductorr   get_fused_kernel_name	get_nodeshasattrrh   call_functionr   boolappendget_namer*   r@   rG   read_writesreadsinserting_beforeplaceholderrC   rZ   outputlen)rV   r_   r`   buf_to_fx_nodenode_to_fx_noderO   
first_nodeoutputsrA   rF   	node_type
fused_name	func_name	node_funcrk   fx_noder*   ru   depsnew_argsdepdep_noderq   s                         @r3   r?   r?   }   s   
C HS#X$6  ''6PQJNOHHNNEJGE  2!?? IE "IE56IE}-!IKKE12IKKE233__**@@OO

 !kJ<0	!),	5,' 0 0 23F%%ib%H	.1CC 	 	 UNN7#~~&0y&I]# '$$& 	5C-4N3<<>*	5  Je2!j  '~~  &&!$' 		&Cxx>))#((3++J7 8$00:H/7N388,8 7"OOH%		& X#'& 
LLs7|q0EL 7<GnEL8 8s    +K33K<node_name_to_buf_nameparent_buf_name	n_originsc                 X   | y | D ]  }|j                         }|j                         }| t        |      dkD  rt        ||||n|       Et        |      dk(  r|d   |k(  sJ |j                  }||j
                  x|j
                  D ]  }|j                  }	|	|vs||n|||	<     y )Nr   r   )r   r   r   $update_orig_fx_node_name_to_buf_namerH   originsr*   )
r5   r   r   r   rH   buf_namechildren_nodesir_nodeorigin	node_names
             r3   r   r      s     } ==?)%#n*=*A0%+3
  ~&!+q0AT0III))?goo5oo 	FI  55 / 7H_ &i0	%r2   c                     i }| j                         D ]-  \  }}||vrt        |g      ||<   ||   j                  |       / i }| j                         D ]"  \  }}t        ||         }t	        ||      ||<   $ |S rn   )itemsr   addr   r)   )r   buf_name_to_n_noder   r   node_name_to_buf_metan_nodes         r3   get_node_name_to_buf_metar     s     4::< 8	8--+5yk+Bx( x(,,Y78 4::< E	8'12+28V+Di(E ! r2   rT   c                     i }t        ||       |yt        |      }| j                  j                  D ]9  }|j                  |v s|j                  |j                        |j                  d<   ; y)rX   Nbuf_meta)r   r   rO   r5   r*   getr@   )rT   rV   r   r   rH   s        r3   annotate_orig_fx_with_snodesr     sm     -/(1FG$56KL I99--$9$=$=dii$HDIIj!Ir2   c               #     K   t         j                  j                  dd      dk(  } dd l}t	        j
                  |j                  j                  j                        }t        j                         }| s	 d  |j                          y |j                  t        dd             t         j                  j                  t!               d      }t        j"                  |d       t	        j$                  t         j                  j                  |d	t'                d
            }|j)                  t        j*                         |j-                  t	        j.                  d             |j1                  |       	 d  |j3                  |       |j                          y # |j                          w xY w# |j3                  |       |j                          w xY ww)NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradr^   
contextlib	ExitStackcloseenter_contextr   pathjoinr   makedirsFileHandlerr
   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugrz   r=   stackr   fhs         r3   enable_aot_loggingr   '  sZ    JJNN#8#>#EM(


E,,99BB
CC  "E	KKM 
JDQR77<<9DKKt$			
%'(
3	

B KKOOOP NN2"3 KKM0 	"s7   A1G4F 8C>G7F2 ;"GF//G2#GG _inductor_post_to_pre_grad_nodes._inductor_triton_kernel_to_post_grad_node_info_pre_grad_graph_id#_inductor_pre_grad_node_stack_trace_inductor_kernel_stack_trace(_inductor_kernel_provenance_debug_handlec                      da y r\   )r   r1   r2   r3   -reset_inductor_kernel_provenance_debug_handler   [  s    /0,r2   c               #      K   t         } t        j                         }t        j                         }t        j                         }t
        j                         }t        }da i ai ai ai ada	 d | a |a|a|a|a|ay# | a |a|a|a|a|aw xY ww)zzContext manager that resets provenance tracking globals upon entering
    and restores their original values when exiting.r   N)r   r   copyr   r   r   r   )original_pre_grad_graph_idoriginal_post_to_pre_grad_nodes-original_triton_kernel_to_post_grad_node_info+original_inductor_pre_grad_node_stack_trace$original_inductor_kernel_stack_trace0original_inductor_kernel_provenance_debug_handles         r3   reset_provenance_globalsr   `  s      "4&F&K&K&M#6;;= 2 	,002 0 ,H+L+L+N(0 5
 ')$572*,'#% /0,
 8+J(9 	7 (L$7 	, = 	1 8+J(9 	7 (L$7 	, = 	1s   A)B,A= 0B=BBc                   ^   e Zd Z ej                         Zedededz  fd       ZddZ	deddfdZ
	 dded	ed
ededee   f
dZej                  	 dded	ed
ededeee      f
d       ZdedefdZddZddZdededdfdZdee   dz  dedz  dedz  ddfdZddZdeded   dz  fdZy)DebugContextfolder_namer-   Nc                 6   t         j                  j                  xs
 t               }t        j
                  D ]`  }t        j                  j                  |d|  d|       }t        j                  j                  |      rIt        j                  |       |c S  y )Nr   .)r   rQ   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnames       r3   create_debug_dirzDebugContext.create_debug_dir  s{    LL**=mo	&& 	Aggll-q$G
 77>>'*G$	 r2   c                 R    d | _         d | _        t        j                         | _        y rn   )_prof_pathr   r   _stack)selfs    r3   __init__zDebugContext.__init__  s     

 **,r2   new_pathc                    | j                   sy |j                  d      sJ |       ddlm} 	  || d      5  t        j
                  j                  |      rt        j                  |       t        j                  | j                   |       d d d        y # 1 sw Y   y xY w# t        $ r$ t        j                  d| j                   |       Y y w xY w)Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   r/   rmtreecopytreeOSErrorr=   r>   )r   r   r   s      r3   r   zDebugContext.copy  s    zz  *4H4*%	XJe,- 677>>(+MM(+

H56 6 6  	KK:DJJ	s/   B" ABB" BB" B" "*CCfilename
write_moderZ   rk   c                     | j                   sJ t        t        j                  j	                  | j                   |      |g|i |S rn   r   openr   r   r   )r   r   r   rZ   rk   s        r3   fopenzDebugContext.fopen  s:     zzzBGGLLX6
TTTVTTr2   c              /      K   | j                   sJ t        t        j                  j	                  | j                   |      |g|i |5 }| d d d        y # 1 sw Y   y xY wwrn   r   )r   r   r   rZ   rk   fs         r3   fopen_contextzDebugContext.fopen_context  sW      zzz"'',,tzz84jR4R6R 	VWG	 	 	s   AA#	A	A#A A#suffixc                 r    | j                   sJ t        j                  j                  | j                   |      S rn   )r   r   r   r   )r   r  s     r3   r   zDebugContext.filename  s'    zzzww||DJJ//r2   c                    t         j                  j                  dd l}| j                  sJ t
        j                  j                  | j                  t
        j                  j                  | j                         d      }|j                  |d      5 }|j                  | j                  t
        j                  j                  | j                               d d d        t         j                  j                  |       y y # 1 sw Y   *xY w)Nr   z.tar.gzzw:gz)arcname)r   rQ   
upload_tartarfiler   r   r   r   basenamer   r   )r   r
  tar_filetars       r3   r	  zDebugContext.upload_tar  s    <<"".:::ww||

rww//

;<GDH h/ J3

BGG,<,<TZZ,HIJLL##H- /J Js   ADD
c                    t         j                  rjt        j                  d      j                  }j                  t        j                         dt        dd ffd}| j                  j                  ||       | j                  j                  t        j                  |              t         j                  j                  sy | j                  t!                     | _        t         j                  j$                  r | j'                  dt        j                         t         j                  j(                  r!| j'                  dt        j*                         y y )Nztorch._dynamolevelr-   c                 (    j                  |        y rn   )r   )r  r=   s    r3   reset_log_levelz/DebugContext.__enter__.<locals>.reset_log_level  s    U#r2   z	debug.logzinfo.log)r   debugr   r   r  r   r   r   r   callbackr   r#   set_debug_handlerrQ   enabledr   r
   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr  r=   s      @r3   	__enter__zDebugContext.__enter__  s    <<##O4CJLL'$s $t $ KK  *=!!!"5"5d";<||##**+=+?@
<<!!##K?<<  ##J= !r2   r  c                    t        j                  d      }| j                  j                  | j	                  |            }t        j
                  |      }|j                  |       |j                  t        j                  d             |j                  |       |j                  t        |j                  |             | j                  j                  |j                  |       y )Nztorch._inductorr   )r   r   r   r   r  StreamHandlerr   r   r   r   minr  r  r   )r   r   r  r=   fdchs         r3   r  zDebugContext._setup_log_capture  s    
  12[[&&tzz(';<""2&
E
ST	
 	rSE*+S..3r2   exc_typeexc_valexc_tbc                 .   | j                   r*| j                   j                          | j                          | j                  r9| j	                          t
        j                  dt               | j                         | j                  j                          y )Nz%s debug trace: %s)
r   disable_save_profile_datar   r	  r=   r>   r   r   r   )r   r!  r"  r#  s       r3   __exit__zDebugContext.__exit__  sa     ::JJ ##%::OOKK,.F.H$**Ur2   c                    | j                   sJ | j                   j                  | j                  d             | j                  d      5 }t	        j
                  | j                   |      }|j                          |j                  d       |j                  d       |j                  d       |j                  d       d d d        y # 1 sw Y   y xY w)Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r  pstatsStats
strip_dirs
sort_statsprint_stats)r   r  statss      r3   r&  zDebugContext._save_profile_data  s    zzz

dmmN;<ZZ( 	#BLLB7EY'c"Y'c"	# 	# 	#s   
A6C		Cr*   ).Nc                 
   t         j                  j                  r0t        t         j                  |      r	 t        t	        |       |      S dt        dt        dd fd}|S # t
        $ r t        j                  dd       Y y w xY w)Nz Ignoring exception in debug codeTexc_inforZ   rk   r-   c                       y rn   r1   rj   s     r3   ignoredz)DebugContext.__getattr__.<locals>.ignored)  s    r2   )	r   rQ   r  getattrDebugFormatterrJ   r=   r>   r   )r   r*   r8  s      r3   __getattr__zDebugContext.__getattr__   sy    <<GFLL$$?~d3T::s c d  N  >Ns   A  BBr-   N)w)r^   
__module____qualname__	itertoolscountr   staticmethodrE   r   r   r   r   r   r  r   contextmanagerr   r  r   r	  r  rD   r  ra   BaseExceptionr'  r&  r   r;  r1   r2   r3   r   r     s   y Hc cDj  -
S T & UU U 	U
 U 
CU  		 	 		
 	 
"S'		 	0s 0s 0
.>.44 4 
	4 }%, % d
	
 
	# (;d(B r2   r   c                      e Zd ZdeddfdZdej                  j                  deej                     ddfdZ
dej                  j                  deej                     ddfdZd	eddfd
Zd	eddfdZed	edefd       Zd	eddfdZdej                  j                  d	eddfdZddededdfdZdedeej,                     dedef   dedededz  ddfdZy)r:  handlerr-   Nc                 x    |j                   | _         |j                  | _        |j                  | _        || _        y rn   )r  r  r   rF  )r   rF  s     r3   r   zDebugFormatter.__init__0  s/    ]]
$22((r2   rT   inputsc           
         | j                  d      5 }d }t        j                  j                  j                  j
                  rRt        j                  j                  j                  |      }t        j                  j                  |j                        }t        j                  j                  j                  j
                  }t        j                  j                  j                  ddd      5  t        |||d||       d d d        d d d        | j                  d      5 }|j                  |j!                  d             d d d        y # 1 sw Y   MxY w# 1 sw Y   QxY w# 1 sw Y   y xY w)Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r  rz   r   r   rQ   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r*   r   r   writeprint_readable)r   rT   rH  r  rK  rL  s         r3   fx_graphzDebugFormatter.fx_graph6  s+   
 ZZ./ 	2H%%++==**55NNvV77??2773
  //0066HHK''--"'EJ 
 !% +
	, ZZ./ 	<2HHR&&E&:;	< 	<
 
	 	,	< 	<s0   CE/EE""E%E	EE"%E.c                     | j                  d      5 }|j                  |j                  d             d d d        y # 1 sw Y   y xY w)Nzfx_graph_transformed.pyFrM  )r  rS  rT  )r   rT   rH  r  s       r3   fx_graph_transformedz#DebugFormatter.fx_graph_transformedT  sB    
 ZZ12 	<bHHR&&E&:;	< 	< 	<s	   "=Ar5   c                     | j                  d      5 }|j                  | j                  |             d d d        y # 1 sw Y   y xY w)Nzir_pre_fusion.txtr  rS  	_write_irr   r5   r  s      r3   r'   zDebugFormatter.ir_pre_fusion\  s;    ZZ+, 	,HHT^^E*+	, 	, 	,	   !<Ac                     | j                  d      5 }|j                  | j                  |             d d d        y # 1 sw Y   y xY w)Nzir_post_fusion.txtrY  r[  s      r3   r(   zDebugFormatter.ir_post_fusion`  s;    ZZ,- 	,HHT^^E*+	, 	, 	,r\  c                     t        j                         }| D ]2  }|j                  |j                                |j                  d       4 |j	                         S )Nz


)ioStringIOrS  	debug_strgetvalue)r5   ru   rH   s      r3   rZ  zDebugFormatter._write_ird  sI    kkm 	 DIIdnn&'IIh	  ||~r2   c                 <    t        || j                  d             y )Nzgraph_diagram.svg)r7   )rU   r   )r   r5   s     r3   graph_diagramzDebugFormatter.graph_diagraml  s    U$--0C"DEr2   c                     t        ||       t        || j                  d      dt        dt        j
                  j                         y )Nzorig_fx_graph_diagram.svgFT)r7   r;   progparse_stack_tracer<   )r   r	   r   GRAPHVIZ_COMMAND_SCALABLEr   rQ   r<   )r   rT   r5   s      r3   draw_orig_fx_graphz!DebugFormatter.draw_orig_fx_grapho  s<    
 	%R/-- ;<*""LL88	
r2   r   	extensionc                 T    t        j                  || j                  d|              y )Nzoutput_code.)r/   r   r   )r   r   rj  s      r3   output_codezDebugFormatter.output_code~  s     Hdmml9+,FGHr2   r*   input_nodestimingsChoiceCallerelapseprecompile_elapseprescreening_elapsec           	      N   ddl m dt         j                  dt        t        t        f   ffd|t
        j                  j                         t
        j                  j                         |D cg c]
  } |       c}|||d}| j                  ddd	
      5 }	|j                         D ][  \  }
}t        |
j                               }|j                  |       ||d<   t        j                  ||	       |	j                  d       ] 	 d d d        y c c}w # 1 sw Y   y xY w)Nr   )FixedLayoutrH   r-   c           
         t        | d      r| j                  }nd}|t        |       j                  d}	 | j	                         }t        |      r |j                  |j                  t        j                  j                  j                  |j                        t        j                  j                  j                  |j                        t        j                  j                  j                  |j                  d            }t!        |      |d<   nt!        |      |d<   	 t!        | j%                               |d<   	 t!        | j'                               |d	<   	 t!        t        j                  j                  j                  | j)                                     |d
<   	 t!        t        j                  j                  j                  | j+                                     |d<   	 t!        t        j                  j                  j                  | j-                                     |d<   t        | d      r9t        | j.                  t0        j2                        r | j.                        |d<   |S # t"        $ r Y Yw xY w# t"        $ r Y Lw xY w# t"        $ r Y ?w xY w# t"        $ r Y w xY w# t"        $ r Y w xY w# t"        $ r Y w xY w)Nr*    )r*   ra   r   )fallback)rM   rR   strideoffsetlayoutrM   ri   rx  rR   numelrL   )r   r*   ra   r^   get_output_specrB   ri   rM   r#   rO   sizevarsoptimization_hintsrR   rx  optimization_hintry  rE   rJ   	get_dtyperh   
get_strideget_size	get_numelrL   r   IRNode)rH   r   	node_inforz  static_layoutrt  build_node_infos        r3   r  z>DebugFormatter.log_autotuning_results.<locals>.build_node_info  s\   tV$ II		!T
++I--/fk2$/$llWW--@@M ww//BB6==Q ww//AA"MMA  B  %M +.m*<Ih'*-f+Ih'%()9%:	'"&)$//*;&<	(#&)GG$$778IJ'	(#
$'GG$$77H%	&!
%(GG$$66t~~7GH&	'"
 tV$DIIryy)I$3DII$>	&!=            so   C/I2 &J J  AJ" $AJ2 (AK 2	I?>I?	JJ	JJ"	J/.J/2	J>=J>	KK)op_namecuda_device_namecuda_device_countrm  autotuning_timeprecompile_timeprescreening_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)r   rt  r  dictrE   rz   cudaget_device_namedevice_countr  r   	info_dictupdatejsondumprS  )r   r*   rm  rn  rp  rq  rr  rH   general_propertiesr  callertimer  rt  r  s                @@r3   log_autotuning_resultsz%DebugFormatter.log_autotuning_results  s    	$6	")) 6	S#X 6	r  %

 : : <!&!8!8!:>IJdOD1J%0!4
 -tg   
 	 '  !1!1!34	  !3404	,-		)R(	 	 K
	 	s   2D
A/DD$)py)r^   r>  r?  r   r   rz   r   r   listTensorrU  rW  SchedulerNodeListr'   r(   rB  rE   rZ  rd  ri  rl  r   r  r  floatr  r1   r2   r3   r:  r:  /  s     <HH  < U\\"< 
	<<<HH  < U\\"< 
	<,#4 , ,,$5 ,$ , * s  F#4 F F
HH  
 !
 
	
IC IC I4 ITT "))_T ne+,	T
 T !T #T\T 
Tr2   r:  c                     t         j                  t        j                        r)t         j	                  dt
        j                  |              t        j                  j                  |        y )NzBEFORE FUSION
%s)
ir_pre_fusion_logisEnabledForr   r  infor:  rZ  r#   r  r'   r5   s    r3   log_ir_pre_fusionr    sB    %%gll32N4L4LU4STGG% r2   c                     t         j                  t        j                        r)t         j	                  dt
        j                  |              t        j                  j                  |        y )NzAFTER FUSION
%s)
ir_post_fusion_logr  r   r  r  r:  rZ  r#   r  r(   r  s    r3   log_ir_post_fusionr    sB    &&w||4 2N4L4LU4STGG5!r2   schedulec                 x     	 t        dd  fd       y # t        $ r t        j                  dd       Y y w xY w)Nartifactc                      dddS )Ninductor_collective_scheduler  r*   r  r1   r1   r2   r3   <lambda>z+_dump_collective_schedule.<locals>.<lambda>  s    6"! r2   c                       S rn   r1   r  s   r3   r  z+_dump_collective_schedule.<locals>.<lambda>  s    x r2   metadata_fn
payload_fnzAFailed to log inductor_collective_schedule via structured loggingTr5  )r   rJ   r=   r  r  s   `r3   _dump_collective_scheduler    sE    
 (	
  
		O 	 	

s     99c           
          | D cg c]6  }t        t        |dd       x}t        j                        rt        |dd       8 }}|rt	        |       y y c c}w )NrH   python_kernel_name)rB   r9  r   _CollectiveKernelr  )r5   rH   opr  s       r3   log_collective_scheduler    s`     GD&$77b9M9MN 	($/H  !(+ s   ;Anode_runtimesc           	      h   	 t         j                  j                  j                  dt        t
           dz  dt        t
           ffd}dt
        dt        dz  fd}g | D ]  \  }}t        |j                  d|j                               }t        j                  |j                        rdnd	}g }	 |j                         D ]  }|j                  }	|	j                         }
t        |	j                   t"        j$                        r|	j'                         nd}|	j)                         }|j+                   ||
       ||       ||      d
        	 j+                  ||||d       	 t/        dd fd       y# t,        $ r Y 7w xY w# t,        $ r t0        j3                  dd       Y yw xY w)zDLog per-op runtime estimates and output tensor metadata for TLParse.rp   Nr-   c                 .    | t         |             S g S rn   )r  )rp   to_optimization_hintss    r3   to_listz,log_runtime_and_tensor_meta.<locals>.to_list	  s    56]4-a01JJr2   rM   c                 D    | y t        |       }|j                  d      }|S )Nztorch.)rE   removeprefix)rM   ss     r3   dtype_to_strz1log_runtime_and_tensor_meta.<locals>.dtype_to_str  s&    }E
Ax(AHr2   r  
collectivere   )shaperx  rM   )r*   ra   estimated_runtime_nsr   r  c                      dddS )N inductor_runtime_and_tensor_metar  r  r1   r1   r2   r3   r  z-log_runtime_and_tensor_meta.<locals>.<lambda>9  s    :"! r2   c                      d iS )Nopsr1   )r  s   r3   r  z-log_runtime_and_tensor_meta.<locals>.<lambda>=  s    s| r2   r  z.Failed to log inductor_runtime_and_tensor_metaTr5  )r#   rO   r}  r~  r   r   r  rE   r9  rH   r   r   is_collectiverG   rI   rB   rz  r   Layoutr  maybe_get_dtyper   rJ   r   r=   r  )r  r  r  r  
runtime_nsr*   op_typer   ru   irnoder  rx  rM   r  r  s                @@r3   log_runtime_and_tensor_metar    s   :S ! 0 0 C C	Kx}t+ 	KS	 	K	 	d
 	 %'* !	MAz166#7FD&+&9&9!&&&AlyG -/G==? C XXF"113E &fmmRYY? ))+! 
 #224ENN%,U^&-fo%1%%8& JJ #,6&	5!	F 	 ,	
  (  S		BT	RSs7   B/F 4BE?,F ?	FF 
FF  F10F1c                      t         sy	 t        dd d        y# t        $ r t        j	                  dd       Y yw xY w)	z:Emit a structured artifact with the graph execution order.Nr  c                      dddS )Ngraph_executionr  r  r1   r1   r2   r3   r  z%log_graph_execution.<locals>.<lambda>J  s    )"! r2   c                      dt         iS )Ngraph_execution_order)r$   r1   r2   r3   r  z%log_graph_execution.<locals>.<lambda>N  s     79NO r2   r  zFailed to log graph_executionTr5  )r$   r   rJ   r=   r  r1   r2   r3   log_graph_executionr  C  sH     
B P	
  B		1D	ABs     ==c               #   l   K   g a i ada	 d t                dada day# t                dada daw xY ww)z5Record graph execution order and log it once on exit.TNF)r$   r&   r%   r  r1   r2   r3   $record_and_log_graph_execution_orderr  T  sU      !!!& $  	!& $ s   4 414c                   6    e Zd ZU eed<   ej                  ed<   y)TensorMetadataHoldertensor_metadatari   N)r^   r>  r?  r   __annotations__rz   ri   r1   r2   r3   r  r  d  s    ##LLr2   r  pre_grad_graph_idpost_to_pre_grad_nodes_jsonc           	         i i d}t        |t              st        j                  d       |S t        | t              s|S t        j                  t              }t        j                  t              }	 dt        t        t        f   dt        fd}|j                         D ]<  \  }}t        |t              st        j                  d       |c S |D ]  } ||      s|c c S |j                  d      | k(  r.||d      j                  |       ||   j                  |d          |j                  d	g       D 	cg c]  }	|	|f }
}	|
sx|
j                         \  } ||      s|c c S |j                  d      | k(  r.||d      j                         |   j                  |d          |
j!                  fd
|j                  d	g       D               |
r
 ? dt        t        t        f   ddfd} ||        ||       ||dS c c}	w # t"        $ rd}t%        dddt        |      t'        j(                         d       t        j                  d|       t        j                  d|        |cY d}~S d}~ww xY w)zx
    Create bidirectional mappings between pre_grad graph nodes
    and post_grad graph code nodes, and vice versa.
    )	preToPost	postToPrezCProvenance tacking error: post_to_pre_grad_nodes_json is not a dictrH   r-   c                     t        | t              st        j                  d       yd| vsd| vsd| vrt        j                  d       yy)NzVProvenance tacking error: node provenance in post_to_pre_grad_nodes_json is not a dictFgraph_idr*   	from_nodezYProvenance tacking error: node provenance in post_to_pre_grad_nodes_json has wrong formatT)rB   r  r=   error)rH   s    r3   check_formatz8create_mapping_pre_post_grad_nodes.<locals>.check_format  sN    dD)		l %t);{RV?V		o r2   zIProvenance tacking error: post_to_pre_grad_nodes_json value is not a listr  r*   r  c              3   &   K   | ]  }|f 
 y wrn   r1   )ro   r   
parent_keys     r3   rr   z5create_mapping_pre_post_grad_nodes.<locals>.<genexpr>  s      !,-J!s   dNc                 J    | D ]  }t        | |         | |<    t        |       } y rn   r  r  r  keys     r3   convert_sets_to_listszAcreate_mapping_pre_post_grad_nodes.<locals>.convert_sets_to_lists  +     &af#&QAr2   rJ  provenance_tracking_error"create_mapping_pre_post_grad_nodesfunction	error_msgstack_tracez post_to_pre_grad_nodes_json:  %szpre_grad_graph_id:  %s)rB   r  r=   r  rD   rx   defaultdictr   rE   r   r   r   r  r   r   popextendrJ   r   	traceback
format_exc)r  r  empty_returnpre_to_postpost_to_prer  	outer_key
node_arrayrH   r   r   current_noder  er  s                 @r3   r  r  m  s    5L
 148		WX'- "-"9"9*"EK"-"9"9*"EKD	tCH~ 	$ 	 &A%F%F%H 	!Izj$/		_ $#" #D)''88J'+<<V-11)<	*..tF|< 26+r1JKA!YKK/4yy{,L*'5++#''
37HH#L$89==jI#J/33L4HILL !1=1A1A+r1R!  	6	T#s(^ 	 	 	k*k*$$
 	
) L0   	'@ V(335	
 			46QR		*,=>sR   3AH H $AH :HH 
 H +A.H 3H H 	J AI;5J ;J triton_kernel_to_post_grad_jsonc           	      F   i i d}t        | t              st        j                  d       |S t	        j
                  t              }	 | j                         D ]I  \  }}t        |t              st        j                  d       |c S |D ]  }||   j                  |        K dt        t        t        f   ddfd} ||       | |dS # t        $ rN}t        dd	d
t        |      t        j                         d       t        j                  d|        |cY d}~S d}~ww xY w)zqCreate bidirectional mappings between triton kernel name and post_grad
    graph code nodes, and vice versa.
    )cppCodeToPostpostToCppCodezGProvenance tacking error: triton_kernel_to_post_grad_json is not a dictzMProvenance tacking error: triton_kernel_to_post_grad_json value is not a listr  r-   Nc                 J    | D ]  }t        | |         | |<    t        |       } y rn   r  r  s     r3   r  zFcreate_node_mapping_kernel_to_post_grad.<locals>.convert_sets_to_lists  r  r2   rJ  r  "create_mapping_kernel_to_post_gradr  z$triton_kernel_to_post_grad_json:  %s)rB   r  r=   r  rx   r  r   r   r  r   rE   r   rJ   r   r  r  )r  r  post_to_cpp_coder  r  	curr_noder  r  s           r3   'create_node_mapping_kernel_to_post_gradr    s:    /L
 5t<		U	
 '2'>'>z'J$%D%J%J%L 	;!Izj$/		c $#' ;	 +//	:;	;	T#s(^ 	 	 	./<-
 	
   	'@ V(335	
 			24S	
 s&   >C	 AC	 		D ADD D c            	         	 i } t         rrt        t              }i t        |} t        j
                  j                  r?t        j                  j                  dd      5 }t        j                  | |       d d d        d| d<   | S # 1 sw Y   xY w# t        $ r8}t        dddt        |      t        j                          d       i cY d }~S d }~ww xY w)	Nz/inductor_provenance_tracking_node_mappings.jsonr=  g       @versionrJ  r  dump_inductor_provenance_infor  )r   r  r   r   r   rQ   r  r#   r  r  r  r  rJ   r   rE   r  r  )node_mappingnode_mapping_kernelr  r  s       r3   r  r  
  s    " (*"I>#2%L ||##WW]]Es 0IIlB/0 #&Y0 0   	'; V(335	
 	s6   AB B4B BB 	C-CCCc            	          	 t         j                  di       } t        t        j	                               t        t
        j	                               z  }i }|D ]p  }t
        j                  |g       }t               }|D ]#  }|j                  | j                  |g              % t        j                  |g       |t        |      d||<   r |S # t        $ r8}t        dddt        |      t        j                         d       i cY d}~S d}~ww xY w)zCreate kernel information JSONr  )stack_tracespost_grad_nodespre_grad_nodesrJ  r  create_kernel_information_jsonr  N)r   r   r   r   keysr   r  r  rJ   r   rE   r  r  )r  all_kernelsresultkernel_namer  r  	post_noder  s           r3   r  r  0  s   %
 7::;K !=!B!B!DE
:??AI
 
 & 	KLPPRO /9lN, F	%%kooi&DEF != @ @b Q#2"&~"6#F;	  
'< V(335	
 	
s   C	C 	D-DDDnode_scheduler  r|   c           	        
 t         j                  j                  dk(  ry	 ddlm}m} t        dz  ag }| dt         }|rt        | t              sJ t        j                  |g       
| j                  r,| j                  j                  }|
vr6
j                  |       n$
j                  
fd| j                  D               t!        | j#                               }nt        | t               sJ t%               }| D ]  }|||fvs
|j&                  t        j                  |g       
|j)                  |j&                  j#                                
j                  
fd|j&                  j                  D                t!        |      }t*        j                  |g       j                  |       t        S # t,        $ r6}	t/        dd	d
t1        |	      t3        j4                         d       Y d}	~	yd}	~	ww xY w)z
    Set the mapping between `kernel_name` and the post_grad nodes in `node_schedule`.

    Returns a unique int debug handler for each call to this function.
    r   Nr   )DisableReductionEnableReduction:c              3   R   K   | ]  }|j                   vr|j                      y wrn   r*   ro   r   curr_node_infos     r3   rr   z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>  s)      &{{.8 KK&   $'c              3   R   K   | ]  }|j                   vr|j                      y wrn   r$  r%  s     r3   rr   z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>  s+      . !'%{{.@ #KK.r'  rJ  r  'set_kernel_post_grad_provenance_tracingr  )r   rQ   provenance_tracking_levelcodegen.simd_kernel_featuresr   r!  r   rB   r   r   
setdefaultorigin_noder*   r   r  r   r  get_stack_tracesr   rH   r  r   rJ   r   rE   r  r  )r  r  r|   r   r!  r  origin_node_namestack_traces_setrF   r  r&  s             @r3   r)  r)  Z  s    ||--2?S 	1A50"$$Q'O&PQm\:::KVVRN ((#0#<#<#A#A #>9"))*:;%% &"/"7"7& 
   > > @ALmT2220:& 2B CCzz-JUU +R ' )//

0K0K0MN&-- . +0***<*<.     01L$//R@GGU77  	'E V(335	
 s%   C%F9 F9 B$F9 9	G8,G33G8rZ   rk   c                  (   t         j                  j                  t        j                         d      }t         j                  j                  |      st        j                  |       dt        dt        fd}t        || |f      \  }}d}| d| dt        t               d}t        |d	      5 }t        j                  ||f|       d
d
d
       t        j                  t         j"                        rd| d|d}	t%        |	       y
y
# 1 sw Y   BxY w)z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    inductor_saved_argsrp   r-   c                 x    t        | t        j                        rt        t	        |       | j
                        S | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )rB   rz   r  r  r   ri   rp   s    r3   handle_tensorz5save_args_for_compile_fx_inner.<locals>.handle_tensor  s.     a&'(@(CQXXNNHr2   compile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   tempfile
gettempdirr   mkdirr   r   nextsave_args_cntr   pickler  r=   r  r   r   rN   )
rZ   rk   folderr5  args_to_savekwargs_to_savefn_namer   r  messages
             r3   save_args_for_compile_fx_innerrE    s    WW\\(--/1FGF77>>&!

 
 
 $,MD&>#J L. GXQwiqm!4 5T:D	dD	 7Q\>2A67 &337& 9$ %)8 ,	 	g '7 7s   .DDr   c                    ddl m} t        | d      5 }t        j                  |      \  }}d d d        dt
        dt
        fd}t        j                  j                  d      }|5  t        j                  d	d
      5  t        |f      \  }} ||i |cd d d        cd d d        S # 1 sw Y   ~xY w# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr   )r6  rbrp   r-   c                 
   t        | t              rrt        j                  j                  j                  | j                  j                  | j                  j                  | j                  j                  | j                        S | S rn   )rB   r  rz   _dynamotestingrand_stridedr  r  rx  rM   ri   r4  s    r3   r5  z9load_args_and_run_compile_fx_inner.<locals>.handle_tensor  se    a-.==((55!!''!!((!!''	  Hr2   T)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxr6  r   r?  loadr   rz   rP  FakeTensorModer   r   r   )r   r6  r  rZ   rk   r5  	fake_modes          r3   "load_args_and_run_compile_fx_innerrR    s    ;	dD	 &Q{{1~f&	 	 	 !!00t0LI	 1FLLe4 1f~>f001 1 1& &1 1 1 1 1s/   B)&C
=B5	C
)B25B>	:C

C)package_pathfunc.exported_programinductor_configsrS  c                   ddl m} ddlm} ddlm} ddlm} |j                  j                  }|j                  d      }	t        |	t        j                  j                        sJ |j                  \  }
}	 |r$|j                  j                   dk(  r ||d	|
       |r|j                  j                   dk(  rt#        j$                  |	      }t#        j$                  |j                        }t#        j$                  |      } |||d   |d   |
      \  }}t'        |      }t        j(                  j)                  ||d      } | |j                  d      |||dd        | |	|
||||      S # |$ r(} ||dd|       t*        j-                  d       |d }~wt.        $ r2}|r)d}|j                  j                   dk(  rd} ||d	||       |d }~ww xY w)Nr   )AccuracyError)dump_to_minify)r   )_aoti_flatten_inputsF)check_guards   aot_inductor)options   r   )strictTaccuracy)rV  rS  load_and_runcheck_accuracy)rV  rS  rb  aot_inductor_accuracyminify)commandr^  zAccuracy failedrun)torch._dynamo.debug_utilsrX  torch._dynamo.repro.aotirY  torch._inductorr   rN  rZ  r]  dump_aoti_minifiermodulerB   rz   r   r   example_inputsrepro_levelr   deepcopyrC   exportr=   r>   rJ   )rT  rU  rV  rS  rX  rY  r   rZ  use_minifierrT   rZ   rk   gm_copyexample_inputs_copyconfig_copyflat_example_inputstuple_inputsflattened_epr  rf  s                       r3   aot_inductor_minifier_wrapperrx    s    87&?&&99L		 	 e	 	4Bb%((..///#22LD&>F//;;q@ (
 F//;;q@ mmB'G"&--0@0O0O"P--(89K/C#A&#A&#	0, !!45L <<..wU.SL###7!,)!) -%%
 	
  #$		
 	%& G""..!3 (	 s%   7C7E/ /G4#FG#-GG)FNr\   r<  )F)rx   r   r   dataclasses	functoolsr_  r@  r  r   r   os.pathr?  r.  r/   r:  r  collections.abcr   r   r   typingr   r   unittest.mockr   rz   functorch.compiler	   r
   r   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   rj  r   torch._loggingr   torch._logging._internalr   torch._utils_internalr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.typesr   torch.utils._ordered_setr   torch.utils._pytreer   rv  r   r   r   	schedulerr   r   r    r!   r"   virtualizedr#   r   r^   r=   r$   r  r  rE   objectr  r%   r   r&   rD   r  r  r  ry   r)   rh  cacher4   rU   r{   r?   r   r   r   rC  r   r   r   r   r   r   r   r   r   r   r:  r  r  r  r  rC   r  r  r  r  	dataclassr  rA  r>  r  r  r  r  r)  rE  rR  rp  ExportedProgramrx  r1   r2   r3   <module>r     s        	    	       8 8    V V  : - ! , 5 0 - O 7   / (     g! 9= tDf-.5 <$  $26 4S4Z(4/ 6%h@ &x1AB I 
 +
 
 VZ,@
AT  + + + 6!"66 :6 
	6r`$'8"9 `bhh `L #'	!t#!S>! 4Z! 	!
 
!H!S>!	#w,!$III 
I" %HTN % %V EG  $sDd3i,@'@"A FGI .S$s)^0D I!% C$J %68 #T#s(^ 857 d3S	>2 701 (# 11
 /
(4. /
 /
dY Yxf fR!. !4 !"/ "D "
S4Z(8 
T 
"	,8,=#> 	,4 	,=Sxc5j8I/J =St =S@B" !htn ! !   
  	!^Tz^!%c3h^ 
#tCcN#
#$^B9%)#s(^9	#tCH~
9x#tCH~ #L'S$sDI~2F-F(G 'Z M-.=MM M 	4Z	M`+# + + +\1S 1S 18 %)Q
38
Qll22Q 38n	Q
 T/Q 	Qr2   