
    9j"9                    T   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ erd d	lmZ d d
lmZ d dlmZ d ad ad ag aded<   g Zded<   d ad aej                   G d d             Z g a!ded<   d a"d a#d a$d a%ded<   d a&d a'd<dZ(e G d d             Z)d=dZ* G d d      Z+i Z,ded<   e G d d             Z-e-j]                  d g d!       e-j]                  d"g d#       e-j]                  d$g d%       e-j]                  d&g d'       e-j]                  d(g d)       d>d*Z/d?d+Z0d@d,Z1dAd-Z2dBd.Z3d?d/Z4dCd0Z5dDd1Z6	 	 	 	 	 	 dEd2Z7	 	 	 	 	 	 	 	 dFd3Z8d<d4Z9dGd5Z:e	dHd6       Z;dId7Z<dJd8Z=e-j]                  d9g d:       	 	 	 	 	 	 	 	 	 	 dKd;Z>y)L    )annotationsN)	dataclass)	lru_cache)TYPE_CHECKING)config)get_benchmark_name)
OrderedSet)Callable)Config)BaseSchedulerNodez#list[tuple[BaseSchedulerNode, int]]nodes_num_elemz%list[tuple[BaseSchedulerNode, float]]node_runtimesc                  &    e Zd ZU ded<   dZded<   y)CppOuterLoopFusedCountintinner_kernel_numberr   local_buffer_numberN)__name__
__module____qualname____annotations__r        W/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/metrics.pyr   r   +   s      r   r   zlist[CppOuterLoopFusedCount]!cpp_outer_loop_fused_inner_countsr   num_auto_chunkingc                     da dadat        j	                          t
        j	                          dadat        j	                          da	da
dadadaday )Nr   )generated_kernel_countgenerated_cpp_vec_kernel_countnum_bytes_accessedr   clearr   ir_nodes_pre_fusioncpp_to_dtype_countr   num_comprehensive_padding)num_matches_for_scatter_upon_const_tensornum_loop_reorderingparallel_reduction_countcodegen_mix_order_reductionr   r   r   r   resetr)   A   sj     %&"%++- !01- "#r   c                  N    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ded<   y	)
CachedMetricsDeltasz]
    The subset of metrics we want update across cache hits, e.g., the
    FxGraphCache.
    r   r   r   r"   r#   r    r%   N)r   r   r   __doc__r   r   r   r   r+   r+   _   s-    
  $''/22r   r+   c                 n    t        j                  t              D  cg c]  } | j                   c} S c c} w N)dataclassesfieldsr+   name)fields    r   get_metric_fieldsr3   n   s&    $/$6$67J$KL5EJJLLLs   2c                  2    e Zd ZdZddZddZedd       Zy)	CachedMetricsHelperz
    A helper class to help calculate and apply counter deltas for those
    metrics we want to save with cache entries (e.g., FxGraphCache) and
    apply on a cache hit.
    c                d    i | _         t               D ]  }t               |   | j                   |<    y r.   )cached_metricsr3   globals)selfmetrics     r   __init__zCachedMetricsHelper.__init__y   s3     ') 	<F*1)F*;D'	<r   c                z    i }t               D ]"  }t               |   | j                  |   z
  ||<   $ t        di |S )Nr   )r3   r8   r7   r+   )r9   delta_metricsr:   s      r   
get_deltaszCachedMetricsHelper.get_deltas~   sL    ') 	TF$+If$58K8KF8S$SM&!	T #3]33r   c                `    t               D ]!  }t               |xx   t        | |      z  cc<   # y r.   )r3   r8   getattr)deltar:   s     r   apply_deltasz CachedMetricsHelper.apply_deltas   s-    ') 	8FIf!77	8r   NreturnNone)rD   r+   )rA   r+   rD   rE   )r   r   r   r,   r;   r>   staticmethodrB   r   r   r   r5   r5   r   s%    <
4 8 8r   r5   zdict[str, MetricTable]REGISTERED_METRIC_TABLESc                  b    e Zd ZU ded<   ded<   dZded<   ddZdd	Zdd
ZddZe	dd       Z
y)MetricTablestr
table_name	list[str]column_namesr   r   num_rows_addedc                F   | j                   t               vry  |       }t        | j                        t        |      k(  s(J t        | j                         dt        |              t	        | j                        t	        |j                               k(  s6J t	        | j                         dt	        |j                                       t               }|g| j                  D cg c]  }||   	 c}z   }t        d |D              sJ | j                  |       y c c}w )Nz v.s. c           	   3  \   K   | ]$  }t        |t        t        t        d       f       & y wr.   )
isinstancerJ   floattype).0is     r   	<genexpr>z&MetricTable.add_row.<locals>.<genexpr>   s"     Hq:a#ud4j!9:Hs   *,)	rK   enabled_metric_tableslenrM   r	   keysr   all
_write_row)r9   row_fnrow_dictbncolumn_namerows         r   add_rowzMetricTable.add_row   s   ??"7"9984$$%X6 	
4$$%&fS]O<	
6 $++,
8==?0KK 	
$++,-VJx}}4O3PQ	
K  !dt?P?PQh{+QQHCHHHH Rs   'Dc                "    d| j                    dS )Nmetric_table_z.csv)rK   )r9   s    r   output_filenamezMetricTable.output_filename   s    t/t44r   c                    | j                         }t        |d      5 }t        j                  |d      }|j	                  dg| j
                  z          d d d        y # 1 sw Y   y xY w)Nw
lineterminator
model_name)rd   opencsvwriterwriterowrM   )r9   filenamefdrm   s       r   write_headerzMetricTable.write_header   s^    '')(C  	@BZZ48FOO\NT->->>?	@ 	@ 	@s   7AA&c                   | j                         }| j                  dk(  r/t        j                  j	                  |      s| j                          | xj                  dz  c_        t        |      D ]&  \  }}t        |t              r|d}n|d}n|}|||<   ( t        |d      5 }t        j                  |d      }|j                  |       d d d        y # 1 sw Y   y xY w)Nr      z.6f arg   rh   )rd   rN   ospathexistsrq   	enumeraterQ   rR   rk   rl   rm   rn   )r9   r`   ro   idxorig_valnew_valrp   rm   s           r   r[   zMetricTable._write_row   s    '')!#BGGNN8,Dq &s^ 	MC(E*%cN!"CH	 (C  	!BZZ48FOOC 	! 	! 	!s   $)CCc                .    t        | |      }|t        | <   y r.   )rI   rG   )r1   rM   tables      r   register_tablezMetricTable.register_table   s    D,/). &r   N)r\   z+Callable[[], dict[str, str | float | None]]rD   rE   )rD   rJ   rC   )r`   zlist[str | float | None]rD   rE   )r1   rJ   rM   rL   rD   rE   )r   r   r   r   rN   ra   rd   rq   r[   rF   r   r   r   r   rI   rI      s@    ONC$5@!( / /r   rI   slow_fusion)kernel1_pathkernel1_latencykernel2_pathkernel2_latencyfused_kernel_pathfused_kernel_latencyslow_down_ratiograph_stats)graph_idnum_nodes_before_fusionnum_nodes_after_fusionpersistent_red_perf)
kernel0_pathr   r   kernel3_pathkernel0_latencyr   r   kernel3_latency
size_hintsreduction_hint'fusion_failure_due_to_indexing_mismatch)pre_grad_graph_idpost_grad_graph_id
node1_name
node2_namenode1_debug_strnode2_debug_strcommon_buffer_namesfailure_reasonkernel_metadatakernel_namekernel_pathkernel_categoryr   r   line_of_codenum_load	num_storenum_for_loopnum_atomic_addnum_argsxnumelynumelrnumelkernel_args_num_gbc                    ddl m} ddlm} |j	                  |       } ||      }t        j                  |j                  j                        S )z
    The kernel_module_code is the python module that contains kernel function code.
    kernel function is the proper triton kernel function annotated with
    @triton.jit
    rs   )PyCodeCache)get_triton_kernel)	codecacher   wrapper_benchmarkr   loadinspect	getsourcefn)kernel_module_coder   r   modkernels        r   _parse_kernel_fn_coder     s@     '4


-
.Cs#F VYY\\**r   c                4    t        | j                               S )zJ
    Return the line of code for the kernel excluding the decorators.
    )rX   
splitlines)proper_kernel_fn_codes    r   _parse_kernel_line_of_coder   .  s     $//122r   c                n    |dk(  ry t        j                  d|       }|sJ d       |j                  d      S )Nforeachzsize_hints=(\[[0-9, ]*\]),zsize_hints missing!rs   researchgroup)r   r   ms      r   _parse_size_hintsr   5  s;    )#
		/1CDA###1771:r   c                l    | dvry t        j                  d|      }|sJ d       |j                  d      S )N)	reductionpersistent_reductionz$reduction_hint=ReductionHint\.(\w*),z/reduction_hint not found in kernel source code!rs   r   )r   r   r   s      r   _parse_reduction_hintr   >  s<    CC
		9;MNA???1771:r   c                $    | j                  |      S r.   )count)r   patterns     r   _count_patternr   F  s     &&w//r   c                    | j                         d   }|j                  d      sJ |j                  d      }|j                  d      }||dz   | }|j                  d      }t	        |      S )Nr   def (z):rs   ,)r   
startswithindexsplitrX   )r   def_line	start_idxend_idxdecl_csvcompss         r   _count_argsr   J  sm    $//1!4Hv&&&s#InnT"G	A0HNN3Eu:r   c                .    | j                  d      }| |d S )z
    Skip decorators.
    r   N)r   )kernel_fn_code	start_poss     r   _parse_proper_kernel_fn_coder   T  s!     $$V,I)*%%r   c                n    t        j                  | d|       }|rt        |j                  d            S y )Nz
 = ([\d]+)rs   )r   r   r   r   )r   numel_arg_namer   s      r   _parse_numelr   \  s3    
		^$K02GHA1771:r   c                j    t        j                  d|       }|rt        |j                  d            S 	 y)z
    inductor meta looks like:
        inductor_meta={... 'mutated_arg_names': [], 'no_x_dim': False, 'kernel_num_gb': 2.0},
    z.kernel_num_gb.:\s*([0-9.]+)rs   N)r   r   rR   r   )r   r   r   s      r   _parse_kernel_args_num_gbr   d  s5     			1>BAQWWQZ  	 r   c           
         	 ddl m}  ||      t        |      t        |      	t	        |      t              t              t        d      j                   	fd       y)z
    An utility to log kernel metadata. We may parse metadata from kernel source code here.

    It's fine to parse the generated kernel code here since the logging is
    disabled by default. It would hurt compilation time.
    rs   )"get_kernel_category_by_source_coder   c                      t        d      t        d      t        d      t        d      t              t        d      t        d      t        d      t               dS )	Nztl.loadztl.storezfor ztl.atomic_addr   r   r   r   )r   r   r   r   )r   r   kernel_line_of_coder   r   r   r   r   s   r   <lambda>z%log_kernel_metadata.<locals>.<lambda>  s    &&.$,/&'<iH'(=zJ*+@&I,-BOT#$9:"#8(C"#8(C"#8(C";#
 r   N)	r   r   r   r   r   r   r   get_metric_tablera   )
r   r   r   r   r   r   r   r   r   r   s
   ``  @@@@@@r   log_kernel_metadatar   y  sp     F89KLO*?<NON"#5GJ*+=>N8H 55JK&'//	
 	
r   c                     t         j                         D ]f  \  } }| t               v s|j                         }t        j
                  j                  |      rt	        j                  |       |j                          h y)z
    Purge the old log file at the beginning when the benchmark script runs.
    Should do it in the parent process rather than the child processes running
    each individual model.
    N)	rG   itemsrW   rd   rv   rw   rx   unlinkrq   )r1   r~   ro   s      r   purge_old_log_filesr     sb     0557 !e(**,,.Hww~~h'		(# !r   c                 4    t        t        j                        S r.   )enabled_metric_tables_implr   rW   r   r   r   rW   rW     s    %f&B&BCCr   c                    t               }| j                  d      D ]9  }|j                         }|s|t        v sJ d| d       |j	                  |       ; |S )Nr   zMetric table name z is not registered)r	   r   striprG   add)
config_strenabledr1   s      r   r   r     sl    )|G  % zz|// 	
 &89	
/ 	D Nr   c                    | t               v S r.   )rW   r1   s    r   is_metric_table_enabledr     s    (***r   c                :    | t         v sJ d|  d       t         |    S )NzMetric table z is not defined)rG   r   s    r   r   r     s*    ++R}TF/-RR+#D))r   kernel_autotuner   r   triton_config
latency_msc                L     t        d      j                   fd       y )Nr   c                 $    t               dS )Nr   )rJ   )r   r   r   latencys   r   r   z,log_kernel_autotune_result.<locals>.<lambda>  s    && [!	
 r   )r   ra   )r   r   r   r   s   ````r   log_kernel_autotune_resultr     s     &'//	
r   rC   )rD   rL   )r   rJ   rD   rJ   )r   rJ   rD   r   )r   rJ   r   rJ   rD   
str | None)r   rJ   r   rJ   rD   r   )r   rJ   r   rJ   rD   r   )r   rJ   rD   rJ   )r   rJ   r   rJ   rD   z
int | None)r   rJ   r   rJ   rD   zfloat | None)r   rJ   r   rJ   r   rJ   rD   rE   )rD   OrderedSet[str])r   rJ   rD   r  )r1   rJ   rD   bool)r1   rJ   rD   rI   )
r   rJ   r   rJ   r   r   r   rR   rD   rE   )?
__future__r   rl   r/   r   rv   r   r   	functoolsr   typingr   torch._inductorr   torch._inductor.utilsr   torch.utils._ordered_setr	   collections.abcr
   %torch._inductor.runtime.triton_compatr   torch._inductor.schedulerr   r   r   r    r   r   r   r"   r#   r   r   r$   r%   r&   r   r'   r(   r)   r+   r3   r5   rG   rI   r   r   r   r   r   r   r   r   r   r   r   r   rW   r   r   r   r   r   r   r   <module>r     s}   " 
   	 	 !    " 4 / (<;  !"       8:4 9     ! ! ! CE !#? D ,- )  3    < 3 3 3M8 82 46 0 5 8/ 8/ 8/v         "   -	   6+ 30&*-*))#&)<?)	)X!D 
 
+*
   

#&
06
AF
	
r   