
    9j                    F   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlZd dlmZmZ e
rd dlmZmZ  ed      Zdd	a	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
ZddZddZddZddZddZg aded<   ddZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)AbstractContextManager)AnyTYPE_CHECKING)TypeVar)profileProfilerActivity)CallableSequence_Rc                      y )N r       `/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_functorch/benchmark_utils.pysynchronizer      s    r   c	                   |dg}|dgk7  r8t         j                  j                         rt         j                  j                  a|i }|i }|5  t        j                  d       t        d      D ]  }	 | |fi | t                 t        j                  d       t        j                         }
t        |      D ]  }	 | |fi | t                 t        j                         }ddd       
z
  }t        dd|i|5 }|5  t                t        j                  d       t        |      D ]  }	 | |fi | t                 	 ddd       ddd       j                  |       |S # 1 sw Y   xY w# 1 sw Y   0xY w# 1 sw Y   4xY w)a1  
    Output the chrome trace of running f(input_, **kwargs_for_f) with [optimize_ctx]
    [num_runs] times to [trace_filename].

    [activities] are the activities that the profiler will record, e.g. ProfilerActivity.CUDA.
    Return total runtime without the profiler

    Outputs to trace_filename
    Ncudacpui9     
activitiesr   )
torchr   is_availabler   manual_seedrangetimeperf_counterr   export_chrome_trace)finput_trace_filenameoptimize_ctxr   num_runsdeviceskwargs_for_fkwargs_for_profiler_t0t1timingprofs                 r   dump_chrome_tracer+      s   * ( 5'ejj557jj,," 	 
!$q 	Af%%M	 	$ x 	Af%%M	  
! "WF		>J	>*=	> $ 	Md#8_ &)L)	 	^,M-
! 
!	 	 s2   BE+FAE7F+E47F 	<FFc                z    t        |       5 }t        j                  |      }d d d        d   }|S # 1 sw Y   xY w)NtraceEvents)openjsonload)filenamer   dataeventss       r   get_chrome_trace_eventsr4   U   s<    	h 1yy|- FM s   1:c                D    d| v xr | d   t         v xr d| v xr | d   dk(  S )NpidphX)gpu_pidsevents    r   is_gpu_compute_eventr<   \   s@     	 	%LH$	EM	 $K3	r   c                    g }| D ]  }t        |      s|j                  |       ! t        |t        j                  d            S )Nts)key)r<   appendsortedoperator
itemgetter)r3   sorted_gpu_eventsr;   s      r   get_sorted_gpu_eventsrE   f   sK    .0 (#E*  '( #)<)<T)BCCr   c                    t        |       dk(  ry| d   }|d   |d   z   }|d   }| dd  D ]:  }t        |d   |      }|d   |d   z   }|t        ||z
  d      z   }t        ||      }< |S )Nr   r>   dur   )lenmax)rD   r;   current_end_timetotal_duration
start_timeend_times         r   get_durationrO   o   s    
"a ET{U5\15\N"12& ;t&67
;u-'#h.CQ*GG/:	;
 r   c                j    dd}t        |       }g }|D ]  } ||      s|j                  |        |S )Nc                R    d| v xr" d| d   v xs d| d   v xs d| d   v xs d| d   v S )Nnamegemmconvcutlasswgradr   r:   s    r   is_mm_conv_eventz7get_sorted_gpu_mm_conv_events.<locals>.is_mm_conv_event~   sT     
eFm# (v&(E&M)( %-'		
r   r;   zdict[str, Any]returnbool)rE   r@   )r3   rW   
gpu_eventssorted_eventsr;   s        r   get_sorted_gpu_mm_conv_eventsr]   }   sH    
 'v.J*,M $&U#$ r   z	list[Any]r9   c                   t        |       }g a|D ]3  }d|vr|d   dk(  sd|d   d   v st        j                  |d          5 |dz  }t        |      }t	        |      |z  }t        |      }t	        |      |z  }||fS )a  
    Process the chrome traces outputs by the pytorch profiler to compute GPU Utilization
    and percent of times spent on matmul and convolution

    Args:
        filename(str): Name of chrome traces file produced by pytorch profiler

        total_length(float): total length of the process without profiler in second

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)
    rR   process_labelsGPUargslabelsr6   g    .A)r4   r9   r@   rE   rO   r]   )r1   total_lengthr3   r;   rD   utilizationsorted_gpu_mm_conv_eventsmm_conv_utilizations           r   compute_utilizationrg      s     %X.F H *=,,%-:Q1QOOE%L)	*  #%L-f501L@K =f E&'@ALP+++r   c           	     f   t         j                  j                  |      }|s#t        j                  |       t	        d|z          |t        j                         }t         j                  j                  ||dz         }t        | |||t        j                  g|dg      }t        ||      \  }	}
|	|
fS )a  
    Benchmark the GPU Utilization and percent of time spent on matmul and convolution operations of
    running f(input_, **kwargs_for_f) with [optimize_ctx] [num_runs] times.
    It will produce a chrome trace file in trace_folder/trace_file_name.json

    Example:

    ```
    def f(a):
        return a.sum()


    a = torch.rand(2**20, device="cuda")
    utilization, mm_conv_utilization = benchmark_utilization(
        f, a, "tmp", trace_file_name="tmp_chrome_trace"
    )
    ```

    Args:
        f: function to benchmark

        input_: input to :attr:`f`

        trace_folder: name of the folder to store the chrome trace

        optimize_ctx: the context in which f will run

        trace_file_name: name of the dumped chrome trace file, default to "tmp_chrome_trace"

        num_runs: number of times to run f, excluding the warm-up runs, default to 1.

    Return:
        tuple: (GPU Utilization, percent of time spent on matmul and convolution)

    zcreate folder z.jsonr   )r"   r#   )ospathexistsmakedirsprint
contextlibnullcontextjoinr+   r	   CUDArg   )r   r   trace_folderr!   trace_file_namer"   isExistchrome_trace_file_namerc   rd   rf   s              r   benchmark_utilizationrv      s    V ggnn\*G
L!-.!--/WW\\,'8QR$				L (;($K$ +++r   )rY   None)rH   NNN)r   Callable[[tuple[Any, ...]], _R]r   tuple[Any, ...]r    strr!   zAbstractContextManager[Any]r   zSequence[ProfilerActivity]r"   intr#   zlist[str] | Noner$   dict[str, Any] | Noner%   r|   rY   float)r1   rz   rY   list[dict[str, Any]]rX   )r3   r~   rY   r~   )rD   r~   rY   r{   )r1   rz   rc   r}   rY   tuple[float, float])Ntmp_chrome_tracerH   )r   rx   r   ry   rr   rz   r!   z"AbstractContextManager[Any] | Noners   rz   r"   r{   rY   r   ) 
__future__r   rn   r/   rB   ri   r   r   typingr   r   typing_extensionsr   r   torch.profilerr   r	   collections.abcr
   r   r   r   r+   r4   r<   rE   rO   r]   r9   __annotations__rg   rv   r   r   r   <module>r      s8   "    	  - % %  4 2 T]	  $*.157&77 7 .	7
 +7 7 7 (7 /7 7tD$ ) ,L 8<-A,&A,A, A, 5	A,
 A, A, A,r   