
    9j                        d dl Z d dlmZmZ d dlmZmZ d dlmZ ddl	m
Z
 e j                  j                  Z ee j                  e j                  e j                  e j                   g      Z eg ej$                  ej&                  ej(                  ej*                  ej,                  ej.                  ej0                  ej2                  ej4                  ej6                  ej8                  ej:                  ej<                  ej>                  ej@                  ejB                  ejD                  ejF                  ejH                  ejJ                  ejL                  ejN                  ejP                  ejR                  ejT                  ejV                  ejX                  ejZ                  ej\                  ej^                  ej`                        Z1 eejd                  ejf                  ejh                  ejj                  ejl                  ejn                  ejp                  ejr                  ejt                  g	      Z;e1e;z  Z<de=fdZ>d	e j~                  de@fd
ZAde=fdZBy)    N)get_device_tflopsget_gpu_dram_gbps)optimization_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                     | t         v rft        |      dk7  rt        d| d|        |j                         }t	        |      dz  }d}||z  }t         |    }	 |	|i |d|idz  }
|
|z  dz  }|S y	)
aw  
    Estimates the compute time of an aten operator.

    Args:
        func_packet: The operator overload packet.
        args: The arguments to the operator.
        kwargs: The keyword arguments to the operator.
        out: The output of the operator.
        out_dtypes: The output data types.

    Returns:
        float: The estimated compute time in nanoseconds.
    r   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r	   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_times               _/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/utils/_runtime_estimation.pyget_compute_timer   M   s     m#z?a 4ZLk]S   *51D8%6'4$dBfBcBQF
"%99S@    tc                     d}t        | j                  | j                               D ]$  \  }}t        |dk(        r|t	        |d      z  }& || j                         z  S )z
    Calculates the memory consumption of a tensor.

    Args:
        t (torch.Tensor): The input tensor.

    Returns:
        int: The memory consumption of the tensor in bytes.
    r   r   )fallback)zipshapestrider   r   element_size)r    
real_numelsizer%   s       r   get_num_bytesr)   o   s_     JAGGQXXZ0 >f$Vq[1+D1==J>
 (((r   c                 v    t               }t        d | D              }t        d |D              }||z   }||z  }|S )aG  
    Estimates the memory transfer time of input and output tensors.

    Args:
        flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
        flat_outs (List[torch.Tensor]): The flat list of outputs.

    Returns:
        float: The estimated memory transfer time in nanoseconds.
    c              3   h   K   | ]*  }t        |t        j                        st        |       , y wN
isinstancetorchTensorr)   .0r    s     r   	<genexpr>z$get_transfer_time.<locals>.<genexpr>   s'      jELL6Qa   22c              3   h   K   | ]*  }t        |t        j                        st        |       , y wr,   r-   r1   s     r   r3   z$get_transfer_time.<locals>.<genexpr>   s'      z!U\\/Jar4   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_times          r   get_transfer_timer>      sZ     -. "2 J  "+ K ,M!$88Mr   )Cr/   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   torch.utils._ordered_setr   flop_counterr	   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr    	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r0   intr)   r>    r   r   <module>rw      s    F 0 ' yy~~	     	  				 
 	  	  	

  	  	  	  	  	  	  	  	   	! " 	# $ 	% & 			' ( 			) * 	+ , 	- . 	/ 0 	1 2 	3 4 	5 6 	7 8 	9 : 	; < 	= > 	

? "	H 

		
 +%E D)U\\ )c )&e r   