
    9j@                       U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlmZ d dlmZ d d	lmZ d
dl m!Z!m"Z"m#Z# d
dl$m%Z% d
dl&m'Z'm(Z(m)Z) erd dl*m+Z+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1  ed      Z2 ed      Z3 ejh                  e5      Z6d&dZ7ed'd       Z8e#d(d       Z9	 d)	 	 	 	 	 	 	 	 	 d*dZ:	 	 	 	 d+dZ;e#d,d       Z< G d dejz                        Z>e#	 	 	 	 	 	 d-d       Z?e#d.d       Z@d/dZAej                  j                  ZCeCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  j                  eCj                  j                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  eCj                  hZ[ ee[      Z[e#d,d       Z\	 	 	 	 	 	 d0dZ]	 	 	 	 	 	 d1d Z^d a_d!e`d"<   d2d#Za	 	 	 	 	 	 	 	 	 	 	 	 d3d$Zb	 d4	 	 	 	 	 	 	 d5d%Zcy)6    )annotationsN)contextmanager)partial)AnyTYPE_CHECKING)	ParamSpecTypeVar)SymInt)get_decompositions)bind_symbols   )aot_function
aot_modulemake_boxed_compiler)strip_overloads)default_partition
draw_graph#min_cut_rematerialization_partition)Callable	GeneratorSequence)Node)IntLikeType_P_Rc                    | j                   j                  dt        j                  j                  j
                        D ]+  }t        j                  j                  j                  |_        - | j                          | S )Ncall_functionoptarget)	graph
find_nodestorchopsaten_to_copytor    	recompile)fx_gnodes     Z/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_functorch/compilers.py_canonicalizer,   /   s`    

%%599>>#:#: &  ( iinn''( 	NNK    c               #     K   t         j                  j                  d      } 	 d  t         j                  j                  |        y # t         j                  j                  |        w xY ww)NF)r#   _C_jit_set_autocast_mode)old_jit_autocast_flags    r+   _disable_jit_autocastr2   8   sM      "HH;;EB? 	''(=>''(=>s    A+A  A+!A((A+c                D   t               5  t        |        | j                  j                  dt        j
                  j                  j                        D ]l  }t        |j                        dk(  st        |j                        dk(  s5d|j                  v sDt        j
                  j                  j                  |_        n | j                  j                  D ]X  }i }|j                  j                         D ]0  \  }}t        |t        j                         r|j"                  }|||<   2 ||_
        Z | j                  j%                          | j'                          t        j(                  j+                  |       }t        j,                  j/                  |j                         t        j(                  j1                  |j3                               }t        j(                  j5                  |      }t7        d |D              s ||  ddd       |S # 1 sw Y   S xY w)a  
    Compiles the :attr:`fx_g` with Torchscript compiler.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fx_g(fx.GraphModule): The input Fx graph module to be compiled.

    Returns:
        Torch scripted model.
    r   r   r   dtypec              3  d   K   | ](  }t        |t        j                  j                         * y wN)
isinstancer#   _subclasses
FakeTensor).0ts     r+   	<genexpr>zts_compile.<locals>.<genexpr>n   s#     M1:a!2!2!=!=>Ms   .0N)r2   r   r!   r"   r#   r$   r%   r&   lenargskwargsr'   r    nodesitemsr7   devicetypelintr(   jitscriptr/   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r)   inpsr*   
new_kwargskvfs          r+   
ts_compilerQ   C   s    
	  JJ))uyy~~'>'> * 
 	0D 499~"s4;;'71'<DKKAW#iinn//		0 JJ$$ 	%DJ))+ "1a.A !
1" %DK	% 	

IIT" 	**1773IIQVVX&II,,Q/MMMtH;< H=< Hs   A&H2HHE1HHc                L    t        | j                         t        | ||       | S )N)
clear_meta)printcoder   )r)   _namerS   s       r+   _draw_graph_compilerX   s   s!     
$))tTj1Kr-   c                6    t        t        t        |             S )NrW   )r   r   rX   rZ   s    r+   draw_graph_compiler[   {   s     w':FGGr-   c                    | S )z
    Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
    and can be used to check accuracy.

    .. warning::
        This API is experimental and likely to change.

     r)   rV   s     r+   nopr_      s	     Kr-   c                  B     e Zd Zddd	 	 	 	 	 	 	 d fdZd fdZ xZS )DebugInterpreterNTinitial_envenable_io_processingc               Z    t        | j                  g| | _        t        |   |||dS )Nrb   )r   modulesymbol_mappingsuperrun)selfrc   rd   r>   	__class__s       r+   ri   zDebugInterpreter.run   s>     +KK
 

 w{{AU
 	
r-   c                   
 d fdd	fdd
fd
d
fd}t            |      }d|j                  v rt        j                  |j                  d         \  }}t        j                  |      \  }}t        |      t        |      k7  r"t        t        |       dt        |             t        t        t        |            ||      D ]/  \  }}	t        |	t        j                        s" |||	 fd       1 |S )Nc                    t        | t              s| S t        j                  | j                  j
                  j                  j                              }|j                  st        d|       t        |      S )Nzexpected r to be a number, got )r7   r
   sympyexpandr*   exprxreplacerg   	is_numberAssertionErrorint)nirrj   s     r+   subst_symintz/DebugInterpreter.run_node.<locals>.subst_symint   s\    b&)	RWW\\2243F3FGHA;;$'Fqc%JKKq6Mr-   c                ,    t        fd| D              S )Nc              3  .   K   | ]  } |        y wr6   r]   )r:   ru   rw   s     r+   r<   zHDebugInterpreter.run_node.<locals>.subst_symint_tuple.<locals>.<genexpr>   s     8bb)8s   )tuple)nisrw   s    r+   subst_symint_tuplez5DebugInterpreter.run_node.<locals>.subst_symint_tuple   s    8C888r-   c                     | j                               dkD  r`t        | j                        D ]H  } | j                  |            |j                  |      k7  s- | j	                  |            dkD  sH y y)Nr   r   FT)numelrangendimstridesize)abidxrw   s      r+   check_significant_stridesz<DebugInterpreter.run_node.<locals>.check_significant_strides   sg    AGGI&* = %C$QXXc]3qxx}D(59$% r-   c           
     d   t        |      st        dt        |             | j                  |j                  k7  r,t         |        d| j                   d|j                          | j	                               |j	                         k7  rKt         |        d| j	                          d | j	                                d|j	                                 | |      }|sKt         |        d| j                          d | j                                d|j                                y )Nz"expected desc to be callable, got z:  != z aka )callablers   rC   r4   r   r   )nvrvdescsame_stridesr   r|   s       r+   checkz(DebugInterpreter.run_node.<locals>.check   s   D>$'I$t*%VWWxx288#$xr"((4z%JKK!"''),	9$vhb51CBGGI1N0OtTVT[T[T]S^_  5R<L$vhbU3Ebiik3R2SSWXZXaXaXcWde   r-   valr   c                 (    d  dj                    S )Nzoutput z where )rg   )irj   s   r+   <lambda>z+DebugInterpreter.run_node.<locals>.<lambda>   s    s'$:M:M9N&O r-   )ru   r   returnrt   )r{   ztuple[IntLikeType, ...]r   ztuple[int, ...])r   torch.Tensorr   r   r   bool)r   r   r   r   r   zCallable[[], str]r   None)rh   run_nodemetapytreetree_flattenr=   rs   zipr   r7   r#   Tensor)rj   nr   rv   n_vals_n_specr_vals_r_specr   r   r   r   rw   r|   rk   s   `         @@@@r+   r   zDebugInterpreter.run_node   s    		9		 GQAFF?$11!&&-@OFG$11!4OFG 6{c&k)$F}DV%FGG s6{!3VVD Q	2r!"ell3b"OPQ r-   )r>   r   rc   zdict[Node, Any] | Nonerd   r   r   r   )r   r   r   r   )__name__
__module____qualname__ri   r   __classcell__)rk   s   @r+   ra   ra      s@     /3%)	

 ,
 #	

 

5 5r-   ra   c                ,    t        |       j                  S )z
    Returns a (slow) interpreter over the FX graph module that also checks
    various debugging properties (e.g., that tracing strides matched real
    strides.)
    )ra   ri   r^   s     r+   	debug_nopr      s     D!%%%r-   c                    t        |        t        j                  j                  |       }t        j                  j	                  |j                               }|S r6   )r   r#   rE   rF   rH   rI   )r)   rV   rP   s      r+   simple_ts_compiler      s=    D		A		"AHr-   c                "    t        | t              S r6   )r   r   )rP   s    r+   nnc_jitr      s    ,--r-   c                0    t        | j                         | S r6   )rT   rU   r^   s     r+   print_compiler     s    	$))Kr-   c                    t         t         t        t        d}|j                  |       t	        | t
        j                  j                        rt        | fi |S t        | fi |S )a~  
    Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
    memory efficient fusion. It uses the
    :func:`min_cut_rematerialization_partition` partitioner to perform efficient
    recomputation. It uses NVFuser to compile the generated forward and backward
    graphs.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
            that takes one or more arguments. Must return one or more Tensors.
        **kwargs: Any other overrides you want to make to the settings

    Returns:
        Returns a ``Callable``  or ``nn.Module`` that retains the eager behavior
        of the original :attr:`fn`, but whose forward and backward graphs have
        gone through recomputation optimizations, and the graphs have been
        compiled with nvfuser.

    fw_compilerbw_compilerpartition_fndecompositions)
rQ   r   default_decompositionsupdater7   r#   nnModuler   r   )fnr?   configs      r+   memory_efficient_fusionr     sW    6 "!;0	F MM&"ehhoo&"'''B)&))r-   c                    | j                  d       t        d|D cg c]  }|j                  |j                  f c} d       ddlm}   |       j                         |  t        | |      S c c}w )NfooaQ  
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = a?  
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
r   )FxModule)	to_folderrT   shaper4   r   r   cudarQ   )r)   rL   r   r   s       r+   debug_compiler   9  sq     	NN5		 &**!''177	*+ ,	* HJOOtdD!! 	+s   A/
rt   graph_indexc                J   g }t        | d      5 }t        j                  |      }g }|D ]  }t        |      dk(  r|} |t	        j                               }n|\  }}}}	}
|	t
        j                  t
        j                  t
        j                  t
        j                  t
        j                  t
        j                  t        t        hv rt        j                  dd||	|
      }nt        j                  ||	|
      }|j                  |        	 ddd       |S # 1 sw Y   |S xY w)zZ
    Return a random input for the given inputs meta generated from _save_fx_default.
    rbr   r   )r4   rB   N)openpickleloadr=   randomr#   rt   int32int64r   uint8floatrandintrandappend)input_data_pathinputsrP   inputs_metar   rC   input_r   _strider4   rB   s              r+   
get_inputsr   \  s     "$F	ot	$ "kk!n 	"D4yA~fmmo.6:3eWeVIIKKKKJJIIKK	 	 #]]1aeFSF"ZZU6JFMM&!'	"". M/". Ms   C>DD"c                    	
 ddl m} d		fd		 	 	 	 	 	 	 	 d
 	fd
	 	 	 	 	 	 d
fd}	 	 	 	 	 	 d
fd}	 	 	 	 	 	 d
fd} ||||||t              S )aO  
    The forward, backward, and joint computation graph will be stored in
    {folder_name}/{current_name}/{current_name}_forward_{graph_index},
    {folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
    {folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
    The input shape of the graphs will be stored in the .input files.
    These files can be loaded with pickle,
    and is a list of format (type, shape, stride, dtype, device).
    In the case of type = int or float, it is just (type,).
    For joint graph input, it is a nested list [[],[]]
    where the two inner lists have the same format.
    If dump_example_input is True, example_inputs will be stored in .pt file.
    Since each function might produce multiple graphs,
    the graph_index is used to distinguish difference graphs
    r   )aot_module_simplifiedc                   g }t        |       dkD  r1t        | d   t              r| | d         z  }| | d         z  }|S | D ]  }t        |      t        u st        |      t
        u r|j                  t        |      f       A|j                  t        |      |j                  |j                         |j                  |j                  f        |S )Nr   r   )r=   r7   rz   rC   rt   r   r   r   r   r4   rB   )r>   
input_metaargget_input_metas      r+   r   z(_save_fx_default.<locals>.get_input_meta  s    
t9q=ZQ7.a11J.a11J 	CCyC49#5!!49,/!!#Y		3::<CJJO		 r-   c                   t        | j                  j                        dk(  r,t        j                  t        j
                  d|t               y t        j                  |       }|j                  j                  t        j                  j                  j                                |j                           	|      }t        j                   d d       |j!                   d d d| dt         	       t#         d d d| dt         d d| dt         dd      5 }t%        j&                  ||       d d d        r7t        j(                  | d d d| dt         d d| dt         d	       y y # 1 sw Y   CxY w)
Nr   z!No nodes in graph {%s}_{%s}_{%s}./T)exist_okrV   z.inputwbz.pt)r=   r!   r@   logloggingWARNINGr   copydeepcopyset_codegenr#   fxCodeGenr(   osmakedirsr   r   r   dumpsave)

gm_to_saver>   	type_namegmr   rP   current_namedump_example_inputfolder_namer   s
         r+   graph_saver_helperz,_save_fx_default.<locals>.graph_saver_helper  s    z%%&!+GG3 ]]:&
UXX^^3356
#D)

{m1\N3dC
m1\N!L>9+Q{mT	
 m1\N!L>9+Q{mSTUaTbbcdmcnnop{o|  }C  D
 	' KK
A&		'
 JJ-qa~Qyk;-WXYeXffghqgrrst  tA  AD  E 	' 	's   )FFc                     | |d       | S )Nforwardr]   r   example_inputsr   s     r+   graph_saver_forwardz-_save_fx_default.<locals>.graph_saver_forward  s     	2~y9	r-   c                .     | |d       t         dz  a | S )Nbackwardr   )r   r   s     r+   graph_saver_backwardz._save_fx_default.<locals>.graph_saver_backward  s      	2~z:q	r-   c                0     | |d       t        | |      S )Njoint)r   )r   
joint_argsr   s     r+   graph_saver_jointz+_save_fx_default.<locals>.graph_saver_joint  s     	2z73 Z00r-   r   )r>   r   r   z	list[Any])r   fx.GraphModuler>   r   r   strr   r   )r   r   r   list[torch.Tensor]r   r   )r   r   r   r  r   z%tuple[fx.GraphModule, fx.GraphModule])functorch.compiler   r   )r   r   r   r   r   r   r   r   r   r   r   s   ```      @@r+   _save_fx_defaultr  {  s    , 8!"!*-!:=!	! !F,>	,>	11(:1	.1 !
'(&- r-   c                *    da t        t        | ||      S )as  
    Dump the forward, backward, and joint computation graph.
    Example Usage:
    save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
    optimize_ctx = torchdynamo.optimize(
        save_fx_func
    )
    with torch.enable_grad():
        with optimize_ctx:
            result = forward_and_backward_pass(model, example_inputs)
    r   )r   r   r  )r   r   r   s      r+   graph_dumper_aotr    s     K#\;@RSSr-   )r)   r   r   r   )r   zGenerator[None, None, None])r)   r   rL   zSequence[Any]r   torch.jit.ScriptModule)T)
r)   r   rV   r   rW   r   rS   r   r   r   )rW   r   r   z5Callable[[fx.GraphModule, list[Any]], fx.GraphModule])r)   r   rV   r   r   r   )r)   r   rV   r   r   zDCallable[[DebugInterpreter, Any, dict[Node, Any] | None, bool], Any])r)   r   rV   r   r   r  )rP   Callable[..., Any]r   r  )r   Callable[_P, _R] | nn.Moduler?   r   r   r  )r)   r   rL   zSequence[torch.Tensor]r   r  )r   r   r   r  )r   r   r   r   r   r   r   ztorch.fx.GraphModuler   r  r   z	nn.Module)F)r   r   r   r   r   r   r   z Callable[[bool, nn.Module], Any])d
__future__r   r   r   r   r   r   
contextlibr   	functoolsr   typingr   r   typing_extensionsr   r	   rn   r#   torch.fxr   torch.nnr   torch.utils._pytreeutils_pytreer   r
   torch._decompr   %torch.fx.experimental.symbolic_shapesr   aot_autogradr   r   r   compile_utilsr   partitionersr   r   r   collections.abcr   r   r   torch.fx.noder   torch.typesr   r   r   	getLoggerr   r   r,   r2   rQ   rX   r[   r_   Interpreterra   r   r   r   r$   r%   detachgelu_backwardleaky_relu_backwardsigmoid_backwardthreshold_backwardhardtanh_backwardhardsigmoid_backwardhardswish_backwardtanh_backwardsilu_backwardelu_backwardcudnn_batch_normcudnn_batch_norm_backwardmasked_fillScalarr   elu
leaky_reluhardtanh	hardswishhardsigmoidconj_physicalis_same_sizer   r   r   r   r   __annotations__r   r  r  r]   r-   r+   <module>r4     s4   "   	   %  % 0     $ $  , > G G *  =="' t_T]g!
 ? ? , ,` AE
 (+9=H
H:H 	 	Er~~ EP &
& &I& &  . yy~~KK""HHOOMMNN- 4 ,,BC   
$*$$*$* "$*N"
" 6""@ S >fff f 		f
 'f fV EJTT$'T=AT%Tr-   