
    9jFZ                        d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	 d dl
mZmZ d dlmZ g dZdej                   deej                   gej                   dz  f   fdZdej                   d	eej                   eeef   f   d
edeej                   gej                   dz  f   fdZdej                   deeej                         deej                      deeef   d	eej                   eeef   f   f
dZdefdZ G d de      Zdej                   dededeeej                         def
dZ G d de      Z G d de      Zdej                   dedededef
dZdej                   dededeeej                         def
dZ dej                   de	e   defd Z!dej                   dededefd!Z" ed"      ddfdej                   deded#ed$eeej                         dz  d%eeej                         dz  defd&Z#ejH                  ejJ                  he#_&        ejN                  he#_(        e jR                  d'ed(eded)   fd*       Z*dej                   d+edej                   fd,Z+dej                   d'edej                   fd-Z,	 d5dej                   d.ed'edeej                      d/eejZ                     d0ed1ede.ej                   ef   fd2Z/ G d3 d4      Z0y)6    N)ABCabstractmethod)Callable	GeneratorIterableSequence)Anycast)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                     | hdt         j                  dt        dt         j                  dz  ffd | dd       y)aQ  
    This applies ``fn`` to every module in the module tree of ``root_module``
    following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
    then this replaces the original module with the newly returned one in the
    tree. Otherwise, ``fn`` should return ``None``, in which case the module is
    not changed.
    modulemodule_nameparent_moduleNc                 l   | j                         D ]%  \  }}|vsj                  |        |||        '  |       }|qt        |t        j                        st        d| d|        |st        d|        t        |t        j                        st        d|       t        |||       y y )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModuleAssertionErrorsetattr)	r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_moduless	         [/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/distributed/fsdp/wrap.pyr$   z2_post_order_apply.<locals>._post_order_apply_inner-   s    
 06/D/D/F 	Q+|?2##L1'6GP	Q V*&mRYY7$S$oU6(4  $006x9  oryy9$D_DUV  M;@ '     )r   r   str)r   r   r$   r%   s    `@@r&   _post_order_applyr*      sJ     (3mOA		AA yy4'A6 KT2r'   target_module_to_kwargsfsdp_fnreturnc                 b     dt         j                  dt         j                  dz  f fd}|S )z
    This constructs the "wrap" function to pass to :func:`_post_order_apply`
    based on ``target_module_to_kwargs``, which should be constructed from the
    wrapping policy.
    r   r-   Nc                 2    | v r| ur|    } | fi |S y N )r   kwargsr,   r   r+   s     r&   r   z_construct_wrap_fn.<locals>.fnV   s2     ,,{1J,V4F6,V,,r'   )r   r   )r   r+   r,   r   s   ``` r&   _construct_wrap_fnr3   K   s*    299 T!1  Ir'   module_classesignored_modulesroot_kwargsc                     t        t        |            }| j                         D ]%  }||v rt        ||      s||vr|||<   d ||   d<   ' |S )Nmixed_precision)tuplesetmodulesr   )r   r4   r5   r6   r+   module_classes_tupler   s          r&   $_run_mixed_precision_override_policyr=   a   sq     !^!45%%' F_$ 45442='/AE#F+,=>F #"r'   c                       y)z
    A simple recursive wrap policy that always returns ``True``. This means
    that every submodule is wrapped by the wrapper class in
    :func:`_recursive_wrap`.
    Tr1   )argsr2   s     r&   r   r   u   s     r'   c                       e Zd ZdZedej                  deej                     dee	e
f   deej                  ee	e
f   f   fd       Zy)_Policyzk
    This defines an abstract base class that represents a policy for applying
    a module-level API.
    r   r5   r6   r-   c                      y)z
        This should return a dict ``target_module_to_kwargs`` that maps from
        each target module to wrap to its kwargs.
        Nr1   )selfr   r5   r6   s       r&   _run_policyz_Policy._run_policy   s     	r'   N)__name__
__module____qualname____doc__r   r   r   r:   dictr)   r	   rD   r1   r'   r&   rA   rA   ~   sk    
 
YY
 RYY
 #s(^	

 
biic3h'	(
 
r'   rA   r   recursenonwrapped_numelc                 2    |ryt        | t        |            S )a   
    This auto wrap policy wraps every module that is an instance of any type in
    ``module_classes`` as its own FSDP instance. The root module given by
    ``module`` is always wrapped as an FSDP instance regardless. Since the
    wrapping proceeds bottom up, each FSDP instance manages the parameters in
    its subtree excluding any already managed by a child FSDP instance.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.
        module_classes (Set[Type[nn.Module]]): Set of module classes that are
            wrapped as FSDP instances.

    Returns:
        ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
        if ``recurse=False``.
    Tr   r9   )r   rJ   rK   r4   s       r&   _module_wrap_policyrN      s    6 feN344r'   c                        e Zd ZdZdeeej                        fdZdej                  de	ej                     de
eef   de
ej                  e
eef   f   fdZd	 Zdef fd
Z xZS )r   z{
    This policy applies to every module of the specified module classes,
    passing in the kwargs given to the root.
    r4   c                 H    t        |      }|| _        t        |      | _        y r0   )r:   _module_classesr)   _module_classes_str)rC   r4   module_classes_sets      r&   __init__zModuleWrapPolicy.__init__   s#     01#&'9#: r'   r   r5   r6   r-   c                     t        | j                        }i }|j                         D ],  }||v rt        ||      st	        j                  |      ||<   . |S r0   )r9   rQ   r;   r   copy)rC   r   r5   r6   r4   r+   r   s          r&   rD   zModuleWrapPolicy._run_policy   se     t334CE!))+ 	IF(FN326))K2H'/	I '&r'   c                 4    t        ||d| j                        S )N)rK   r4   )rN   rQ   )rC   r   rJ   r?   r2   s        r&   __call__zModuleWrapPolicy.__call__   s    "GbAUAU
 	
r'   c                 B    t         |          d| j                   dz   S )N())super__repr__rR   )rC   	__class__s    r&   r^   zModuleWrapPolicy.__repr__   s&    w!a(@(@'A$CCCr'   )rE   rF   rG   rH   r   typer   r   rT   r:   rI   r)   r	   rD   rY   r^   __classcell__)r_   s   @r&   r   r      s    
;xRYY'@ ;
'YY' RYY' #s(^	'
 
biic3h'	(' 
D# D Dr'   r   c                       e Zd ZdZdeej                  geee	e
f   z  f   fdZdej                  deej                     dee	e
f   deej                  ee	e
f   f   fdZy	)
r   a  
    This policy takes in a lambda function that maps a given ``nn.Module`` to
    either ``False``, ``True``, or a kwarg dictionary.
    - If the function returns ``False`` or an empty dictionary, then the module
      does not have the API applied.
    - If the function returns ``True``, then the module has the API applied
      with the root's kwargs.
    - If the function returns a non-empty dictionary, then the module has the
      API applied, and the dictionary overrides the root's kwargs.

    Example::

        >>> # xdoctest: +SKIP("undefined variables")
        >>> model = init_transformer_model(...)
        >>> def lambda_fn(module: nn.Module):
        >>>     if module is model.lm_head:
        >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
        >>>     elif isinstance(module, TransformerBlock):
        >>>         return True
        >>>     return False
        >>> policy = CustomPolicy(lambda_fn)
        >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
    	lambda_fnc                     || _         y r0   )
_lambda_fn)rC   rc   s     r&   rT   zCustomPolicy.__init__   s	    #r'   r   r5   r6   r-   c                 $   i }|j                         D ]z  }||v r| j                  |      }t        |t        t        f      st        d|       |s@t        j                  |      }t        |t              r|j                  |       |||<   | |S )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r;   re   r   rI   bool
ValueErrorrV   update)rC   r   r5   r6   r+   r   resr2   s           r&   rD   zCustomPolicy._run_policy   s     DF!))+ 	5F(//&)CcD$<0 CCF%I  YY{+F#t$ c".4#F+!	5" '&r'   N)rE   rF   rG   rH   r   r   r   rg   rI   r)   r	   rT   r:   rD   r1   r'   r&   r   r      s    0$(BII;tCH~8M+M"N $'YY' RYY' #s(^	'
 
biic3h'	('r'   r   rc   c                     |ry ||       S )aU  
    A convenient auto wrap policy to wrap submodules based on an arbitrary user
    function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
    a `wrapper_cls` unit.

    Return if a module should be wrapped during auto wrapping.

    The first three parameters are required by :func:`_recursive_wrap`.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
            this module will be wrapped.
    Tr1   )r   rJ   rK   rc   s       r&   r   r     s    . Vr'   transformer_layer_clsc                     t        | |||      S )a-  
    See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
    same as ``module_classes``. Note that shared parameters must be wrapped in
    the same FSDP instance, so this auto wrap policy can help wrap shared
    embeddings into the same FSDP instance for transformer models.
    )rN   )r   rJ   rK   rl   s       r&   r   r   *  s     vw0@BWXXr'   c                 2    |ryt        | t        |            S )NTrM   )r   r4   rJ   r?   r2   s        r&   _wrap_module_cls_individuallyro   9  s      &%"788r'   c                 4     t         fd|D              S )zv
    A policy that wraps ``module`` if any policy in the passed in iterable of
    ``policies`` returns ``True``.
    c              3   4   K   | ]  } |         yw)r   rJ   rK   Nr1   ).0policyr   rK   rJ   s     r&   	<genexpr>z_or_policy.<locals>.<genexpr>O  s&       	fg@PQQs   )any)r   rJ   rK   policiess   ``` r&   
_or_policyrx   E  s        r'   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                     |t         j                  n|}|t         j                  n|}|}||k\  }|r|xr t        | t	        |             S |xr t        | t	        |             S )a  
    A size-based auto wrap policy.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        min_num_params (int): Customizable policy input that controls the size
            threshold over which a module is ready to be wrapped. This is in
            units of numel.
        force_leaf_modules (Optional[set[type[nn.Module]]]): Set of module types to keep
            as leaves, i.e. their children will never be wrapped.
        exclude_wrap_modules (Optional[set[type[nn.Module]]]): Set of module types to be
            excluded in wrapping.

    Returns:
        Whether ``module`` should be wrapped.
    )r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr   r9   )r   rJ   rK   ry   rz   r{   min_nonwrapped_numelis_larges           r&   r   r   U  s    B % 	$66   ' 	$88!  *#77HM
659K3L MMM O
659M3N OOOr'   wrapper_clswrapper_kwargs)NNNc              +   `   K   d| i|}t        di |5  d ddd       y# 1 sw Y   yxY ww)a  
    Context manager to wrap modules using a wrapper.

    Useful for when you'd like to apply the same configuration arguments to all
    child modules that you wrap. A particularly important use case is wrapping
    large layers so that they get sharded (in-place) during initialization, to
    avoid running out of system memory. Large layers can indicate that they
    should be sharded via the ``wrap`` annotation and this context manager can
    provide the exact configuration for these nested instances.

    Usage::

        with enable_wrap(wrapper_cls, **params):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        wrapper_cls:
            Class that `wrap` annotation will `wrap` modules with, such as
            `FullyShardedDataParallel`.
        **wrapper_kwargs:
            Configuration settings that will be passed to all ``wrap``
            instances inside the context
    r   Nr1   )_ConfigAutoWrap)r   r   r2   s      r&   r   r     s@     : 	{
F 
	"6	"   s   ."	.+.wrap_overridesc                     t         j                  rJt         j                  t        d      i t         j                  |}t        | t         j                  fi |S | S )a  
    Annotate that a module should be wrapped. Annotated modules will only be
    wrapped if inside of an :func:`enable_wrap` context manager. This allows
    a module to be initialized both with and without a wrapper without code
    change.

    The class that this function wraps the passed in ``nn.Module`` with is the
    passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
    ``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
    the ``wrapper_cls`` instance. In the case of duplicate kwargs in
    ``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
    respected.

    Usage::

        with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
        **wrap_overrides: configuration overrides that will take priority over
            the values provided by the :func:`enable_wrap` context
    z.Expected _ConfigAutoWrap.wrapper_cls to be set)r   in_autowrap_contextr   r   r2   _wrap)r   r   s     r&   r   r     sd    2 **&&. !QRREO22EnE''
 
 	

 Mr'   c                 x    |t        d      t        | d      ri || j                  } || fi |S  || fi |S )NzExpected wrapper_cls to be set_wrap_overrides)r   hasattrr   )r   r   r2   	overridess       r&   r   r     sV    =>>v()
 9v8!7!78	6/Y//v(((r'   auto_wrap_policyignored_paramsonly_wrap_childrenr2   c           
      8   |t        d      |t        d      | j                         D ]6  \  }}||v r	 t        |t        t        |            rt        d| d|       8 t        fd| j                         D              }	|t        d       || d|	      rjd	}
| j                         D ]0  \  }}||v rt        d||||d
|\  }}t        | ||       |
|z  }
2 |	|
z
  }|s || d|      rt        | |fi ||	fS | |
fS | d	fS # t
        $ r Y w xY w)a  
    Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
    ``True`` with ``wrapper_cls``.

    Args:
        module (nn.Module): Module to recursively wrap.
        auto_wrap_policy (Callable): A callable representing a policy that
            determines which modules to recursively wrap with ``wrapper_cls``.
        ignored_modules (set[torch.nn.Module]): Modules to ignore when
            wrapping.
        ignored_params (set[torch.nn.Parameter]): Parameters to ignore when
            wrapping; these should be the parameters contained in the modules
            in ``ignored_modules``.
    Returns:
        (nn.Module, int):
            ``module`` after wrapping and the numel recursively wrapped.
    zMust specify auto_wrap_policy.zMust specify wrapper_clszChild module z is already wrapped by c              3   H   K   | ]  }|vs|j                           y wr0   )numel)rs   pr   s     r&   ru   z"_recursive_wrap.<locals>.<genexpr>  s#      !>2I	s   	""z#Expected auto_wrap_policy to be setTrr   r   )r   r   r   r5   r   Fr1   )r   named_modulesr   r
   r`   	TypeErrorsum
parametersr   _recursive_wrapr    r   )r   r   r   r5   r   r   r2   _childrK   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainders       `          r&   r   r     s   4 =>>788((* 
5O#	%dK!89$#E7*A+O  :	
  !,,.  BCCvtFVW!002 	6KD%'0? 1!1' /-1 1-M- FD-0#55	6  %'::	!&659'
 779III...19M  		s   +D	DDc                       e Zd ZU dZdZeed<   dZedz  ed<   i Z	e
eef   ed<   de
eef   fdZededdfd	       Zedd
       ZddZdedededdfdZy)r   z
    Helper class to wrap modules based on default config args via a context manager.
    See :func:`enable_wrap` for more information.
    Fr   Nr   r2   c                     || _         y r0   r2   )rC   r2   s     r&   rT   z_ConfigAutoWrap.__init__B  s	    r'   r-   c                     t         j                  rt        d      dt         _        d| vrt        d      t	        t
        | d         t         _        | d= | t         _        y )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Tr   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)r   r   NotImplementedErrorr   r
   r   r   r2   r   s    r&   enable_autowrap_contextz'_ConfigAutoWrap.enable_autowrap_contextE  sc    ..%o  /3+& K  '+8VM5J&K#=!!'r'   c                  F    dt         _        d t         _        i t         _        y )NF)r   r   r   r2   r1   r'   r&   disable_autowrap_contextz(_ConfigAutoWrap.disable_autowrap_contextV  s    .3+&*#!#r'   c                 :    | j                  | j                         y r0   )r   r2   )rC   s    r&   	__enter__z_ConfigAutoWrap.__enter__\  s    $$T[[1r'   exc_typeexc_valexc_tbc                 $    | j                          y r0   )r   )rC   r   r   r   s       r&   __exit__z_ConfigAutoWrap.__exit___  s    %%'r'   )r-   N)rE   rF   rG   rH   r   rg   __annotations__r   r   r2   rI   r)   r	   rT   staticmethodr   r   r   r   r1   r'   r&   r   r   8  s    
 !&%#'KD'FDcNc3h  ( ( ( (  $ $
2( (s (C (D (r'   r   )F)1
contextlibrV   abcr   r   collections.abcr   r   r   r   typingr	   r
   torch.nnr   __all__r   r*   rI   r)   r3   r`   r:   r=   rg   r   rA   intrN   r   r   r   r   ro   rx   r   
ModuleList
ModuleDictr~   MultiheadAttentionr}   contextmanagerr   r   r   	Parameterr9   r   r   r1   r'   r&   <module>r      s     # C C  	)3)3"))bii$../)3X!"))T#s(^";<  ryyk299t++,	,##T"))_-# ^# c3h	#
 ""))T#s(^";<#(4 c (5II55 5 RYY(	5
 
5@"Dw "DJ4'7 4'nII $8;HP	8YIIYY Y tBII/	Y
 
Y	9II	9'/~	9@D	9II 
 
* c(6:8<3PII3P3P 3P
 3P DO,t33P d299o.53P 
3Pn 57MM2==3Q  0242G2G1H  .   +.     F# #c #bii #L)")) )( ) )(  %MIIMM M ^	M
 %M M M 299c>M`(( ((r'   