
    9j U                     4   d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ g d	Z e j(                  e      Z	 d
Z G d d      Z G d de      Z e ej4                  d      ej6                        Zd Z G d d      Z G d d      Zdede de!e   fdZ"dejF                  dede deejF                     fdZ$d Z%	 	 d$de&edf   de'e(ef   dz  de d e&edf   dz  d!e'e(ef   dz  de&e!e&   e!e'   f   fd"Z)de!e   fd#Z*y)%    N)Sequence)Any)DTensor)	local_mapmap_aggregate)	BlockMask)tree_flattentree_maptree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd Zy)_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                      || _         || _        y N)
init_value	reduce_fn)selfr   r   s      g/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__-   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r       s    
#r   r   c                       e Zd Zy)_LossReducerNr   r   r   r   r   r   r    r    2       r   r    g        c                   n    e Zd ZU dZd Zeed<   d Zd Ze	de
edf   fd       Ze	deeef   fd	       Zy
)r   z2
    Class used to specify chunking of inputs
    c                     || _         y r   	split_dim)r   r&   s     r   r   zTensorChunkSpec.__init__B   s	    "r   r&   c                 |    | j                   j                   d| j                   j                   d| j                   dS )N.())	__class__r   r   r&   r   s    r   __repr__zTensorChunkSpec.__repr__G   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    d| j                    dS )NzTensorChunkSpec(r*   r%   r,   s    r   __str__zTensorChunkSpec.__str__L   s    !$..!133r   
chunk_dims.c                      t        | d       }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                     t        |       S r   r   dims    r   <lambda>z,TensorChunkSpec.from_tuple.<locals>.<lambda>^       , r   r   )r0   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleO   s     (,
 r   c                      t        | d       }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                     t        |       S r   r3   r4   s    r   r6   z+TensorChunkSpec.from_dict.<locals>.<lambda>p   r7   r   r   )r0   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dictb   s     *,
 ! r   N)r   r   r   r   r   int__annotations__r-   r/   staticmethodtupler9   dictstrr=   r   r   r   r   r   =   se    # N

4 #s(O $ !cN! !r   r   c                       e Zd Zy)
_ReplicateNr!   r   r   r   rE   rE   v   r"   r   rE   
block_mask
num_chunksreturnc                      j                   j                  d      dk(  r g|z  S  j                   j                  d      |k\  st        d      d}t        j                   j                   ||      }t        j                   j
                  ||      } j                  !t        j                   j                  ||      ndg|z  } j                  !t        j                   j                  ||      ndg|z  }g }d}t        |      D ]o  }	 fd}
|j                  t        j                  ||	   ||	   ||	   ||	    j                   |
|       j                               |||	   j                  d      z  }q |S )a	  Given a block mask, split the block mask along the batch dimension (dim0).

    Args:
        block_mask: Block mask to split
        num_chunks: Number of chunks to split the block mask into

    Returns:
        chunk_block_masks: List of chunked block masks
    r      z;Block mask has fewer batch size than the number of chunks. Nc                       fd}|S )Nc                 ^    t        j                  |       }j                  | |z   |||      S r   )torch	full_likemask_mod)bhq_idxkv_idxb_offsetrF   idxs        r   batch_offset_mask_modzI_split_block_mask.<locals>.create_mask_mod.<locals>.batch_offset_mask_mod   s.     ??1c2!**1x<E6JJr   r   )rU   rV   rF   s   ` r   create_mask_modz*_split_block_mask.<locals>.create_mask_mod   s    K )(r   )kv_num_blocks
kv_indicesfull_kv_num_blocksfull_kv_indices
BLOCK_SIZErO   seq_lengths)rX   sizeAssertionErrorrM   tensor_splitrY   rZ   r[   rangeappendr	   from_kv_blocksr\   r]   )rF   rG   	batch_dimkv_num_blocks_chunkskv_indices_chunksfull_kv_num_blocks_chunksfull_kv_indices_chunkschunk_block_masksbatch_offset	chunk_idxrW   s   `          r   _split_block_maskrl   z   s    $$Q'1,|j((##((+z9I
 	
 I --  *i **:+@+@*iX ((4 	:88*iPVj   %%1 	:55z9MVj   L:& @		) 	  $$29=,Y7#<Y#G 6y A%00(6&22
	
 	,Y7<<Q??)@* r   tensorspecc                    | j                  j                        k\  s(t        d| j                  j                         d      t        | t              }|r,| j
                  }t        fd|fz  |f      } ||       }n!t        j                  | j                        }| j                  r#| j                  r|D ]  }|j                           t        s|S dt        j                  dt        j                  dt        t        j                  df   ffd	}|rC| j
                  }t        |      }	t        ||f|	z  |f|f|	z  z         }
t!         |
| g|       S t!         || g|       S )
zGiven a tensor, and a chunking spec, split the tensor.
    Args:

        tensor: Tensor to split
        spec: Chunking spec
        num_chunks: Number of chunks to split the tensor into

    Returns:
        chunk_tensors: List of chunked tensors
    zTensor size z is smaller than num_chunksc                 F    t        j                  | j                        S r   )rM   r`   r&   )trG   rn   s    r   r6   z_split_tensor.<locals>.<lambda>   s    e((JG r   out_placementsin_placementsorigchunksrH   .c                 b   g }d}|D ]  }t        j                  |       }||j                  j                        z   }t	        d       g|j
                  z  }t	        ||      |j                  <   |||<   |j                  |       ||j                  j                        z  } t        |      S )Nr   )rM   
zeros_liker^   r&   slicendimrb   rA   )	ru   rv   expandedrU   chunknew_valupperslicesrn   s	           r   _expand_chunksz%_split_tensor.<locals>._expand_chunks   s      	.E&&t,G%**T^^44E#(;-',,">F%*3%6F4>>"#GFOOOG$5::dnn--C	. Xr   )r^   r&   r_   
isinstancer   
placementsr   rM   r`   requires_gradis_leafretain_grad_debug_mask_minibatchesTensorrA   lenlist)rm   rn   rG   _is_dtensorr   split_fnchunk_tensorsr|   r   n	expand_fns    ``        r   _split_tensorr      sx     ;;t~~&*46;;t~~677RS
 	
 VW-K
 &&
G&=:5%-

 190@**6:t~~N
 " 	 E	  #ll%*\\	u||S 	! &&
&=1,%-:-!*;;
	
 If5}566N6:M:;;r   c           	      $   | st        |      D cg c]  }i  c}S t        |       t        |      k(  s?t        dt        | j	                                dt        |j	                                      |t        d      t        | d       \  }}t        |d       \  }}g }t        ||d      D ]Z  \  }}	|	t        u st        |	t              r|j                  |       1t        |t        j                        rRt        |	t              st        d	t        |	             |j                  |j                  |	j                               t        |t               rt        |	t              st        d	t        |	             |	j                  d
k(  st        d      |j"                  j                  d
      dk(  r|j                  |       |j                  |j"                  j                  d
             Kt%        d|	 d| d       t'        g || }
t        |
      D cg c]  }g  }}t        ||d      D ]  \  }}	g }|	t        u st        |	t              r|g|
z  }nWt        |t        j                        rt)        ||	|
      }n/t        |t               rt+        ||
      }nt%        d|	 d| d      t        ||d      D ]  \  }}|j                  |         |D cg c]  }t-        ||       c}S c c}w c c}w c c}w )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    zargs_dict.keys() = z args_chunk_spec.keys() = z.args_chunk_spec should have been set by callerc                 "    t        | t              S r   r   r	   xs    r   r6   z%_shard_dict_of_args.<locals>.<lambda>%  s    Z9%= r   r   c                 "    t        | t              S r   r   r   s    r   r6   z%_shard_dict_of_args.<locals>.<lambda>(  s    :a+C r   TstrictzExpected TensorChunkSpec, got r   z#BlockMask only supports split_dim=0rJ   zUnsupported chunk spec: z and value: z combination.)ra   r   r_   r   keysr
   ziprE   r   rb   rM   r   r   typer^   r&   r	   rX   
ValueErrorminr   rl   r   )	args_dictr8   rG   _values	tree_specchunk_specssplit_sizesvrn   result_num_chunksflat_split_resultsv_splits_flat_split_result_v_splits                  r   _shard_dict_of_argsr     s
   $ !*-.q..y>S11!$y~~'7"8!9 :((,_-A-A-C(D'EG
 	
 MNN$=FI "!CNK
 Kv{48 4 :D*!=z*5<<(dO4$'Ed4j\%RSSqvvdnn569%dO4$'Ed4j\%RSS>>Q&$%JKK##A&!+"":.""1??#7#7#:;*4&QC}M ). 5[5*5167H1I$JAR$J$Jv{48 04"$:D*!=s..H5<<($Q.?@H9%(,=>H*4&QC}M  -0-
 	0( %%h/	00( #5 	)95  /X %K&s   	L)	L-Largs.kwargsrv   r8   r<   c                 j  	 |i }d }|t        || d       }|t        ||d       }t        t        t        |             t        t        |            |      }t	        |      }t        |||      }t	        |      |k  r<t	        |      }t        t        t        |             t        t        |            |      }t	        |      t	        |      k7  r#t        dt	        |       dt	        |             |D 		cg c](  	t        	fdt        t	        	            D              * }
}	|
|fS c c}	w )a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    c                 v    t        | t        j                  t        z        rt	        t
              S t               S r   )r   rM   r   r	   r   DEFAULT_CHUNK_DIMrE   r   s    r   default_specz3split_args_kwargs_into_chunks.<locals>.default_spec  s)    a	12"#455<r   c                 "    t        | t              S r   r   r   s    r   r6   z/split_args_kwargs_into_chunks.<locals>.<lambda>  s    *Q	2J r   r   c                 "    t        | t              S r   r   r   s    r   r6   z/split_args_kwargs_into_chunks.<locals>.<lambda>  s    Jq)4L r   z;args and kwargs are split into different number of chunks: z, c              3   (   K   | ]	  }|     y wr   r   ).0i
chunk_argss     r   	<genexpr>z0split_args_kwargs_into_chunks.<locals>.<genexpr>  s     <jm<s   )r   r   rB   	enumerater   RuntimeErrorrA   ra   )r   r   rv   r8   r<   r   args_split_dictreal_num_chunkskwargs_splitr   
args_splits            ` r   r   r   _  sW   p ~  "$(J
  $&*L
 *Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	
 * 	<U3z?%;<<J 
 |##s   =-D0c                    |t        |      \  }}n-t        | d         \  }}t        t              gt        |      z  }g }| D ]I  }t        |      \  }}t        |      t        |      k7  rt	        d| d|       |j                  |       K g }	t        |      D ]K  \  }
t        t              rt        t        |            D cg c]
  }||   |
    }}t        r@|d   j                  }|dd D ],  }|j                  |k(  rt        d| d|j                          t        j                  t        j                  |dd	it        |      j                  
      }g }d}t        |      t        |      k(  s#t        dt        |       dt        |             t!        ||d      D ]o  \  }}||j#                  j                        z   }t%        ddd      g|j&                  z  }t%        ||      |j                  <   ||   }|j                  |       |}q n|}|D cg c]  }t        |t(               }}t+        |      rt-        |      st        d      |d   j.                  t        |dd d      D ]2  \  }}|j.                  k7  st        d| d d|j.                          t1        fdft3        fdt        t        |            D                    }|	j                   ||        e|	j                  t        j4                  |j                               t        t6              rPj8                  }t        t        |            D ]  }j;                  |||   |
         } |	j                  |       |d   |
   }t        dt        |            D ]$  }||   |
   |k(  rt        d| d||   |
           |	j                  |       N t=        |	|      S c c}w c c}w )z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec rJ   zExpected shape z, got devicemeta)sectionsr5   z6Expected len(partial_values) == len(meta_chunks), got z != Tr   zRmerge_chunks: expected all values to be DTensors or none to be DTensors, got a mixz*merge_chunks: placement mismatch at chunk z: expected c                  F    t        j                  | j                        S )Nr4   )rM   catr&   )rv   args    r   r6   zmerge_chunks.<locals>.<lambda>S  s    EIIf#--$H r   c              3   "   K   | ]  }  y wr   r   )r   r   r   s     r   r   zmerge_chunks.<locals>.<genexpr>U  s     'Vq
'Vs   rr   r4   z	Expected )r
   r   r   r   r   rb   r   r   ra   r   shaper_   rM   r`   emptyr&   r   r^   ry   rz   r   anyallr   r   rA   r   r   r   r   r   )rv   
chunk_specspec_flattenedflatten_specchunk0_flatchunks_flattenedr|   chunk_flattenedr   args_flattenedarg_idxrk   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxslice_indicesslicedr   dtensor_flagsr   cat_fnreduced_valvaluer   r   s                                @@r   r   r     sf   Z '3J'?$ %1$;!\)*;<=K@PP  1)%03~#66veW,FzlSTT01 N!.1 T)c?+ "'s+;'<!= !+G4N 
 ' .q 1 7 7)!"- C995,-m_F399+N 
 $00KK>v> 0 !#"#>*c+.>>(PQTUcQdPeeijmnyjzi{|  25"K2 
4-M: %4joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O
4 !/ >KKZ73KMK=!=)(9 
 +1-88
%mAB&7; DAq||z1,H L((2|6!,,I  #H$.="''VE#mBT<U'V"V
 %%fm&<=%%eii3==&QR^,..K"3'7#89 	!mm!1)!<W!E
 !!+.$Q'0E"1c*:&;< 	'	27;uD(#E7&1A)1LW1U0VW 
 !!%(iT)n .,77kV Ls   O9*O>)NN)+loggingoperatorcollections.abcr   typingr   rM   torch.distributed.tensorr   %torch.distributed.tensor.experimentalr   torch.fx.noder   !torch.nn.attention.flex_attentionr	   torch.utils._pytreer
   r   r   __all__	getLoggerr   loggerr   r   r    rm   addsum_reducerr   r   rE   r>   r   rl   r   r   r   rA   rB   rC   r   r   r   r   r   <module>r      s     $   , ; ' 7 F F 
		8	$
   # #$	> 	 <5<<,hll;  5! 5!r	 	>>> 
)_>BI<LLI<
I< I< ell	I<XUx ;?;?p$
S/p$cNT!p$ p$ ?C/047	p$
 C01D8p$ 4;T
"#p$f[8I[8r   