
    9j{                        U d dl mZmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZmZ  G d d	e      Zdaedz  ed
<   deddfdZ	 ddej0                  dedz  deej0                  edz  f   fdZ	 ddej0                  dededz  dej0                  fdZ	 ddej0                  dedededej:                  dedz  dej0                  fdZ	 ddej0                  dedededz  dej0                  f
dZ	 ddej0                  dedz  deej0                  e e   f   fdZ!	 ddededz  dedz  dej0                  fdZ"y)    )ABCabstractmethod)AnyN)ShardedTensor)Shard)_all_gather_dtensor_create_chunk_dtensor_create_chunk_sharded_tensor)
DeviceMeshDTensorc                      e Zd ZdZedej                  deej                  edz  f   fd       Z	edej                  dedej                  fd       Z
e	 ddej                  ded	ed
edej                  dej                  dz  dej                  fd       Zedej                  dededej                  fd       Zedej                  deej                  ee   f   fd       Zedededz  dej                  fd       Zy)FSDPExtensionsz
    This enables some customizable hooks to enable composability with tensor
    parallelism. To activate these hooks, use :func:`_set_fsdp_extensions` to
    set a custom :class:`FSDPExtensions` that implements the hooks.
    tensorreturnNc                      y)z6E.g. converting ``DistributedTensor`` to local tensor.N selfr   s     g/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/distributed/fsdp/_fsdp_extensions.pypre_flatten_transformz$FSDPExtensions.pre_flatten_transform   s     	    param_extensionc                      y)z6E.g. converting local tensor to ``DistributedTensor``.Nr   )r   r   r   s      r   post_unflatten_transformz'FSDPExtensions.post_unflatten_transform   s     	r   rank
world_sizenum_devices_per_nodepgdevicec                      y)z6Shards a tensor to chunks and returns the local chunk.Nr   )r   r   r   r   r   r   r   s          r   chunk_tensorzFSDPExtensions.chunk_tensor(        	r   device_meshc                      y)zAShards a tensor/DTensor to DTensor and returns the local DTensor.Nr   )r   r   r   r#   s       r   chunk_dtensorzFSDPExtensions.chunk_dtensor5   s     	r   c                      y)z
        This is to be called before loading a *sharded* model state dict and
        should return the tensor and list of shards from which to load data.
        Nr   r   s     r   pre_load_state_dict_transformz,FSDPExtensions.pre_load_state_dict_transform?   s     	r   parent_meshc                      y)z
        This is to be called before loading a *sharded* DTensor state dict.
        This gathers tensor in FSDP dimension and returns local tensor of
        TP DTensor.
        Nr   )r   r   r(   s      r   all_gather_dtensorz!FSDPExtensions.all_gather_dtensorJ   r"   r   N)__name__
__module____qualname____doc__r   torchTensortupler   r   r   intdistProcessGroupr   r!   r   r%   listr   r'   r   r*   r   r   r   r   r      s     
u||S4Z'	(    
	   '+

 
 	

 "
 
 t#
 

 
    	
 
   
u||T%[(	)  

  $&
 
	
 
r   r   _extensions	flattenerr   c                     | a y r+   )r7   )r8   s    r   _set_fsdp_extensionsr:   [   s    Kr   r   fsdp_extensionc                 B    ||j                  |       \  }}|||fS | d fS r+   )r   )r   r;   
new_tensorr   s       r   _ext_pre_flatten_transformr>   `   s:     !&4&J&J6&R#
O&..4<r   r   c                 2    |||j                  | |      S | S r+   )r   )r   r   r;   s      r   _ext_post_unflatten_transformr@   k   s'    
 !o&A66vOOMr   r   r   r   r   c                 B    ||j                   nt        } || ||||      S r+   )r!   r
   )r   r   r   r   r   r;   chunk_tensor_fns          r   _ext_chunk_tensorrC   u   s;     % 	##) 
 
 r   r#   c                 >    ||j                   nt        } || ||      S r+   )r%   r	   )r   r   r#   r;   chunk_dtensor_fns        r   _ext_chunk_dtensorrF      s5     % 	$$" 
  r   c                     ||j                  |       S t        |       t        urt        dt        |              | j	                         }| |fS )NzExpected ShardedTensor, got )r'   typer   AssertionErrorlocal_shards)r   r;   shardss      r   "_ext_pre_load_state_dict_transformrL      sW     !;;FCCF|=(;DL>JKK  "FFr   r(   c                 <    ||j                   nt        } || |      S r+   )r*   r   )r   r(   r;   all_gather_dtensor_fns       r   _ext_all_gather_dtensorrO      s,     % 	))  
 !55r   r+   )#abcr   r   typingr   r0   torch.distributeddistributedr4   +torch.distributed._shard.sharded_tensor.apir   -torch.distributed._shard.sharded_tensor.shardr   #torch.distributed.fsdp._shard_utilsr   r	   r
   torch.distributed.tensorr   r   r   r7   __annotations__r:   r1   r2   r>   r@   r3   r5   rC   rF   r6   rL   rO   r   r   r   <module>rY      s   # #     E ? 
 9ES EP &*^d" )N t  -1LL"T) 5<<t#$ -1LL #T) \\	  -1LL
  	
 	 #T) \\4 -1	LL
  #T)	
 \\( -1
LL
"T)
 5<<e$%
  -1
6
6d"
6 #T)
6 \\	
6r   