
    9j                     N   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZ d dlmZ d dlmZmZmZmZ d Z	 ddej.                  d	ed
ededej2                  dej4                  dz  defdZdej.                  d	ededefdZdededz  dej.                  fdZy)    N)_get_device_module)distributed_c10d)ShardShardedTensorShardedTensorMetadataTensorProperties)ShardMetadata)
DeviceMeshDTensor	Replicater   c                     |j                         dk(  rd|  d| S |j                         dk(  r"d|  d| dt        |      j                          S d|  d| d| |z   S )Ncpuzrank:/hpu:)lowerr   current_device)rankdevice_typenum_devices_per_nodes      c/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/distributed/fsdp/_shard_utils.py_get_remote_device_strr      s    e#tfAk]++					%tfAk]!,>{,K,Z,Z,\+]^^tfAk]!D3G,G+HII    tensorr   
world_sizer   pgdevicereturnc                 R   | j                  |d      }t        |      |kD  rx||   j                         }| j                         D cg c]  }d }	}t	        j
                  | j                         d   |z        |z  |	d<   t        j                  ||	|      g}
ng }
|D cg c]  }t        |j                                }}dgt        t        j                  |D cg c]  }|d   	 c}            dd z   }dgt        |d         dz
  z  }	|D cg c]  }|g|	z   
 }}|t        j                  |      j                  n|j                  }t        t        |            D cg c]#  }t        t!        j"                  ||      ||      % }}t        |      t        |      k7  st        |      t        |      k7  r/t%        dt        |       dt        |       dt        |             t'        |||      D cg c]  \  }}}t)        |||       }}}}t+        || j                         t-        | j.                  | j0                  dt2        j4                  | j7                         	      
      }t9        j:                  |
||      S c c}w c c}w c c}w c c}w c c}w c c}}}w )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local shard to create a ShardedTensor.
    r   )dimN   zQExpected chunk_sizes, chunk_offsets, and placements to have the same length, got z, F)dtypelayoutrequires_gradmemory_format
pin_memory)shards_metadatasizetensor_properties)sharded_tensor_metadataprocess_group)chunklencloner)   mathceilr   from_tensor_and_offsetslist	itertools
accumulater   _get_pg_default_devicetyperanger   distget_global_rankAssertionErrorzipr	   r   r   r#   r$   torchcontiguous_format	is_pinnedr   +_init_from_local_shards_and_global_metadata)r   r   r   r   r   r   chunkslocal_shard_offsetslocal_shardsr-   chunk_sizes
chunk_sizedim0_offsetsd0chunk_offsetsr   r
placementsoffsetr)   	placementshard_metadatar+   s                            r   _create_chunk_sharded_tensorrP      s    \\*!\,F
6{TTl((*$kkm,1,,YYv{{}Q/*<=D
55k7DQR 4::%4

%:K:3kJ
jmJK	r L cSQ(1,-G.:;bTG^;M; > 	//388[[  s;'(  	  Q' 	
J  ;3}--[1AS_1T{#$Bs='9&:"S_<MO
 	
 (+=+z'R #FD) 	fdI.N  4&[[]*,,==11'')

 DD.EUW ] - ;J <s$   	J	  JJJ#(J;J"device_meshc                 h   | j                         j                         } t        |j                        D cg c]  }t	                }}t        |j                        D cg c]  }t	                }}t        d      |d<   t        j                  | ||d      j                  |      S c c}w c c}w )z
    Shard a tensor to chunks along the first dimension. The local rank will gets its
    corresponding chunk as the local tensor to create a DTensor.
    r   r!   F)	run_check)rL   )	detachr/   r8   ndimr   DShardr   
from_localredistribute)r   r   rQ   rC   replicate_placementsshard_placementss         r   _create_chunk_dtensorr[   _   s     ]]_""$F 27{7G7G1HIAIKII-2;3C3C-DE	EE!!9R1Ul#  	 JEs   B* B/	root_meshc                     || j                   k7  rt        d      t        t        j                  | j
                              }t               |d<   | j                  | j                   |      } | j                         S )zT
    All gather a DTensor in its sharded dimension and return the local tensor.
    z2The device mesh of a tensor should be a root mesh.r!   )rQ   rL   )	rQ   r;   r3   copydeepcopyrL   r   rX   to_local)r   r\   rL   s      r   _all_gather_dtensorra   x   st     F&&&QRRdmmF$5$567J [JrN  && ! F
 ??r   )N)r^   r4   r0   r=   torch.distributeddistributedr9   torch._utilsr   r   'torch.distributed._shard.sharded_tensorr   r   r   r   &torch.distributed._shard.sharding_specr	   torch.distributed.tensorr
   r   r   rV   r   TensorintProcessGroupr   rP   r[   ra    r   r   <module>rl      s          + .  A T TJ #'?LL?
? ? 	?
 	? LL4? ?DLL
  	2D  \\r   