
    9j<3                    "   d Z ddlmZ ddlZddlmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZmZmZmZ ddlmZ ddlmZ ddZddl m!Z!m"Z"m#Z#m$Z$ erddl%m&Z& ddl'm(Z( ddZ) G d de      Z* G d d      Z+y)a  
Decomposition-based sharding propagation for DTensor.

When an operator doesn't have a registered sharding strategy, we derive one by
tracing through its decomposition. The decomposed ops (which do have strategies)
determine how placements propagate through the original op.
    )annotationsN)AnyTYPE_CHECKING)decomposition_table)_are_we_tracing)
DeviceMesh)DTensorSpec)OpSchema
OpStrategyRuntimeSchemaInfo)try_find_mesh_from_args)_StridedShard	Placement	ReplicateShard)GuardOnDataDependentSymNode)TorchDispatchModec                   | j                   }d}t        |j                        D ]7  \  }}|j                  r n&|j                  j                         dk7  s2|5|} n g }|j                  D ]H  }|j                  s|j                  j                         dk7  s.|j                  |j                         J i }|||d<   |r||d<   t        di |S )zGInfer RuntimeSchemaInfo from an operator's schema for decomposition opsN
TensorTypestatic_argnumstatic_kwargkey )	_schema	enumerate	arguments
kwarg_onlytypekindappendnamer   )opschemar   iargkwarg_only_nameskwargss          h/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/distributed/tensor/_decompositions.py_infer_schema_info_from_opr(       s    ZZF MF,,- 3>>88==?l*}/DM  .>>chhmmo=##CHH-. F "/$4 ! &v&&    )tree_anytree_flattentree_maptree_map_only)
OpOverload)ShardingPropagatorc                b    | j                   t        | j                  j                               z   S N)args_schematuplekwargs_schemavalues)	op_schemas    r'   _extract_input_specsr7   F   s'      5)@)@)G)G)I#JJJr)   c                  4     e Zd ZdZd fdZddZddZ xZS )PlacementTrackingModez
    TorchDispatchMode that tracks DTensor placements through op execution.

    Used during decomposition tracing: intercepts each op, propagates sharding
    via the ShardingPropagator, and records output placements on the result tensors.
    c                >    t         |           || _        || _        y r1   )super__init__sharding_propmesh)selfr=   r>   	__class__s      r'   r<   zPlacementTrackingMode.__init__R   s    *	r)   c                   t        d ||xs i f      \  }}t        d ||f      st        d|       t        |||      }| j                  j
                  j                  |      }|%| j                  j                  j                  |      }|||_        |j                          t               r| j                  j                  |      }	n| j                  j                  |      }	|	j                  r^|	j                  x}
	 Ot        |j                   |
j                         D ],  \  }}|j"                  |j"                  k7  s t%        d|         ||i |}| j'                  ||	j(                         |S )Nc                T    t        | t        j                        rt        | d|       S | S )N_spec)
isinstancetorchTensorgetattrxs    r'   <lambda>z:PlacementTrackingMode.__torch_dispatch__.<locals>.<lambda>Y   s!    
1ell0Kga!, QR r)   c                "    t        | t              S r1   rD   r	   rH   s    r'   rJ   z:PlacementTrackingMode.__torch_dispatch__.<locals>.<lambda>^       jK0 r)   z(No DTensorSpec found in args/kwargs for z*Decomposition requires redistribution for )r,   r*   NotImplementedErrorr
   r=   op_to_schema_infoget)op_to_schema_info_for_single_dim_strategyschema_info_recompute_comparison_keyr    propagate_op_sharding_non_cachedpropagate_op_shardingneeds_redistributeredistribute_schemazip	args_spec
placementsRuntimeError_record_output_specsoutput_spec)r?   functypesargsr&   r2   r4   r6   rR   output_shardingrW   origdesiredouts                 r'   __torch_dispatch__z(PlacementTrackingMode.__torch_dispatch__W   s   %-R6<R &
"]
 0;2N
 &(PQUPV&WXX T;>	((::>>tD""LLPPQUV  "$/I!//1"00QQO #00FFyQO .. #667# 	 "%###--" g ??g&8&88&DTFK  D#F#!!#'B'BC
r)   c                    t        |t        j                        r
|||_        y t        |t        t
        f      r>t        |t        t
        f      r't        ||      D ]  \  }}| j                  ||        y y y r1   )rD   rE   rF   rC   r3   listrX   r\   )r?   outputr]   tss        r'   r\   z*PlacementTrackingMode._record_output_specs   sn    fell+0G&FL.:%4
 FK0 01))!Q/04
.r)   )r=   r/   r>   r   )r   N)rh   r   r]   zDTensorSpec | AnyreturnNone)__name__
__module____qualname____doc__r<   re   r\   __classcell__)r@   s   @r'   r9   r9   J   s    
5n0r)   r9   c                  |    e Zd ZdZd
dZddZedd       ZddZ	 	 	 	 ddZ		 	 	 	 	 	 	 	 ddZ
e	 	 	 	 dd       Zy	)DecompShardingStrategyaA  
    Generates sharding strategies for ops by tracing through their decompositions.

    For each candidate input placement combination, runs the decomposition on meta
    tensors under PlacementTrackingMode to determine the output placement. These
    single-dimension strategies are then expanded to the full mesh.
    c                     || _         i | _        y r1   )r=   _fake_meshes)r?   r=   s     r'   r<   zDecompShardingStrategy.__init__   s    * 46r)   c                ~    | j                   j                  |      }|t        |dgdd      }|| j                   |<   |S )Nr   F)_init_backend_rank)ru   rP   r   )r?   device_type	fake_meshs      r'   _get_fake_meshz%DecompShardingStrategy._get_fake_mesh   sG    %%))+6	";5PQRI-6Dk*r)   c                6    | t         v xs | j                         S r1   )r   _can_decompose)r!   s    r'   
has_decompz!DecompShardingStrategy.has_decomp   s     ((?B,=,=,??r)   c                ~    || j                   j                  vr%t        |      }|| j                   j                  |<   yy)z
        Register schema_info for decomposition op on first invocation.
        Needed for correct shard prop cache key.
        N)r=   rO   r(   )r?   r!   rR   s      r'   ensure_schema_infoz)DecompShardingStrategy.ensure_schema_info   s=    
 T''9994R8K7BD004 :r)   c                   t        d |j                  |j                  f      sy | j                  |      }t	        |j
                  |j                  t        |j                  j                               z         }| j                  |j                        }g }g }|D ]Q  }	 | j                  |||      }t!        |t              s|gn
t#        |      }|j%                  |t#        |      z          S |st'        d      t)        |      }	| j*                  j-                  |      }
ddlm}  |||
||	      S # t        $ r Y  y t        $ r Y  y t        t        t        f$ r Y w xY w)Nc                "    t        | t              S r1   rL   rH   s    r'   rJ   z;DecompShardingStrategy.propagate_strategy.<locals>.<lambda>   rM   r)   zGSharding propagation should have produced at least Replicate() strategyr   )expand_to_full_mesh_op_strategy)input_index)r*   r2   r4   _get_candidate_placementsr   r!   r3   r5   r{   ry   _propagate_through_decomprN   r   r[   KeyError
IndexErrorrD   rg   r   AssertionErrorlenr=   _wrap_with_op_strategy#torch.distributed.tensor._ops.utilsr   )r?   r6   candidate_placementsr>   rz   single_dim_strategiesoutput_placementsinput_placementsrh   	n_outputsstrategy_schemar   s               r'   propagate_strategyz)DecompShardingStrategy.propagate_strategy   s    0""I$;$;<
 #==iH&LL!!E)*A*A*H*H*J$KK

 ''(8(89	 "EG 4 	U77$ !+65 9tF|  "(():TBR=S)ST'	U* % Y  )*	,,CCIN	
 //#8i
 	
5 ' .  (J7  s   D44	E! E!E! E!c                t   |j                   }|t        v r
t        |   }n+|j                         r|j                  }nt	        d|       t        |      fd}ddlm}  |       5  t        ||j                        }t        ||j                        }	t        | j                        5   ||i |	}
d d d        d d d        d }t        |
      }t        |t        t        f      r0|D cg c]  }||	 }}t!        |      dk(  r|d   S t        |      S |S # 1 sw Y   hxY w# 1 sw Y   lxY wc c}w )NzNo decomposition found for c                    t              }t        | t              rVt        j                  | j
                  | j                  j                  d      }t        |f| j                        |_        |S | S )Nmeta)dtypedevice)tensor_meta)	nextrD   r	   rE   emptyshaper   r   rC   )rI   pr   r>   placement_iters      r'   to_metazADecompShardingStrategy._propagate_through_decomp.<locals>.to_meta  sW    ^$A![){{177!--2E2EfU(tO
Hr)   r   )maybe_disable_local_tensor_modec                x    t        | t        j                        r t        | dd       }|r|j                  d   S d S y )NrC   r   )rD   rE   rF   rG   rZ   )ri   specs     r'   get_placementzGDecompShardingStrategy._propagate_through_decomp.<locals>.get_placement  s7    !U\\*q'40-1tq);t;r)      )r!   r   r}   	decomposerN   itertorch.distributed._local_tensorr   r,   r2   r4   r9   r=   rD   r3   rg   r   )r?   r6   	placementr>   r!   	decomp_fnr   r   	args_metakwargs_metarh   r   resultr   flatr   s      `           @r'   r   z0DecompShardingStrategy._propagate_through_decomp   s.    \\$$+B/I I%(CB4&HIIi	 	T,. 	> )*?*?@I"7I,C,CDK&t'9'94@ >"I==>	>	 -0fudm,%7!A7D7!$i1n47=%+=> >	> 	>  8s1   *AD)-	D6D)/D57D5D&	"D))D2c           
       	 t        |       }t        t        |            \  }}t               h	t	        t
        	fd|       g }|D ]  }t        |t
              s|j                  d g       &t        	      }	D ]  }t        |t              r<|t        |j                        D ch c]  }t        ||j                         c}z  }Ot        |t              s`|t        |j                        D ch c]  }t        |       c}z  } |j                  t        |              t        t        j                  |       S c c}w c c}w )Nc                :    j                  | j                        S r1   )updaterZ   )r   all_placementss    r'   rJ   zBDecompShardingStrategy._get_candidate_placements.<locals>.<lambda>4  s    ..t? r)   )split_factor)r7   r+   rg   r   r-   r	   rD   r   setr   rangendimr   r   	itertoolsproduct)
r6   tensor_specs
flat_specs_
candidatesr   optionsr   r#   r   s
            @r'   r   z0DecompShardingStrategy._get_candidate_placements)  s,    ,I6$T,%78
A +4+?	
 46
 	1DdK0!!4&)n-' HA!!]3%*499%5$ ! *!!..I$  $Au-eDII6F#GE!H#GGH !!$w-0	1  I%%z233$
 $Hs   %E
2E
N)r=   r/   )ry   strrk   r   )r!   r.   rk   bool)r!   r.   rk   rl   )r6   r
   rk   zOpStrategy | None)r6   r
   r   ztuple[Placement | None]r>   r   rk   z!Placement | tuple[Placement, ...])r6   r
   rk   zlist[tuple[Placement | None]])rm   rn   ro   rp   r<   r{   staticmethodr~   r   r   r   r   r   r)   r'   rs   rs      s    6 @ @C6
6
 
6
p00 +0 	0
 
+0d  4 4	& 4  4r)   rs   )r!   r.   rk   r   )r6   r
   rk   z tuple[DTensorSpec | object, ...]),rp   
__future__r   r   typingr   r   rE   torch._decompr   )torch.distributed._functional_collectivesr   torch.distributed.device_meshr   &torch.distributed.tensor._dtensor_specr	   #torch.distributed.tensor._op_schemar
   r   r   torch.distributed.tensor._utilsr   (torch.distributed.tensor.placement_typesr   r   r   r   %torch.fx.experimental.symbolic_shapesr   torch.utils._python_dispatchr   r(   torch.utils._pytreer*   r+   r,   r-   
torch._opsr.   'torch.distributed.tensor._sharding_propr/   r7   r9   rs   r   r)   r'   <module>r      s{    #  %  - E 4 > W W C  N :'< P O %JKK0- K0\r4 r4r)   