
    9j*                         d dl Z d dlmZ d dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ ddZd ZddZd	 Ze j"                  d
ed   fd       Ze j"                  d
ed   fd       Ze j"                  d
ed   fd       Zy)    N)	Generator)global_decomposition_table)_rnn_helpergather_paramsgru_cell	lstm_cell)
while_loopc                    |d   }|d   |r|d   nd}|r|d   ndt        |      dk(  r|d   nt        |      dk(  r|d   nd|d   j                  d      }|d   j                  d      }t        j                  j                  j                  | ||      |rj                  d      nt        j                  j                  d      gt        |j                  dd       |j                  |j                  d}	fd	}
fd
}t        j                  dt        j                        }t        |
|||	||g      \  }}}}|r|j                  d      }||j!                  d      |j!                  d      ffS )ay  
    1 layer fn for while loop LSTM

    Args:
        inp: Input tensor of shape (seq_len, batch, input_size)
        hidden: Tuple of (hx, cx) hidden states
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        reverse: Whether to process sequence in reverse

    Returns:
        Tuple of (output, (final_hx, final_cx))
    r         N         dtypedevicec                 ,    | j                  d      k  S Nr   size)iouthxcxprecomputed_inputs       U/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/export/_patches.pycond_fnz*one_layer_while_loop_lstm.<locals>.cond_fn.       $))!,,,    c           	      *   | j                         }t        j                  |       t        j                  |j                  d      dz
         t	        |   ||d      \  }}|j                         }|j                  d      ||<   | dz   |||fS )Nr   r   maxr   )	chunk_dim)itemtorch_check_is_sizer   r   clonesqueeze)	idxr   r   r   r   hh_bias	hh_weight	hr_weightr   s	        r   body_fnz*one_layer_while_loop_lstm.<locals>.body_fn1   s    HHJQQ$5$:$:1$=$ABa "b)WiST
B iikAAQwR##r   r   )len	unsqueezer%   nn
functionallinearflipemptyr   tupleshaper   r   tensorint64r	   r(   )inphiddenparams
has_biasesreverse	ih_weightih_biasr   r   step_outputr   r-   cnt_r   final_hxfinal_cxr*   r+   r,   r   s                    @@@@r   one_layer_while_loop_lstmrF   
   s    q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  
		Q	B			Q	B++223	7K5<)..q1BS ++q!	rxx|	 hhyy	K-$ ,,q
,C!+3R4"AsHh hhqk !!!$h&6&6q&9:::r   c	                    t        |      dk7  rt        d      t        |||d   j                  d      |d   j                  d      k7        }t	        t        |d   |d               }	t        }
t        | |	||||||||

      \  }}t	        t        |       }|t        j                  |d   d      t        j                  |d   d      fS )a  
    LSTM implementation using while_loop for export compatibility.

    This is a drop-in replacement for the default LSTM decomposition that uses
    while_loop instead of Python loops, making it more suitable for torch.export.

    Args:
        input: Input tensor
        hx: Tuple of (h0, c0) hidden states
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        num_layers: Number of LSTM layers
        dropout: Dropout probability
        train: Training mode
        bidirectional: Whether to use bidirectional LSTM
        batch_first: Whether batch dimension is first

    Returns:
        Tuple of (output, h_n, c_n)
    r   zlstm expects two hidden statesr   r   )
r/   AssertionErrorr   r   listziprF   r   r%   stackinputr   r<   r=   
num_layersdropouttrainbidirectionalbatch_firstr;   layer_fnr   final_hiddenss                r   lstm_while_loop_implrU   I   s    > 2w!|=>>6:r!uzz!}1

1/MNF#beRU#$F(H$C m,-MM!,a0%++mA>NPQ2RRRr   c                 f   |d   |d   |r|d   nd|r|d   ndt         j                  j                  j                  |       |rj	                  d      n|j                  d      }t        j                  j                  d      gt        |j                  dd       |j                  |j                  d}fd}fd}t        j                  dt         j                  	      }	t        |||	||g      \  }
}}|r|j	                  d      }||j                  d      fS )
ad  
    1 layer fn for while loop GRU

    Args:
        inp: Input tensor of shape (seq_len, batch, input_size)
        hidden: Hidden state tensor
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        reverse: Whether to process sequence in reverse

    Returns:
        Tuple of (output, final_hidden)
    r   r   r   Nr   r   c                 ,    | j                  d      k  S r   r   )r   r   
cur_hiddenr   s      r   r   z)one_layer_while_loop_gru.<locals>.cond_fn   r   r   c                    | j                         }t        j                  |       t        j                  |j                  d      dz
         t	        |   |      }|j                         }|j                  d      ||<   | dz   ||fS )Nr   r   r!   )r$   r%   r&   r   r   r'   r(   )	r)   r   rX   r   r*   r+   r@   r?   r   s	       r   r-   z)one_layer_while_loop_gru.<locals>.body_fn   s    HHJQQ$5$:$:1$=$ABa *i)W

 iik##A&AQwZ''r   r.   )r%   r1   r2   r3   r4   r0   r5   r   r6   r7   r   r   r8   r9   r	   r(   )r:   r;   r<   r=   r>   rX   rA   r   r-   rB   rC   r   final_hiddenr*   r+   r@   r?   r   s                @@@@@r   one_layer_while_loop_grur[   }   s/    q	Iq	I%fQi4G%fQi4G++223	7K5<)..q1BS!!!$J ++q!	z#	$   	K-
( 
( ,,q
,C%gwk:8VWAsLhhqk$$Q'''r   c	                     t        ||d      }t        |j                  d            }	t        }
t	        | |	||||||||

      \  }}|t        j                  |d      fS )a  
    GRU implementation using while_loop for export compatibility.

    This is a drop-in replacement for the default GRU decomposition that uses
    while_loop instead of Python loops, making it more suitable for torch.export.

    Args:
        input: Input tensor
        hx: Hidden state tensor
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        num_layers: Number of GRU layers
        dropout: Dropout probability
        train: Training mode
        bidirectional: Whether to use bidirectional GRU
        batch_first: Whether batch dimension is first

    Returns:
        Tuple of (output, h_n)
    Fr   )r   rI   unbindr[   r   r%   rK   rL   s                r   gru_while_loop_implr^      sn    > 6:u5F"))A,F'H$C M1---r   return)NNNc              #   n  K   t         d   }|j                  | d      }| j                  j                  t        j                  j
                  j                  d      }	 ||| <   || j                  t        j                  j
                  j                  <   d |||| <   n|j                  | d       |2|| j                  t        j                  j
                  j                  <   y| j                  j                  t        j                  j
                  j                  d       y# |||| <   n|j                  | d       |2|| j                  t        j                  j
                  j                  <   w | j                  j                  t        j                  j
                  j                  d       w xY ww)a  
    Generic context manager for registering while_loop-based RNN decompositions.

    Args:
        rnn_op: The aten operation to patch (e.g., torch.ops.aten.lstm.input)
        rnn_impl: The while_loop-based implementation function

    Note:
        This is an internal helper. Use register_lstm_while_loop_decomposition()
        or register_gru_while_loop_decomposition() instead.
    post_autogradN)r   get
py_kernelsr%   _CDispatchKeyCompositeImplicitAutogradpop)rnn_oprnn_implregistryoriginal_decomporiginal_py_kernels        r   &_register_rnn_while_loop_decompositionrm      sr     */:H ll640O  **..66X#LT%((..HHI &.HV LL& )" ehh22LLM
 !!%(("6"6"P"PRVW &.HV LL& )" ehh22LLM
 !!%(("6"6"P"PRVWs    AF5:D$ BF5$BF22F5c               #      K   t        t        j                  j                  j                  j
                  t              5  d ddd       y# 1 sw Y   yxY ww)a  
    Context manager that temporarily registers the while_loop-based LSTM decomposition.

    The while_loop-based decomposition is more suitable for export and graph-based
    execution, as it avoids Python control flow that cannot be captured in the graph.
    This should support dynamic sequence lengths, however as while_loop does not
    support Autograd yet, an ExportedProgram created with this will not be trainable.

    Usage::

        from torch.export._patches import register_lstm_while_loop_decomposition
        from torch.export import export

        with register_lstm_while_loop_decomposition():
            # Export your model with LSTM
            ep = export(model, (x, h0, c0))

    Note:
        This context manager temporarily modifies the global decomposition table
        and py_kernels registration. The original registrations are restored when
        exiting the context.
    N)rm   r%   opsatenlstmrM   rU    r   r   &register_lstm_while_loop_decompositionrs     sC     0 
0		!!#7
  	     =AA	AAAc               #      K   t        t        j                  j                  j                  j
                  t              5  d ddd       y# 1 sw Y   yxY ww)a  
    Context manager that temporarily registers the while_loop-based GRU decomposition.

    The while_loop-based decomposition is more suitable for export and graph-based
    execution, as it avoids Python control flow that cannot be captured in the graph.
    This should support dynamic sequence lengths, however as while_loop does not
    support Autograd yet, an ExportedProgram created with this will not be trainable.

    Usage::

        from torch.export._patches import register_gru_while_loop_decomposition
        from torch.export import export

        with register_gru_while_loop_decomposition():
            # Export your model with GRU
            ep = export(model, (x, h0))

    Note:
        This context manager temporarily modifies the global decomposition table
        and py_kernels registration. The original registrations are restored when
        exiting the context.
    N)rm   r%   ro   rp   grurM   r^   rr   r   r   %register_gru_while_loop_decompositionrw   2  sC     0 
0		  "5
  	  rt   )F)
contextlibcollections.abcr   r%   torch._decompr   torch._decomp.decompositionsr   r   r   r   "torch._higher_order_ops.while_loopr	   rF   rU   r[   r^   contextmanagerrm   rs   rw   rr   r   r   <module>r~      s     %  4 X X 9<;~1Sh3(l..b ,X ,X ,X^ 	:J0K  : y9I/J  r   