
    9j4>                         d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z	 ddl
mZ ddlmZ g dZ G d	 d
e      Z G d de      Zeee   z  ez  Z G d de      Z G d de      Z G d de      Zy)    N)SizeTensor)
functionalinit)	Parameter   )CrossMapLRN2d)Module)LocalResponseNormr	   	LayerNorm	GroupNormRMSNormc                        e Zd ZU dZg dZeed<   eed<   eed<   eed<   	 ddededededdf
 fd	Zd
e	de	fdZ
d Z xZS )r   a  Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    )sizealphabetakr   r   r   r   returnNc                 Z    t         |           || _        || _        || _        || _        y Nsuper__init__r   r   r   r   selfr   r   r   r   	__class__s        ^/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/nn/modules/normalization.pyr   zLocalResponseNorm.__init__4   ,     		
	    inputc                     t        j                  || j                  | j                  | j                  | j
                        S z(
        Runs the forward pass.
        )Flocal_response_normr   r   r   r   r   r    s     r   forwardzLocalResponseNorm.forward=   s-     $$UDIItzz499dffUUr   c                 :     dj                   di | j                  S @
        Return the extra representation of the module.
        z){size}, alpha={alpha}, beta={beta}, k={k} format__dict__r   s    r   
extra_reprzLocalResponseNorm.extra_reprC         B:AARDMMRRr   )-C6?      ?g      ?)__name__
__module____qualname____doc____constants__int__annotations__floatr   r   r&   r/   __classcell__r   s   @r   r   r      ss    : 3M
IL
KH NQ %49EJ	VV V VSr   r   c                   ~     e Zd ZU eed<   eed<   eed<   eed<   	 ddededededdf
 fdZdedefd	Zde	fd
Z
 xZS )r	   r   r   r   r   r   Nc                 Z    t         |           || _        || _        || _        || _        y r   r   r   s        r   r   zCrossMapLRN2d.__init__P   r   r   r    c                     t        j                  || j                  | j                  | j                  | j
                        S r"   )_cross_map_lrn2dapplyr   r   r   r   r%   s     r   r&   zCrossMapLRN2d.forwardY   s-      %%eTYY

DIItvvVVr   c                 :     dj                   di | j                  S r(   r+   r.   s    r   r/   zCrossMapLRN2d.extra_repr_   r0   r   )r1   r2   r   )r3   r4   r5   r8   r9   r:   r   r   r&   strr/   r;   r<   s   @r   r	   r	   J   so    
IL
KH NO %49EJ	WV W WSC Sr   r	   c                        e Zd ZU dZg dZeedf   ed<   eed<   e	ed<   	 	 	 	 	 dde
dede	de	d	df
 fd
ZddZded	efdZd	efdZ xZS )r   a  Applies Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The variance is calculated via the biased estimator, equivalent to
    `torch.var(input, correction=0)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which applies
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias:   the learnable bias of the module of shape
                :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
                The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %

    normalized_shapeepselementwise_affine.rF   rG   rH   Nbiasr   c                    ||d}t         |           t        |t        j                        r|f}t        |      | _        || _        || _        | j                  rrt        t        j                  | j                  fi |      | _        |r/t        t        j                  | j                  fi |      | _        n7| j                  dd        n$| j                  dd        | j                  dd        | j                          y )NdevicedtyperI   weight)r   r   
isinstancenumbersIntegraltuplerF   rG   rH   r   torchemptyrN   rI   register_parameterreset_parameters)	r   rF   rG   rH   rI   rL   rM   factory_kwargsr   s	           r   r   zLayerNorm.__init__   s     %+U;&(8(89 02 %&6 7"4""#D11D^DDK %KK 5 5HH	 ''5##Hd3##FD1r   c                     | j                   rLt        j                  | j                         | j                   t        j
                  | j                         y y y r   )rH   r   ones_rN   rI   zeros_r.   s    r   rV   zLayerNorm.reset_parameters   s?    ""JJt{{#yy$DII& % #r   r    c                     t        j                  || j                  | j                  | j                  | j
                        S r   )r#   
layer_normrF   rN   rI   rG   r%   s     r   r&   zLayerNorm.forward   s0    ||4(($++tyy$((
 	
r   c                 Z     dj                   di | j                  d| j                  d uiS )NzW{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}, bias={use_bias}use_biasr*   r,   r-   rI   r.   s    r   r/   zLayerNorm.extra_repr   <    % $fV'+}}V?CyyPT?TV	
r   )h㈵>TTNNr   N)r3   r4   r5   r6   r7   rR   r8   r9   r:   bool_shape_tr   rV   r   r&   rC   r/   r;   r<   s   @r   r   r   i   s    KZ FMCHo%	J
 #' "    !	 
   
 B'
V 
 


C 
r   r   c                        e Zd ZU dZg dZeed<   eed<   eed<   eed<   	 	 	 	 ddd	dedededed
eddf fdZ	ddZ
dedefdZdefdZ xZS )r   aR  Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The variance is calculated via the biased estimator, equivalent to
    `torch.var(input, correction=0)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``
        bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
            :attr:`affine` is ``True``). Default: ``True``

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    )
num_groupsnum_channelsrG   affinerf   rg   rG   rh   TN)rI   rI   r   c                   ||d}t         	|           ||z  dk7  rt        d| d| d      || _        || _        || _        || _        | j                  r^t        t        j                  |fi |      | _
        |r%t        t        j                  |fi |      | _        n7| j                  dd        n$| j                  dd        | j                  dd        | j                          y )NrK   r   znum_channels (z#) must be divisible by num_groups ()rI   rN   )r   r   
ValueErrorrf   rg   rG   rh   r   rS   rT   rN   rI   rU   rV   )
r   rf   rg   rG   rh   rL   rM   rI   rW   r   s
            r   r   zGroupNorm.__init__%  s     %+U;*$) .QR\Q]]^_  %(;;#EKK$O$OPDK%ekk,&Q.&QR	''5##Hd3##FD1r   c                     | j                   rLt        j                  | j                         | j                   t        j
                  | j                         y y y r   )rh   r   rY   rN   rI   rZ   r.   s    r   rV   zGroupNorm.reset_parametersG  s=    ;;JJt{{#yy$DII& % r   r    c                     t        j                  || j                  | j                  | j                  | j
                        S r   )r#   
group_normrf   rN   rI   rG   r%   s     r   r&   zGroupNorm.forwardM  s)    ||E4??DKKDHHUUr   c                 Z     dj                   di | j                  d| j                  d uiS )NzI{num_groups}, {num_channels}, eps={eps}, affine={affine}, bias={use_bias}r^   r*   r_   r.   s    r   r/   zGroupNorm.extra_reprP  r`   r   )ra   TNNrb   )r3   r4   r5   r6   r7   r8   r9   r:   rc   r   rV   r   r&   rC   r/   r;   r<   s   @r   r   r      s    -^ DMO	JL            	  
       
  D'VV V V
C 
r   r   c            	            e Zd ZU dZg dZeedf   ed<   edz  ed<   e	ed<   	 	 	 	 dde
dedz  de	ddf fd	Zdd
Zdej                  dej                  fdZdefdZ xZS )r   a  Applies Root Mean Square Layer Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Root Mean Square Layer Normalization <https://arxiv.org/pdf/1910.07467.pdf>`__

    .. math::
        y_i = \frac{x_i}{\mathrm{RMS}(x)} * \gamma_i, \quad
        \text{where} \quad \text{RMS}(x) = \sqrt{\epsilon + \frac{1}{n} \sum_{i=1}^{n} x_i^2}

    The RMS is taken over the last ``D`` dimensions, where ``D``
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the RMS is computed over
    the last 2 dimensions of the input.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps (float, optional): a value added to the denominator for numerical stability.
            If not specified, uses the machine epsilon of the computation (opmath) type:
            fp16/bf16 and fp32 inputs use ``torch.finfo(torch.float32).eps``, while fp64
            inputs use ``torch.finfo(torch.float64).eps``. Default: ``None``
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights). Default: ``True``.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> rms_norm = nn.RMSNorm([2, 3])
        >>> input = torch.randn(2, 2, 3)
        >>> rms_norm(input)

    rE   .rF   NrG   rH   r   c                 \   ||d}t         |           t        |t        j                        r|f}t        |      | _        || _        || _        | j                  r/t        t        j                  | j                  fi |      | _        n| j                  dd        | j                          y )NrK   rN   )r   r   rO   rP   rQ   rR   rF   rG   rH   r   rS   rT   rN   rU   rV   )r   rF   rG   rH   rL   rM   rW   r   s          r   r   zRMSNorm.__init__  s     %+U;&(8(89 02 %&6 7"4""#D11D^DDK ##Hd3r   c                 \    | j                   r t        j                  | j                         yy)zS
        Resets parameters based on their initialization used in __init__.
        N)rH   r   rY   rN   r.   s    r   rV   zRMSNorm.reset_parameters  s"     ""JJt{{# #r   xc                 n    t        j                  || j                  | j                  | j                        S r"   )r#   rms_normrF   rN   rG   )r   rs   s     r   r&   zRMSNorm.forward  s'     zz!T22DKKJJr   c                 :     dj                   di | j                  S )r)   zF{normalized_shape}, eps={eps}, elementwise_affine={elementwise_affine}r*   r+   r.   s    r   r/   zRMSNorm.extra_repr  s)    
= 66<fN?C}}N	
r   )NTNNrb   )r3   r4   r5   r6   r7   rR   r8   r9   r:   rc   rd   r   rV   rS   r   r&   rC   r/   r;   r<   s   @r   r   r   W  s    )V FMCHo%	
 !#' "  T\  !	  
 0$K K%,, K
C 
r   r   )rP   rS   r   r   torch.nnr   r#   r   torch.nn.parameterr   
_functionsr	   r@   moduler
   __all__r   r8   listrd   r   r   r   r*   r   r   <module>r}      s       * ( 9  V7S 7StSF S8 c?T!C
 C
Le
 e
P]
f ]
r   