
    9jY              #          d dl mZ d dlZd dlmZ ddlmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZ ddgZ G d de      Zd	d
e de de de d	z   e_        	 	 	 	 	 	 	 d$dee   dee   dee   dee   dedz  dedz  dedz  dededz  dedededededededdf"dZd  Zdee   dee   dee   dee   dedz  dedz  dededededededededdfd!Zdee   dee   dee   dee   dedz  dedz  dededededededededdfd"Zdee   dee   dee   dee   dedz  dedz  deez  dedededededededdfd#Zy)%    )castN)Tensor   )_default_to_fused_or_foreach_device_dtype_check_for_fused_differentiable_doc_foreach_doc_get_scalar_dtype
_get_value_maximize_doc_params_doc
_to_scalar_use_grad_for_differentiable_view_as_real
DeviceDict	OptimizerParamsTAdagradadagradc                        e Zd Z	 	 	 	 	 	 ddddddedeez  dededed	ed
edz  dedededz  ddf fdZ fdZddZ	d Z
edd       Z xZS )r   NF)maximizedifferentiablefusedparamslrlr_decayweight_decayinitial_accumulator_valueepsforeachr   r   r   returnc          
      D   t        |t              r|j                         dk7  rt        d      d|k  st        d|       d|k  st        d|       d|k  st        d|       d|k  st        d|       d|k  st        d|       ||||||||	|
d		}t        |   ||       |
r(|	rt        d
      |rt        d      d| _        d| _        | j                  D ]  }|d   D ]  }| j                  |   }|d   r/t        j                  dt        |d         |j                        nt        j                  dt                     |d<   t        j                   |      rt#        ||      n|}t        j$                  ||t        j&                        |d<     y )Nr   zTensor lr must be 1-element        zInvalid learning rate: zInvalid lr_decay value: zInvalid weight_decay value: z)Invalid initial_accumulator_value value: zInvalid epsilon value: )	r   r   r   r   r   r    r   r   r   z)`fused` does not support `differentiable`z0`fused` and `foreach` cannot be `True` together.Tr   r    is_fuseddtypedevicer(   stepmemory_formatsum)
isinstancer   numel
ValueErrorsuper__init__RuntimeError"_need_device_dtype_check_for_fused_step_supports_amp_scalingparam_groupsstatetorchzerosr
   r)   tensor
is_complexcomplex	full_likepreserve_format)selfr   r   r   r   r   r   r    r   r   r   defaultsgrouppr8   
init_value	__class__s                   S/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/optim/adagrad.pyr3   zAdagrad.__init__   s    b&!bhhjAo:;;by6rd;<<h7zBCCl";L>JKK//;<U;VW  cz6se<==  ()B ,

 	*"#NOO"#UVV6:D3.2D+&& 	E8_ 

1 W~ KK/wH xx c1B1DE f ''* 57PQ2 
  %z1F1F e!	    c                 H   t         
|   |       d }| j                  D ]  }|j                  dd        |j                  dd       |j                  dd       |j                  dd       }|d   D ]  }| j                  j                  |g       }t        |      dk7  s.t        j                  |d         rGt        |d         }|d   r,t        j                  |t        |	      |j                  
      nt        j                  |t                     |d<     t        | j                  j                               }t        |      dk7  xr t        j                  |d   d         }|s8|D ]2  }	t        j                  t        |	d         t        |	            |	d<   4 y y )Nr    r   Fr   r   r   r   r+   r%   r'   r*   )r2   __setstate__r7   
setdefaultr8   getlenr9   	is_tensorfloatr;   r
   r)   listvalues)r@   r8   r   rB   rC   p_statestep_valstate_valuesstep_is_tensorsrE   s             rF   rI   zAdagrad.__setstate__d   s   U# && 	EY-Z/-u5$$Wd3E8_ **..B/w<1$U__WV_-M$WV_5H !> $"3U"C#$88 #\\(:K:MN FO		( DJJ--/0l+q0 
eooOF#7
 ! !LL!F)$,=u,M&	 rG   c                 ~    | j                   D ].  }|d   D ]$  }| j                  |   }|d   j                          & 0 y)z6Calls tensor.share_memory_() on the state sum tensors.r   r.   N)r7   r8   share_memory_)r@   rB   rC   r8   s       rF   share_memoryzAdagrad.share_memory   sG    && 	-E8_ -

1e**,-	-rG   c                 T   d\  }}|d   D ]  }|j                   |d   rt        | dd      rt        |       d| _        ||j                   j                  z  }|t        j                  |      z  }|j                  |       |j                  |j                          | j                  |   }	t        |	      dk(  r|d   rt        |       |d   r/t        j                  dt        |d   	      |j                  
      nt        j                  dt                     |	d<   | j                  d   }
t        j                  |      rt        |
|
      n|
}t        j                   ||t
        j"                        |	d<   |j                  |	d          |j                  |	d           ||fS )N)FFr   r   r5   TFr   r$   r%   r'   r#   r*   r+   r   r,   r.   )gradgetattrr   r5   	is_sparser9   r<   appendr8   rL   r:   r
   r)   r;   rA   r=   r>   r?   )r@   rB   params_with_gradgrads
state_sumsstate_stepshas_sparse_gradhas_complexrC   r8   r   rD   s               rF   _init_groupzAdagrad._init_group   s   '3$x (	2Avv!>g8'
 2!4>CD;166#3#33u//22 ''*QVV$

1u:?W~5a8 !> "3U7^"L#$88 #\\#5F5HI &M 1531-
 !++A.   9;TU6 
 $)??:U5J5J$E%L !!%,/""5=1Q(	2T ++rG   c                 b   d}|$t        j                         5   |       }ddd       | j                  D ]k  }g }g }g }g }| j                  |||||      \  }}	t	        |||||d   |d   |d   |d   ||d   |d   |d   |	|d	   t        | d
d      t        | dd             m |S # 1 sw Y   xY w)zPerform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r    r   r   r   
grad_scale	found_inf)r   r   r   r   rb   r    r   r   rc   r   rf   rg   )r9   enable_gradr7   rd   r   r[   )
r@   closurelossrB   r^   r_   r`   ra   rb   rc   s
             rF   r+   zAdagrad.step   s     ""$ !y! && 	E-/"$E')J(*K+/+;+;'
K,(O[  ;">2z*%L /i(z*$%56'Gn"4t<!$T:!	: A! !s   B%%B.)g{Gz?r   r   r   g|=N)r!   NN)__name__
__module____qualname__r   rN   r   boolr3   rI   rX   rd   r   r+   __classcell__)rE   s   @rF   r   r      s     "+,#E $!EE FNE 	E
 E $)E E E E E d{E 
EN!F-,,\ "* "*rG   a[  Implements Adagrad algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta)
                \text{ (objective)}, \: \lambda \text{ (weight decay)},                          \\
            &\hspace{12mm}    \tau \text{ (initial accumulator value)}, \: \eta\text{ (lr decay)}\\
            &\textbf{initialize} :  state\_sum_0 \leftarrow \tau                          \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \tilde{\gamma}    \leftarrow \gamma / (1 +(t-1) \eta)                  \\
            &\hspace{5mm} \textbf{if} \: \lambda \neq 0                                          \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda \theta_{t-1}                             \\
            &\hspace{5mm}state\_sum_t  \leftarrow  state\_sum_{t-1} + g^2_t                      \\
            &\hspace{5mm}\theta_t \leftarrow
                \theta_{t-1}- \tilde{\gamma} \frac{g_t}{\sqrt{state\_sum_t}+\epsilon}            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `Adaptive Subgradient Methods for Online Learning
    and Stochastic Optimization`_.
    z
    Args:
        a  
        lr (float, Tensor, optional): learning rate (default: 1e-2)
        lr_decay (float, optional): learning rate decay (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        initial_accumulator_value (float, optional): initial value of the
            sum of squares of gradients (default: 0)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-10)
        z	
        a  
        fused (bool, optional): whether the fused implementation (CPU and CUDA only) is used.
            Currently, `torch.float64`, `torch.float32`, `torch.float16`, and `torch.bfloat16`
            are supported. (default: None). Please note that the fused implementations does not
            support sparse or complex gradients.
    .. _Adaptive Subgradient Methods for Online Learning and Stochastic
        Optimization: http://jmlr.org/papers/v12/duchi11a.html

    r   r_   r`   ra   r   rf   rg   rb   r    r   rc   r   r   r   r   r   r!   c                   t        d |D              st        d      ||t        | |	d      \  }}|d}|d}|r)t        j                  j                         rt        d      |r)t        j                  j                         rt        d      |r%t        j                  j                         st        }n-|r%t        j                  j                         st        }nt        } || ||||||||||	|
||       y)	ztFunctional API that performs Adagrad algorithm computation.

    See :class:`~torch.optim.Adagrad` for details.
    c              3   P   K   | ]  }t        |t        j                           y wrk   )r/   r9   r   ).0ts     rF   	<genexpr>zadagrad.<locals>.<genexpr>6  s     @qz!U\\*@s   $&zPAPI has changed, `state_steps` argument must contain a list of singleton tensorsNF)	use_fusedz6torch.jit.script not supported with foreach optimizersz4torch.jit.script not supported with fused optimizers
r   r   r   r   rb   r   r   rc   rf   rg   )	allr4   r   r9   jitis_scripting_fused_adagrad_multi_tensor_adagrad_single_tensor_adagrad)r   r_   r`   ra   r   rf   rg   rb   r    r   rc   r   r   r   r   r   _funcs                     rF   r   r     s    2 @K@@^
 	
 }1Ne

7 }599))+STT'')QRRUYY++-	//1$%!'%rG   c                 P    | j                         }t        j                  |||      S rk   )sizer9   sparse_coo_tensor)rZ   grad_indicesrP   r   s       rF   _make_sparser   g  s"    99;D""<>>rG   c          
         ||t        d      t        j                  j                         st	        |      }t        | |||d      D ]  \  }}}}|dz  }t        |      }|s|n| }|dk7  r*|j                  rt        d      |j                  ||      }|d|dz
  |z  z   z  }|j                  r|j                         }|j                         }|j                         }|j                  t        |||j                  d                   |j!                  |      }|j                         j#                         j                  |	      }|j                  t        ||||z        |        &t        j$                  |      }|r?t        j&                  |      }t        j&                  |      }t        j&                  |      }|j)                  ||d	       |r|j+                         |	z   }n|j+                         j                  |	      }|j-                  ||| 	       |st        j.                  |      }t        j.                  |      }
 y )
N,Expected grad_scale and found_inf to be NoneT)strictr   r   z;weight_decay option is not compatible with sparse gradientsalpha   value)AssertionErrorr9   ry   rz   r   zipr   r\   r4   addcoalesce_indices_valuesadd_r   powsparse_masksqrt_r<   view_as_realaddcmul_sqrtaddcdiv_view_as_complex)r   r_   r`   ra   rf   rg   r   r   r   r   rb   r   r   rc   paramrZ   	state_sumstep_tr+   clrr   grad_valuesstd
std_valuesr<   s                            rF   r}   r}   l  s!   " !6KLL99!!#^*-z;t+ *=&tY 	!&!#t$1~~"Q  88E86DAX--.>>==?D==?L,,.KNN<lKOOA<NOP''-C,,.33C8JJJT<z1IJSVRV   ))%0J))$/!..y9	**51tT3nn&,nn&++C0NN4SDN1--e4!11)<	U*=rG   c                   |rt        d      ||t        d      t        |       dk(  ry t        |      }t        j                  | |||g      }|j                         D ]  \  \  }}}}}t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }|
xr t        d |D              }|rt        ||||||||	d|||||       |rt        |||       |rt        j                  |      }t        j                  j                         s=|d   j                   r.t        j"                  |t        j$                  dd	      d
       nt        j"                  |d       |dk7  r3|rt        j"                  |||
       nt        j&                  |||
      }|D cg c]  }| dt)        |      dz
  |z  z   z   }}t        j*                  |||d       t        j,                  |      }t        j"                  ||	       |dk7  s|rt        j.                  ||       |}nt        j0                  ||      }t        j2                  |||       ! y c c}w )Nz#_foreach ops don't support autogradr   r   c              3   4   K   | ]  }|j                     y wrk   )r\   )rs   rZ   s     rF   ru   z(_multi_tensor_adagrad.<locals>.<genexpr>  s      9
#DNN9
s   Trw   g      ?cpu)r)   r   r   r   )r   rL   r   r   "_group_tensors_by_device_and_dtyperP   r   rO   r   anyr}   r   r9   _foreach_negcompileris_compilingis_cpu_foreach_add_r;   _foreach_addr   _foreach_addcmul__foreach_sqrt_foreach_mul__foreach_mul_foreach_addcdiv_)r   r_   r`   ra   rf   rg   r   r   r   r   rb   r   r   rc   grouped_tensorlistsdevice_params_device_grads_device_state_sums_device_state_steps_r~   device_paramsdevice_gradsdevice_state_sumsdevice_state_stepsdevice_has_sparse_gradr+   	minus_clrr   	numerators                                rF   r|   r|     s   " BCC!6KLL 6{a	BB#FF	
K0  &&(M? 		 	T&\>:DL-8 f/AB!$v,0CD!0 "
S 9
'39
 6
 ""!")! $!-'%#   -7HI --l;L ~~**,1CA1F1M1M"ELLU$C3  2A61##L-|T$11 -| 
 GY
>BRC1
4(1,889
	 
 	 1<UVW!!"34C%1i8$I**<CIy#>[M?z
s   Jc                   | sy |
s|rt        d      |rt        d      ||j                  |ini }||j                  |ini }t        |t              r&t	        |j                        dk7  r|j                  |ind }t        j                  | |||g      }|j                         D ]:  \  \  }}\  \  }}}}}t        t        t           |      }t        t        t           |      }t        t        t           |      }t        t        t           |      }d\  }}|#|j                  ||j                  |d            }|#|j                  ||j                  |d            }|||vr|j                  |d      ||<   ||   }t        j                  |d       t        j                  ||||||||	|||	       |t        j                  ||gt!        |      z         = y )
Nz5`fused` does not support sparse grad or complex paramz<adagrad with fused=True does not support differentiable=Truer   )NNT)non_blocking)r)   r   r   )r   r   r   r   r   rf   rg   )r4   r)   r/   r   strr   r   itemsr   rO   rJ   tor9   r   _fused_adagrad__foreach_sub_rL   )r   r_   r`   ra   rf   rg   r   r   r   r   rb   r   r   rc   grad_scale_dictfound_inf_dictlr_dictgrouped_tensorsr)   r~   r   r   r   r   r   r   r   r   device_grad_scaledevice_found_infs                                 rF   r{   r{     s*   " +RSSJ
 	

 ,6+A		J'r  *3)>		9%B  &b&1c"))n6MBSW   BB	
K0O 
			 + 	 	
	T&\>:DL-8 f/AB!$v,0CD.8++! / : :
f4@!  -88	V$?  6#8 ee6eEGFOB.2%(&	
 '"%5$6=O9P$PS+rG   )NNNFNFF)typingr   r9   r   	optimizerr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   __all__r   __doc__rO   ro   rN   r   r   r}   r|   r{   r$   rG   rF   <module>r      s        $ i
 Ki K^4		 	 
 		 		 5. p  $# " GLG<G VG f	G
 $;G G }G G D[G G G 	G  !G" #G$ 
%G& 'G( 
)GT?
A=LA=<A= VA= f	A=
 A= }A= 	A= A= A= 
A= A= A= A= A=  
!A=Hl?Ll?<l? Vl? f	l?
 l? }l? 	l? l? l? 
l? l? l? l? l?  
!l?^SLS<S VS f	S
 S }S 	S S S 
S S S S S  
!SrG   