
    9j             !       Py   U d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dl mZmZ d dlmZmZ d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dlmc mZ d dlmZm Z m!Z! d d	l"m#Z# d d
l$m%Z% d dlm&Z&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z2 d dl3m4Z4 ejj                  jl                  Z6g Z7e8e9   e:d<   ejv                  jx                  jz                  Z= G d de      Z>	 	 ddedej~                  de@de@fdZA eeAej~                  j                  d      ZC eeAej~                  j                        ZD eeAej~                  j                  d      ZE eeAej~                  j                        ZGde!deHde!fdZI e#e=j                         e/d       eDd!e!d"e!fd#                     ZJ e#e=j                         e/d       eDd!e!d"e!fd$                     ZK e#e=j                         e/d       eDd!e!de!d%eMd&eMfd'                     ZL e#e=j                         e/d       eDd(e!d)eMd*eMd+eMd,e@d-e!fd.                     ZN e#e=j                  j                  g      d/        ZQ e#e=j                  jB                  g      d0e!fd1       ZR e#e=j                         e/       eDd2e!de!fd3                     ZS e#e=j                         e/d       eDd(e!d2e!fd4                     ZT e#e=j                         e/d       d(e!d2e!d5eMd6eMfd7              ZU e#e=j                         e/       eDd2e!de!fd8                     ZV e#e=j                         e/       eDd(e!d2e!de!fd9                     ZW e#e=j                         e/d       d(e!d2e!d&eMfd:              ZX e#e=j                         e/d       eDd(e!d2e!d;eMd<e@fd=                     ZY e#e=j                         e/d       eDdd>e!d2e!d?e9fd@                     ZZ e#e=j                        eDd(e!dAe!fdB              Z[ e#e=j                         e/       eDd2e!de!fdC                     Z\ e#e=j                         e/d       eDd(e!d2e!de!fdD                     Z] e#e=j                        d2e!dEe!de!fdF       Z^ e#e=j                        d(e!d2e!dEe!de`e!e!f   fdG       Z_ e#e=j                         e/       eDd(e!d2e!dHe!dIeMdJeMdKe@d<e@de!fdL                     Za e#e=j                         e/d       eDd(e!d2e!dMe!de!fdN                     Zb e#e=j                         e/       d2e!dOe!de!fdP              ZcdQe!dReHfdSZddTej                  fdUZf e#e=j                         e/       eDe>j                  j                  fd2e!dVe!dReHde!fdW                     Zg e#e=j                         e/d       eDd(e!dAe!dVe!dReHfdX                     Zj e#e=j                        ddY       Zl e#e=j                         e/       eDe>j                  j                  dZfd2e!dVe!dReHd%eMfd[                     Zm e#e=j                  j                        eDd(e!d2e!dVe!dReHd%eMf
d\              Zn e#e=j                  j                        eDd(e!d2e!dVe!dReHd%eMd e!fd]              Zq e#e=j                  j                        eDd(e!d2e!dVe!dReHd^eMf
d_              Zr e#e=j                  j                        eDd(e!d2e!dVe!dReHd^eMd e!fd`              Ztd(e!d2e!dVe!dEe!dz  dReHdaeHdbe!de!fdcZu e#e=j                         e/d       eDd(e!d2e!deHde!fdd                     Zv e#e=j                         e/d       d(e!d2e!dVe!dEe!dz  dReHdaeHdbe!de!fde              Zw e#e=j                         e/d       d(e!d2e!dVe!dEe!dz  dReHdaeHdbe!de!fdf              Zx e#e=j                         e/       eDde>j                  j                  fd2e!dVe!dEe!dz  dReHde!f
dg                     Zy e#e=j                         e/d       eDde>j                  j                  fd(e!d2e!dVe!dEe!dz  dReHde!fdh                     Zz e#e=j                         e/       eDe>j                  j                  fdAe!dVe!dReHde!fdi                     Z{ e#e=j                         e/d       eDe>j                  j                  fd(e!d2e!dVe!dReHde!f
dj                     Z| e#e=j                         e/       ddAe!dOe!dkeMfdl              Z} e#e=j                         e/       dme!dne!de!fdo              Z~ e#e=j                         e/       d(e!dpe8eH   deHdqeHdreHdseHfdt              Z e#e=j                   jB                        	 	 	 	 dd2e!deHdqeHdz  dreHdz  dseHf
dv       Zde!deHdqeHdz  dreHdz  de`eHeHf   f
dwZ e#e=j                         e/       	 	 	 	 ddAe!dxe!deHdqeHdz  dreHdz  dseHfdy              Z e#e=j                         e/       d(e!dpe8eH   deHdzeHfd{              Z e#e=j
                         e/       d(e!dpe8eH   d|eHd}eHd~eHf
d              Zd(e!d e!dej                  fdZ e#e=j                         e/d       eCd(e!de!deHdej                  fd                     Z e#e=j                         e/       eCd(e!de!deHdej                  fd                     Zd Z e#e=j                         e/       dAe!de8eH   de8eH   de8eH   de8eH   de!fd              Z e#e=j                         e/       eDdAe!de8eH   de8eH   de8eH   de8eH   de8eH   de!fd                     Z e#e=j                         e/       d(e!de!d*eMfd              Z e#e=j                         e/       d>e!de8eH   deHdeHdseHde!fd              Z e#e=j                  j                        eD	 dd(e!d2e!deMdz  de!fd              Z e#e=j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        dAe!dkeMde@dz  fd                     Z e#e=j&                         e/dd      dAe!dkeMde@dz  fd              Z e#e=j(                         e/       de!deHde@fd              Z e#e=j*                         e/d      de!deHde@fd              Z e#e=j,                         e/       	 	 	 ddEe!de!deHde@de@de!fd              Z e#e=j.                         e/       d(e!de!deHdeHde@f
d              Zde8eH   fdZde8e!   deHdeHde8e!   fdZde8e!   fdZde8e!   deHfdZde8e!   deHdeHfdZ e#e=j:                  j                  e=j:                  j                  g      	 dde8e!   deHdeHde!dz  de!f
d       Z e#e=j<                  j                  e=j<                  j                  g      	 	 dd2e!de8eH   deHde8e!   dz  de8e!   dz  f
d       Z e#e=j>                  jB                        ddAe!deHdeHde`e!df   fd       Z e#e=j@                  j                        	 ddAe!de8eH   deHde`e!df   fd       Z e#e=jB                  jB                        dd2e!deHdeHde`e!df   fd       Ze=jD                  jF                  j!                  e6j"                        	 dd2e!de!deHde`e!df   fd       Z e#e=jJ                         e/d      eDdd2e!de!de!d%eHd)eHf
d                     Z e#e=jL                         e/       eD	 	 	 dd2e!de!de!d%eHd)eHde@fd                     Z e#e=jN                         e/d      eDdd2e!de!de!d%eHd)eHf
d                     Z e#e=jP                  j                        eDd(e!dAe!de!de!de!dz  deHdeHdeHdeHde8e@   de`e!dz  e!dz  e!dz  f   fdƄ              Z e#e=jP                  j                        d(e!dAe!de!de!de!dz  deHdeHdeHdeHde8e@   dejB                  dejB                  dejB                  de`e!dz  e!dz  e!dz  f   fdȄ       Zde!dz  de!dz  fdɄZ e#e=jV                  j                        de!dAe!de8eH   de!de!dEe!dz  de!dz  de8e@   de`e!dz  e!dz  e!dz  f   fd̈́       Z e#e=jV                  j                        de!dAe!de8eH   de!de!dEe!dz  de!dz  de8e@   dejB                  dejB                  dejB                  de`e!dz  e!dz  e!dz  f   fd΄       Z e#e=jZ                  j                        dAe!de8eH   dEe!dz  deMdz  de`e!e!f   f
dτ       Z e#e=j\                  j                        de!dAe!de8eH   de!dEe!dz  de8e@   de`e!dz  e!dz  f   fdЄ       ZdAe!dEe!dz  de!dz  de!dz  de!dz  dKe@deMdeMde@de`e!e!e!e!dz  e!dz  f   fdՄZ e#e=j`                         e/ddd׫      dAe!dEe!dz  de!dz  de!dz  de!dz  dKe@deMdeMde`e!e!e!f   fd؄              Ze=j`                  j                  j!                  e6j$                        e=j`                  j                  j!                  e6j"                        dAe!dEe!dz  de!dz  de!dz  de!dz  dKe@deMdeMde`e!e!e!f   fdل              Ze=jd                  j                  j!                  e6j"                        dde8e!   fdڄ       Z e#e=jh                  j                        dAe!dEe!dz  de!dz  de!de!deMdeMde`e!e!e!f   fdۄ       Z e#e=jj                  j                        dAe!dEe!dz  de!dz  de!de!dKe@deMdeMde`e!e!e!f   fd܄       Z e#e=jj                  jl                        dAe!dEe!dz  de!dz  dKe@deMdeMde`e!e!e!f   fd݄       Z e#e=jp                  j                        dAe!dEe!dz  de!dz  de!de!dKe@deMdeMde`e!e!e!e!e!f   fdބ       ZdAe!dEe!dz  de!dz  de!de!deMdKe@de!fd߄Z e#e=jt                  j                        dAe!dEe!dz  de!dz  de!de!deMdeMde`e!e!e!e!f   fd       Z e#e=jv                  j                        dAe!dEe!dz  de!dz  de!de!deMdeMde`e!e!e!e!e!e!f   fd       Z e#e=jx                  j                        dAe!dEe!dz  de!dz  de!de!deMdeMde`e!e!e!e!f   fd       Z e#e=jz                         e/dd      eDdd                     Z e#e=j~                         e/       dddddddde!e'z  dTej                  dz  dej                  dz  de@de@dej                  dz  fd              Z e#e=j                  e=j                  e=j                  g       e/       d               Ze=j                  j                  j!                  e6j$                         e#e=j                         e/dddd      dAe!dEe!de!dz  de!dz  de!dz  dKe@deMdeMfd                     Zd Z e#e=j                  j                        de!dAe!dEe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@   de!de`e!e!dz  e!dz  f   fd       Z e#e=j                  j                        de!dAe!dEe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@   de`e!e!dz  e!dz  f   fd       Z e#e=j                  j                        de!dAe!dEe!dz  de!dz  de!dz  de!dz  de!dz  de@deMde8e@   dejB                  dejB                  dejB                  de`e!e!dz  e!dz  f   fd       Z e#e=j                         e/dddǫ      dAe!d(e!dEe!de!dz  de!dz  de!dz  de!dz  deMfd              Z e#e=j                         e/dddǫ      dAe!d(e!dEe!de!dz  de!dz  de!dz  de!dz  deMde!fd              Z e#e=j                         e/       eDdAe!de`eHeHf   fd                     Zd2e)de)de8eH   deHfdZ e#e=j                         e/       d2e)de)de8eH   fd              Z e#e=j                         e/       dAe)de)de8eH   de8eH   de8eH   f
d              Z e#e=j                        dudde)deHdze)de)d)e'f
d       Z e#e=j                         e/       dudde)deHdze)de)d)e'f
d              Zdudde)deHdze)de)d e@d)e'fdZ e#e=j                  j                        e=j                  j                  j!                  e6j"                        dd              Z e#e=j                        de)deHdze)de)fd       Z e#e=j                         e/       de)deHdze)de)fd              Zde)deHdze)de)d e@f
dZ e#e=j                         e/ddM      eDd2e!de`e!e!f   fd                     Z e#e=j                         e/       	 	 	 dde!de@eHz  eMz  de@eHz  eMz  d	ej                  dz  fd
              Z e#e=j                        dd       Zܐd Zݐd Z e#e=j                  j                         e#e=j                  j                         e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        dAe!de8eH   dz  de8eM   dz  de!fd                                                               Z e#e=j                  j                         e#e=j                  j                         e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        dAe!de8eH   dz  de8eM   dz  de!fd                                                               ZddZ e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 ddAe!de8eH   deMdz  de!fd                            Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 ddAe!de8eH   deMdz  de!fd                            Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 	 ddAe!de8eH   deMdz  deMdz  de!f
d                            Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 	 ddAe!de8eH   deMdz  deMdz  de!f
d                            Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 	 	 ddAe!de8eH   deMdz  deMdz  deMdz  de!fd                            Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                         e/dd      	 	 	 ddAe!de8eH   deMdz  deMdz  deMdz  de!fd                            ZeD	 ddAe!de8eH   de8eMdz     de@de!f
d       Zd Zd  Zd! Zd" Z	 dd#Zd$ Zd% Zdd&Zdd'Zd( Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d)                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d*                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d+                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d,                      Zd- Zdd.Zdd/Zd0 Z  e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d1                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d2                      Zd3 Zd4 Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d5                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d6                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d7                      Z
 e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d8                      Z e#e=j                  j                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        d9                      Z e#e=j                  j                         e#e=j                   j                        e=j"                  j                  j!                  e6j"                        e=j"                  j                  j!                  e6j$                        e=j                  j                  j!                  e6j"                        e=j                  j                  j!                  e6j$                        e=j                   j                  j!                  e6j"                        e=j                   j                  j!                  e6j$                        d:                                                         Z e#e=j"                  j                  e=j"                  j                  g       e/       	 ddAe!de8eH   d;e@deMdz  de!f
d<              Z e#e=j                  j                  e=j                  j                  g      e=j                  j                  j!                  e6j$                         e/       	 	 ddAe!de8eH   d;e@deMdz  deMdz  de!fd=                     Z e#e=j                   j                  e=j                   j                  g       e/       	 	 	 ddAe!de8eH   d;e@deMdz  deMdz  deMdz  de!fd>              Zdd?Zd@ Zdxee!   dAee!   dBe!de!fdCZdAe*de!fdDZeDdAe!de8eH   d;e@de8eMdz     de!f
dE       Z e#e=j0                  j                        dFe!dGe!de@fdH       Z e#e=j2                  e=j4                  g       e/       dI               Z e#e=j6                  g      dJ        Z e#e=j8                  g      ddK       Z e#e=j:                  g      dL        Z e#e=j<                  g      dM        Zd2e!dVe!dEe!dz  dReHdaeHde`e!e!f   fdNZ e#e=j@                         e/ddb      d2e!dVe!dEe!dz  dReHdaeHde`e!e!f   fdO              Z  e#e=jB                         e/ddb      d2e!dVe!dEe!dz  dReHdaeHde`e!e!f   fdP              Z!de!dQeMde!fdRZ"de!dQeMde!fdSZ#dTe!de*fdUZ$dVe*dWe!de!fdXZ%dWee!   de!fdYZ&dZeHd;e@dTej                  dej                  fd[Z'd\e!d]eHd^eHd;e@fd_Z(d\e!d`eHd]eHd^eHd;e@f
daZ)d\e!de8eH   d;e@fdbZ*d\e!de8eH   d;e@fdcZ+ e#e=jX                         e/       eDd\e!de8eH   d;e@fdd                     Z,	 	 	 	 ddFe!dee!dfeHdgeHd;e@dhe@de!fdiZ- e#e=j\                         e/       eD	 	 	 ddFe!dee!dfeHdgeHd;e@de!fdj                     Z. e#e=j^                         e/d      eDdk                      Z/ e#e=j`                         e/       dde>j                  j                  fdl              Z0dmejB                  dnejB                  doe@de@fdpZ1e=jd                  j                  j!                  e6j"                        e=jd                  j                  j!                  e6j"                         e/dq      ddrds                     Z2 e#e=jf                  j                  e=jf                  j                  g      e=jf                  j                  j!                  e6j$                         e/       eD	 	 ddAe!de`eHeHf   d;e@dteMdz  dueMdz  de!fdv                            Z4 e#e=jf                  j                        e=jf                  j                  j!                  e6j"                        e=jf                  j                  j!                  e6j$                         e/       eD	 ddFe!de`eHeHf   dz  d;e@de`eMeMf   dz  de!f
dw                                   Z5 e#e=jl                         e#e=jn                         e#e=jp                        eD e/       dFe!de`eHdf   de!fdx                                   Z9 e#e=jt                         e#e=jv                         e#e=jx                        eD e/       dFe!de`eHdf   de!fdy                                   Z=dFe!de`eHdf   dzeeHeHeHge!f   de!fd{Z> e#e=j~                         e#e=j                         e#e=j                         e/d       d|                             ZB e#e=j                         e/d}d~      dddd              ZC e#e=j                         e/       dddd              ZD e#e=j                  j                  e=j                  j                  g       e/       dej                  ddddre'dTej                  dz  dej                  dej                  dz  de@f
d              ZH e#e=j                  j                  g      dej                  ddddqe'dre'dTej                  dz  dej                  dej                  dz  de@fd       ZJ e#e%      d        ZK e#e=j                        e=j                  j                  j!                  e6j$                         e/       dudude>j                  j                  fdAe!dVe!dke'de'dEe!dz  dReHde!fd                     ZL e#e=j                        e=j                  j                  j!                  e6j$                         e/dd      dAe!dVe!dReHde`e!e!f   fd                     ZM e#e=j                  j                        	 	 ddddde!de!d0e!deMde@de!dz  d*eMdz  de`e!e!f   fd       ZOd ZP e#e=j                  g       e/d      eDdd                     ZQ e#e=j                         e/       d               ZR e#e=j                        d        ZS e#e=j                  j                  e=j                  j                  g      dddd2e!dTej                  dz  de!dz  de!fd       ZU e#e=j                  j                  e=j                  j                  g      dd2e!deHdz  fd       ZX e#ejx                  jz                  j                        dd       ZY e#e=j                         e/       dddd              ZZ e#e=j                  j                        ddd2ejB                  d	ej                  dz  dejB                  fd       Z[dddZ\ddddZ] e#e=j                         e/       d               Z^ e#e=j                        dd       Z_ e#e=j                        d(e!d2e!de@de!fd       Z` e#e=j                  j                  e=j                  j                  g       e/       ddddddeHdTej                  dz  dej                  dz  dej                  dz  de@dz  de!fd              Za e#e=j                  j                  e=j                  j                  g       e/       	 dddddddeHde@dTej                  dz  dej                  dz  dej                  dz  de@dz  de!fd              Zd ePe=j                  e=j                          ePe=j                  e=jJ                          ePe=j                  e=jN                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                   e=j                          ePe=j                  e=j                          ePe=j                  e=j
                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                  e=j                          ePe=j                   e=j                         e=j"                  j                  j!                  e6j"                        dd2e!deHde!fd       Zy(      N)CallableIterable)nullcontext)Enum)partialreduce)chainproduct)Anycast)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                       e Zd ZdZdZdZy)	Reductionr         N)__name__
__module____qualname__NONEMEANSUM     \/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_decomp/decompositions.pyr    r    1   s    DD
Cr*   r    Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                 N     t        j                          fd       }|S )Nc                  \  	 r t         t        j                  j                  fnt         f}t	        j
                  | i |D cg c]  }t        ||      r| }}t        j                  |di\  	fd}	fd} t        ||       i t        ||      }
r|S t        ||      S c c}w )Ntype_promotion_kindc                 J    t        | t              r| j                        S | S N
isinstancer   to)xcomputation_dtypes    r+   increase_precz0type_casts.<locals>.inner.<locals>.increase_precO   s"    !V$tt-..r*   c                 J    t        | t              r| j                        S | S r4   r5   )r8   result_dtypes    r+   decrease_precz0type_casts.<locals>.inner.<locals>.decrease_precU   s!    !V$ttL))r*   )
r   torchtypes_Numberpytreearg_tree_leavesr6   utilselementwise_dtypesr   )argskwargsallowed_typesr8   	flat_argsr:   r=   rr9   r<   r.   r,   r/   r-   s           @@r+   innerztype_casts.<locals>.inner@   s     .EVU[[(()6) 	
 ++T<V<
!]+ 
	 

 +0*B*B+
,:+
'<
		 xt,P0OPHM1--5
s   B))	functoolswraps)r,   r-   r.   r/   rJ   s   ```` r+   
type_castsrM   :   s(     __Q. .@ Lr*   T)r-   r.   )r-   )r-   r/   r8   dimreturnc                 j    t        || j                         z
        D ]  }| j                  d      }  | S N)rangerN   	unsqueeze)r8   rN   _s      r+   _unsqueeze_to_dimrV   w   s2    3=! KKOHr*   
grad_inputout_gradyc                 4    | d||z  z
  j                         z  S Nr!   conj_physicalrX   rY   s     r+   tanh_backwardr_   }   s      q1q5y//111r*   c                 4    | |d|z
  z  j                         z  S r[   r\   r^   s     r+   sigmoid_backwardra      s      qAE{11333r*   beta	thresholdc                 t    ||z  j                         }t        j                  ||z  |kD  | | |z  |dz   z        S N      ?)expr>   where)rX   r8   rb   rc   zs        r+   softplus_backwardrj      s=     
TA;;DI-xAS9QRRr*   grad_outputalphascaleinput_scale	is_resultself_or_resultc                     ||z  }|}|}|r&t        j                  |dk  | |z  ||z   z  | |z        S t        j                  |dk  | |z  |z  t        j                  ||z        z  | |z        S Nr   )r>   rh   rg   )	rk   rl   rm   rn   ro   rp   negcoefposcoef
negiptcoefs	            r+   elu_backwardrv      s     emGGJ{{a*$(@A'!
 	
 {{a*$w.>J;V1WW'!
 	
r*   c                 .    t        j                  | |      S r4   )r>   	full_likeselfvalues     r+   fill_scalarr|      s    ??4''r*   r{   c                     t        j                  j                         dk(  fd       t        j	                  |       S )Nr   c                  ,    d j                          dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrN   )r{   s   r+   <lambda>zfill_tensor.<locals>.<lambda>   s    RSXS\S\S^R__jk r*   )r>   _checkrN   atencopyry   s    `r+   fill_tensorr      s3    	LL		qk 99T5!!r*   rz   c                 f    t        j                  t        j                  | dz   d      d      dz  S N   r   min   maxr>   clamprz   s    r+   hardsigmoidr      s)     ;;u{{4!83;a??r*   c                 H    t        j                  |dkD  |dk  z  | dz  d      S )Ng      g      @gUUUUUU?        r>   rh   rk   rz   s     r+   hardsigmoid_backwardr      s0     ;;	$y! r*   min_valmax_valc                 B    t        j                  ||k  ||k\  z  d|       S )Nr   r   )rk   rz   r   r   s       r+   hardtanh_backwardr      s$    
 ;;DGO<c;OOr*   c                 l    | t        j                  t        j                  | dz   d      d      z  dz  S r   r   r   s    r+   	hardswishr      s.     %++ekk$(:BBQFFr*   c           
      x    t        j                  |dk  dt        j                  |dk  | |dz  dz   z  |             S )Nr   r         ?r   r   s     r+   hardswish_backwardr      sA     ;;
D1HkdQh#-=>L r*   c                 6    t        j                  ||k  d|       S rr   r   )rk   rz   rc   s      r+   threshold_backwardr      s     ;;ty(![99r*   negative_slopeself_is_resultc                 <    t        j                  |dkD  | | |z        S rr   r   )rk   rz   r   r   s       r+   leaky_relu_backwardr      s      ;;taxkN.JKKr*   gradapproximatec                    d}d}d}|dk(  ri||z  dz  }d}||z  }||z  }	||||	z  z   z  }
t        j                  |
      }d|z  }d|z   }d|z  }d||z  z
  }|dd|z  |z  z   z  }||z  |z  }| ||z   z  S |}||z  dz  }ddt        j                  ||z        z   z  }|t        j                  ||z  d	z        z  }| |||z  z   z  S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   r         )r>   r   erfrg   )r   rz   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberJ   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfs                        r+   gelu_backwardr      s'    %G&I'Jf*$s*d{/0ZZ&
TzJ+j:55 AF
T(9$9:/14DD)99::Y&,Q4&=112eiitd 233sTCZ'((r*   inputc                     t        j                  t        j                  |            }t        j                  |      }||z  d||z  z
  z  }| ||z   z  S r[   )r>   r   Fsoftplussigmoid)rk   r   input_tanh_softplusinput_sigmoidouts        r+   mish_backwardr     sV      **QZZ%67MM%(M
-
1':=P'P#P
QC-344r*   c                 2    | t        j                  |       z  S r4   )r>   r   r   s    r+   silur   "  s     %--%%%r*   c                 \    ddt        j                  |       z   z  }| |z  d|d|z
  z  z   z  S r[   )r>   rg   )rk   rz   r   s      r+   silu_backwardr   )  s<     1uyy$''(G AG(<$<==r*   weightc                 <    t        j                  | dkD  | || z        S rr   r   )rz   r   s     r+   _prelu_kernelr   1  s    ;;taxv}55r*   c                 ~    t        j                  |dkD  | || z        }t        j                  |dkD  d|| z        }||fS )Nr   r   r   )rk   rz   r   
input_gradweight_grads        r+   _prelu_kernel_backwardr   6  sE     TAX{F[4HIJ++dQhTK-?@K$$r*   noiseloweruppertrainingc                 h    |r| j                  |      S ||z   dz  }t        j                  | |||      S Nr"   )mulr   r   )rk   rz   r   r   r   r   r   r   s           r+   rrelu_with_noise_backwardr   A  s@     u%%%-1,''~~
 	
r*   bufferc                    |dk  }t        j                  |dd      }t        j                  |dd      }|j                         dkD  r|n(t        j                  t        j                  |             }| |||d|z   z  z  z
  z  S )Nr   r!   rR   )r>   rh   numelrg   abs)rk   rz   r   in_negative	max_derivsignri   s          r+   log_sigmoid_backwardr   V  sx     (KKA.I;;{Ar*D ,,.1$%))UYYt_4D*EA)da1q5k&::;;r*   otherc                    t        j                  | j                        st        j                  | j                        rt        j
                  n| j                  }| j                  dd|      }| t	        j                  ||      z  S )Nr)          @dtype)rC   is_integer_dtyper   is_boolean_dtyper>   float32new_fullpow)rz   r   	two_dtype
two_tensors       r+   ldexpr   c  sh    
 !!$**-1G1G

1S 	ZZ 
 r3i8J%))J...r*   loss	reductionc                     |t         j                  j                  k(  rt        j                  |       S |t         j
                  j                  k(  rt        j                  |       S | S r4   )r    r'   r{   r>   meanr(   sum)r   r   s     r+   apply_loss_reductionr   o  sH    INN(((zz$	imm))	)yyr*   r   c                     | t         j                  k(  rt         j                  S | t         j                  k(  rt         j                  S | t         j
                  k(  rt         j                  S y r4   )r>   	complex32float16	complex64r   
complex128float64r   s    r+   to_real_dtyper   x  sK    }}	%//	!}}	%""	"}} 
#r*   targetc                 *    | |z
  dz  }t        ||      S r   )r   )rz   r   r   r   s       r+   mse_lossr     s     6MaDi00r*   c                 |    |t         j                  j                  k(  rd|j                         z  nd}|||z
  z  | z  S )Nr   )r    r'   r{   r   )rk   r   r   r   norms        r+   mse_loss_backwardr    s;     #,y~~/C/C"C3D56>"[00r*   c                     t        j                  | ||      }| j                  t        d            }t        j                  ||d      }t        j
                  |      }t        j                  |||      S )N)rN   r   z-infTrN   keepdim)r>   softmaxeqfloatall
zeros_likerh   )rz   rN   r   r   maskedmasked_rowszeross          r+   safe_softmaxr    s[    
--#U
3CWWU6]#F))FT:KS!E;;{E3//r*   rf   c                     | |z
  j                         }t        j                  ||k  d|dz  z  |z  |d|z  z
        }t        ||      S )Nr   r"   )r   r>   rh   r   )rz   r   r   rb   r   s        r+   smooth_l1_lossr    sO     6M D;;td{C$'MD$8$t:KLDi00r*   c                    |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  |      }|| z  }t	        j                  ||k  ||z  |z  |t	        j                  |      z        S re   )r    r'   r{   r   r>   r   rh   r   )	rk   rz   r   r   rb   r   r8   abs_x	norm_grads	            r+   smooth_l1_loss_backwardr    s{    
 "+inn.B.B!B3DvAIIaLE{"I;;AEJJqM! r*   c                 h    t        | ||||      }t        ||j                         t        ||d      S NT	copy_fromcopy_toexact_dtype)r  r   shaper   )rk   rz   r   r   rb   rW   results          r+   smooth_l1_loss_backward_outr    s3     %[$	4PFj&,,/FJDQQr*   deltac           
          |t         j                  j                  k(  rd|j                         z  nd}||z
  }t	        j
                  || k  | | z  |z  t	        j
                  ||kD  || z  |z  ||z  | z              S re   )r    r'   r{   r   r>   rh   )rk   rz   r   r   r  r   r8   s          r+   huber_loss_backwardr    s    
 "+inn.B.B!B3DvA;;	UF
	e#AItk1E94!8k;QR r*   c                 h    t        | ||||      }t        ||j                         t        ||d      S r  )r  r   r  r   )rk   rz   r   r   r  rW   r  s          r+   huber_loss_backward_outr!    s3     !dFIuMFj&,,/FJDQQr*   ignore_indextotal_weightc                    |j                         dk  rdnd}|t        j                  j                  k(  r| |z  } |j                         dk(  r|j                         dkD  r|d   }|j	                  |      }t        j                  ||k7  |d      }t        j                  |      }	t        j                  |	||d      }	|	j                         | j                         cxkD  rdkD  rn n| j	                  |      } |Nt        |j                               D 
cg c]  }
d }}
|j                  d   ||<   |j                  |      }| |z  } t        j                  ||k7  | d      } |	| z  S c c}
w )Nr"   r   r!   g      )rN   r    r'   r{   rT   r>   rh   r	  scatterrS   r  reshape)rk   rz   r   r   r   r"  r#  channel_dimsafe_targetrW   rU   	new_shapes               r+   _nll_loss_backwardr*    sC    xxzA~!1KINN(((!L0
 xxzQ6::<!+k*F++f4fa@K!!$'Jz;TJJ~~+//+/a/!++K8 %dhhj 121Q2	2!'a	+	*!F*++f4k1EK## 3s   	E'c                    |j                         dk  rt        d      t        j                  |j                         |      }|j	                  |      }|dz  dk7  rt        d| d|       |dz  }|j                  |d|      }|j                  |||      }t        j                  |      }d|z
  |z  |z  | z  }	|| z  }t        j                  ||	g|      S )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size rf   r   )	rN   AssertionErrorrC   canonicalize_dimsizenarrowr>   r   cat)
rk   rz   rN   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfs
             r+   glu_backwardr8    s     xxzQIJJ%%dhhj#6H
))H
C
Qw!|<XJiPSuU
 	
 qIHa3IXy)<Jz2	!	!%77)CkQ  ,k999(*=>HMMr*   c           	      ^   d|j                         cxk  rdk  sn t        d|j                          d      |j                         dkD  rt        d|j                          d      |j                         dk(  xr |j                         dk(  }|sE|j                  d   |j                  d   k(  s&t        d|j                   d|j                   d	      |j                         dk7  r*t        d
|j                   d|j                          d      |+|j                         |j                  d   k7  rt        d      |t        j
                  j                  k(  r|j                         dk(  rn| j                         dk(  r| j                  d   |j                  d   k(  szt        d|j                  d    d| j                          d| j                  d          | j                         dk  r| j                         dk(  st        d| j                         t        | ||||||      S )Nr   r"   %input tensor should be 1D or 2D, got Dr!   A0D or 1D target tensor expected, multi-target not supported, got size mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rR   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rN   r,  r  r   r    r&   r{   r*  )rk   rz   r   r   r   r"  r#  no_batch_dims           r+   nll_loss_backwardrB  -  s+     q DTXXZLPQRSSzz|aOPVPZPZP\~]^_
 	
 88:?8vzz|q'8LTZZ]fll1o=(Jv||nAN
 	
 q H!!""\%7%7%9$:*F
 	

 flln

2>J
 	
 INN(((TXXZ1_!Q&;+<+<Q+?4::a=+P I$**UV- Y""-//"3!44KKL]L]^_L`Kac 
 !Q&;+<+<+>!+C I+J[J[I\]  T669lL r*   c           	      ^   |j                         dk7  rt        d|j                                |j                         dk7  rt        d|j                                |j                  d   |j                  d   k(  r>|j                  d   |j                  d   k(  r|j                  d   |j                  d   k(  s%t        d|j                   d	|j                         |j                         dk7  r*t        d
|j                   d|j                          d      t	        | ||||||      S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   r=  r>  r@  z ( z, elements))rN   r,  r  r   r*  )rk   rz   r   r   r   r"  r#  s          r+   nll_loss2d_backwardrE  `  s;    xxzQabfbjbjblamn
 	
 zz|qcdjdndndpcqr
 	

 	

1a(JJqMV\\!_,JJqMV\\!_,(Jv||nM
 	
 q  &&'s<+=+=+?*@M
 	

 T669lL r*   c           	      "   |dz
  t        j                  t        j                  |        | j                  dd            z  |t        j                  t        j                  |       | j                  dd            z  z
  }|||z  }t        ||      S )Nr!   r)   i)r>   maximumlog1pr   logr   )rz   r   r   r   r   s        r+   binary_cross_entropyrJ    s     QJ%--TEDMM"d3 uyyb$0GHHID f}i00r*   c                     d}| ||z
  z  t        j                  |d|z
  z  |      z  }|||z  }|t        j                  j                  k(  r||j                         z  }|S )Ng-q=r!   r   )r>   r   r    r'   r{   r   )rk   rz   r   r   r   EPSILONr  s          r+   binary_cross_entropy_backwardrM    sg     GD6M*U[[T9JPW-XXF&INN((($**,&Mr*   c                 r    t        j                  t        j                  |  |z              }t        ||      S r4   )r>   rH  rg   r   )r   r   r   r   s       r+   soft_margin_lossrO    s.     ;;uyy%&12Di00r*   c                     || z  t        j                  ||z        dz
  z  }|t        j                  j                  k(  r||j                         z  }|S r[   )r>   r   r    r'   r{   r   )rk   rz   r   r   rW   s        r+   soft_margin_loss_backwardrQ    sM     +%v})E)IJJINN((($**,.
r*   pc                 6    t         j                  | |z
  |      S )N)rR  )r   r   )r   r   rR  s      r+   distrT    s     99UU]a9((r*   x1x2c                    | j                  d      j                  dd      }t        j                  |t        j                        }|j                  d      j                  dd      }t        j                  |t        j                        }t        j
                  | j                  d      ||gd      }t        j
                  |||gd      }|j                  |j                        }|j                  d      j                         S )Nr"   rR   Tmemory_formatr   )r   r   r>   	ones_likecontiguous_formatr0  r   matmulmT	clamp_minsqrt)	rU  rV  x1_normx1_padx2_normx2_padx1_x2_r  s	            r+   _euclidean_distrg    s     ffQimmB%G__WE4K4KLFffQimmB%G__WE4K4KLF
))RVVBZ&12
6C
))R)2
.CZZFA##%%r*   input_sizesstartendstepc                 X    | j                  |      }t        j                  || ||||      S r4   )	new_zerosr>   slice_scatter)rk   rh  rN   ri  rj  rk  rW   s          r+   slice_backwardro    s/     &&{3Jz;UCNNr*   r!   c                    ddl m} | j                         }|dk(  rt        d      t	        j
                  | j                         |      }t        | j                               }t        | j                               }|dk  rt        d      ||nd}	||nt        j                  }
|	dk  r|	||   z  }	|
dk  r|
||   z  }
|	dk  rd}	n|	||   kD  r||   }	 ||
t        j                  k(        r||   }
n|
|	k  r|	}
n|
||   kD  r||   }
| j                         |	||   z  z   }|
|	z
  }||z   dz
  |z  ||<   ||xx   |z  cc<   | j                  rt        d      | j                  |||      S )Nr   statically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesrr  rN   RuntimeErrorrC   r-  listr.  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)rz   rN   ri  rj  rk  rr  ndimsizesstrides	start_valend_valry  lens                r+   slice_forwardr    s    L88:DqyIJJ

 
 S
1CE4;;=!Gqy899*I_c#++G1}U3Z	{5:1}		U3Z	#J	W34*	9		5:	*((*Y-EEN
I
C*q.T)E#JCLDL!J
 	
 ug~>>r*   c                 n    | j                   |   dt        ffd} ||dd      } |||      }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rO   c                 L    | |S | dk  r| z   } t        t        | |      |      S rr   r   r   )valr   r   defaultdim_sizes       r+   
clamp_wrapz(_normalize_start_end.<locals>.clamp_wrap.  s0    ;N7.C3sE?E**r*   r   )r  int)r8   rN   ri  rj  r  r  s        @r+   _normalize_start_endr  %  sJ     wws|H+# + ua1-E
S%8
4C#:r*   srcc           	      |   t        j                  | j                  |      }| j                  |   }t	        | |||      \  }}t        | j                        }||z
  |dz
  z   |z  ||<   |j                  |      }|dk(  r||k(  r|dk(  r|j                         S d g| j                         z  }t        j                  || j                        }	|	|z
  |z  ||<   t        j                  || j                  t        j                        }
|dk7  rt        j                  |
|	|k\        }
||k7  rt        j                  |
|	|k        }
|dk7  rt        j                  |
|	|z
  |z  dk(        }
dg| j                         z  }d||<   |
j                  |      }
t         j#                  |
t         j%                  ||
|d      |       S )Nr!   r   devicer  r   rR   )rC   r-  r}  r  r  ru  expandclonerN   r>   aranger  onesboollogical_andviewr   rh   _unsafe_masked_index)r   r  rN   ri  rj  rk  r  src_sizeindicesidxmask
mask_shapes               r+   rn  rn  <  s    
 
 S
1C{{3H%eS%=JE3EKK H5[D1H-$6HSM
**X
CzcXo$!)yy{$(6EIIK#7G
,,x
5C%KD(GCL::hu||5::FDz  se|4
h  sSy1qy  et';q'@Auyy{"JJsO99Z D::dD55c4!LeTTr*   indexc                 T    | j                  |      }t        j                  || ||      S r4   )rm  r>   select_scatter)rk   rh  rN   r  rW   s        r+   select_backwardr  e  s+     &&{3J
KeDDr*   offsetdim1dim2c                 V    | j                  |      }t        j                  || |||      S r4   )rm  r>   diagonal_scatter)rk   rh  r  r  r  rW   s         r+   diagonal_backwardr  l  s-    
 &&{3J!!*k64NNr*   input_dtypec                 F    | j                   |k7  r|j                  |      }|S r4   )r   r7   )rk   rW   r  s      r+   _cast_grad_to_input_dtyper  u  s&     K']];/
r*   outputc                 ~    | |z  }||t        j                  ||d      z  z
  }t        | ||      j                         S NTr  )r>   r   r  
contiguous)rk   r  rN   r  new_grad_outputrW   s         r+   _softmax_backward_datar  }  sK     "F*O 6EIIS$- $ J %[*kJUUWWr*   c                 ~    | t        j                  |      t        j                  | |d      z  z
  }t        | ||      S r  )r>   rg   r   r  )rk   r  rN   r  rW   s        r+   _log_softmax_backward_datar    sA     uyy0599d4   J %[*kJJr*   c                     | |dz  z   ||dz
  z  z
  }t        t        j                  t        j                  |      } |d||      j	                  d      } |d||z  |      j	                  d      }	||	z   S )z/Utility function to implement im2col and col2imr"   r!   r   r  r   rR   )r   r>   r  int64rT   )
input_dkernel_d
dilation_d	padding_dstride_dr  blocks_d	arange_kwblocks_d_indiceskernel_grids
             r+    _im2col_col2im_indices_along_dimr    s     Q&x!|)DDHEKKGI !Hh7AA!D Ax*4jAKKBOK k))r*   kernel_sizedilationpaddingrv  c           
         t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d        |d        |d	d
        |d       | j                  t              }t        j                  |dv xr t	        d dd  D              fd       t        d t        dd        D              t        j                  t	        d D              fd       |dk(  }|s| j                  d      } | j                  \  }}	}
}\  }}\  }}\  }}\  }}t        |
||||| j                        }t        |||||| j                        }t        j                  | ||||f      }|j                  d      j                  d      }|d d d d ||f   }|j                  dddddd      }|j                  d      }|j                  d      }|j                  ||	|z  |z  ||z        }|s|j                  d      }|S )Nr"   c                       y)Nz"im2col(): only 2D kernel supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>      r*   c                       y)Nz$im2col(): only 2D dilation supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  r  r*   c                       y)Nz#im2col(): only 2D padding supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  r  r*   c                       y)Nz"im2col(): only 2D stride supportedr)   r)   r*   r+   r   zim2col.<locals>.<lambda>  r  r*   c                      |rt        d  D              nt        d  D              }t        j                  | fd       y )Nc              3   &   K   | ]	  }|d kD    ywr   Nr)   .0rR  s     r+   	<genexpr>z1im2col.<locals>.check_positive.<locals>.<genexpr>       (Q1q5(   c              3   &   K   | ]	  }|d k\    ywr  r)   r  s     r+   r  z1im2col.<locals>.check_positive.<locals>.<genexpr>       ;RqAF;Rr  c                       d  S Nz& should be greater than zero, but got r)   param
param_names   r+   r   z0im2col.<locals>.check_positive.<locals>.<lambda>      ZL(NugV r*   r  r>   r   r  r  strictconds   ``  r+   check_positivezim2col.<locals>.check_positive  4    ,2s(%((;RE;R8RV	
r*   r  r  r  Fr  rv  r   rD  c              3   &   K   | ]	  }|d k7    ywr  r)   r  ds     r+   r  zim2col.<locals>.<genexpr>       :!qAv:r  r   c                       dt                S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler  s   r+   r   zim2col.<locals>.<lambda>       --25\N< r*   c              3   \   K   | ]$  \  }}}}}d |d|z  z   ||d z
  z  z
  d z
  |z  z    & yw)r!   r"   Nr)   )r  r   paddilkersts         r+   r  zim2col.<locals>.<genexpr>  sF      "Cc3 	
S1s7]SC!G_,q0R77s   *,rZ  c              3   &   K   | ]	  }|d kD    ywr  r)   )r  cs     r+   r  zim2col.<locals>.<genexpr>  s     'aAE'r  c                  F    dt        dd         d d  d d d dS )	Nz!Given an input with spatial size rZ  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  )r  r  output_sizer  r  rv  s   r+   r   zim2col.<locals>.<lambda>  sL    3E%*4E3F G"m;xj 9)9VH -]D	F r*   rD  r   rR   r!   r      T)r>   r   r  r  r  r  ziprT   r  r  r   r  permuter.  r&  squeeze)r   r  r  r  rv  r  r}  batched_input	batch_dimr'  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr  num_blocks_rownum_blocks_colr  r  s    ````                     @@r+   im2colr    s~    
LL[!Q&(TU	LLX!#%ST	LLW"$QR	LLV!#OP
 ;.8Z(8Yu568$KKEu:D	LL:3:uRSz::	<
  &)"#J;'
 K 
LL';''	F 	F AIM"/4{{,I{GWHh"Iy%J
$Hh9:y(ELL ::y(ELL 55Iy) LML+55b9CCBG!Q 24FFGF^^Aq!Q1-F',,Q/N',,Q/N^^;)H4n~6UF "Mr*   r  c                   !" t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        t        j                  t              dk(  d        dd} |d	        |d
        |dd        |d        |d       | j                  "t        "      }t        j                  |dv xr t	        d "dd  D              "fd       d   d   z  }t        j                  "d   |z  dk(  "fd       t              D 	
cg c]"  \  }	}
}}}d|	d|
z  z   ||dz
  z  z
  dz
  |z  z   $ }}}}
}	}|d   |d   z  !t        j                  "d   !k(  !"fd       t        j                  !dkD  !"fd       |dk(  }|s| j                  d      } | j                  "\  }}\  }}\  }}\  }}\  }}| j                  "d   "d   |z  gz   |z         } | j                  dddddd      } t        |||||| j                        }t        |d      }t        |||||| j                        }t              D cg c]  \  }}|d|z  z    }}}| j                  "d   "d   t              z  g|z         }d d ||f} t        j                  || | d      }t!        j"                  || | | | f      }|s|j%                  d      }|S c c}}}}
}	w c c}}w )Nr"   c                       y)Nzonly 2D output_size supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  r  r*   c                       y)Nzonly 2D kernel supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  r  r*   c                       y)Nzonly 2D dilation supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  r  r*   c                       y)Nzonly 2D padding supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  r  r*   c                       y)Nzonly 2D stride supportedr)   r)   r*   r+   r   zcol2im.<locals>.<lambda>  r  r*   Tc                      |rt        d  D              nt        d  D              }t        j                  | fd       y )Nc              3   &   K   | ]	  }|d kD    ywr  r)   r  s     r+   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c              3   &   K   | ]	  }|d k\    ywr  r)   r  s     r+   r  z1col2im.<locals>.check_positive.<locals>.<genexpr>  r  r  c                       d  S r  r)   r  s   r+   r   z0col2im.<locals>.check_positive.<locals>.<lambda>  r  r*   r  r  s   ``  r+   r  zcol2im.<locals>.check_positive  r  r*   r  r  r  Fr  rv  r  )r"   r   c              3   &   K   | ]	  }|d k7    ywr  r)   r  s     r+   r  zcol2im.<locals>.<genexpr>   r  r  rZ  c                       dt                S )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r  s   r+   r   zcol2im.<locals>.<lambda>!  r  r*   r   r!   c                      dd    d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = rZ  z and kernel_size=r)   )r  r  s   r+   r   zcol2im.<locals>.<lambda>'  s#     ==B2YK H"m% r*   rR   c                  :    d d d d d d  dd    d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rR   .r)   Lr  r  r  r  r  rv  s   r+   r   zcol2im.<locals>.<lambda>4  F    $[M} M:Zy	& B))*9U2YKqB r*   c                  :    d d d d d d  dd    d	S r  r)   r   s   r+   r   zcol2im.<locals>.<lambda>:  r"  r*   r   rD  r  
accumulater  )r>   r   r  r  r  r  rT   r&  r  r  r  rV   rm  prodr   _unsafe_index_putr   r  r  )#r   r  r  r  r  rv  r  r}  prod_kernel_sizer   r  r  r  r  colr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_colorR  output_padded_sizer  r  r!  r  s#    `````                           @@r+   col2imr0     s    
LL[!Q&(OP	LL[!Q&(JK	LLX!#%IJ	LLW"$GH	LLV!#EF
 ;.8Z(7Ie468$;.KKEu:D	LL:3:uRSz::	<
 #1~A6	LLb	$$)	% '*(K'
 "Cc3 	
S1s7]SC!G_,q0R77C  	AQA	LLb	Q	B 	B 
LL	A	B 	B AIM"KKELE5Hh"Iy%J
$Hh MM58U1X1A%AB[PSVVWEMM!Q1a+E2xY%,,K $K3K2xY%,,K 14K0IJ1!a!e)JJ__	q58tK0014FFF {K
0C##FC4#HFUU6YJ
YJ
KLF"MkV Ks   6'M9Mr  c                 z    | |j                  |       |z  z  j                  t        j                  |             }|S NrX  )type_asr  rC   r   )rk   r  rm   rI   s       r+   native_dropout_backwardr4  c  sB     
[1E9	:AA11+> 	B 	A Hr*   
input_size	dimensionr.  c                    t        |      dk(  rt        j                  | d      S t        j                  t        |      |      }t        j
                  ||   | j                  t        j                        }|j                  d||      j                         }| j                  d|dz         j                  ||dz         } | j                  |      }d|z  |fz   }t        j                  ||| d      j                         S )Nr   r  rR   r!   r4   Tr$  )r  r>   squeeze_copyrC   r-  r  r  int32unfoldflattenmovedimrm  r   r'  r  )	r   r5  r6  r.  rk  rN   r  rW   r  s	            r+   unfold_backwardr=  r  s    
 :!!!$**

 
 Z)
<C
,,z#t{{%++
NC
**Qd
#
+
+
-C<<C!G$,,S#':D 
+JcMSF"E!!*eTd!KVVXXr*   epsc           
      .   |A|}d|z
  }t        j                  t        j                  ||k\  ||k        | |d|z
  z  z  d      S t        j                  t        j                  |dk\  |dk        | |d|z
  z  z  |j                  dt	        d                  S )Nrf   r   r)   nan)r>   rh   r  r   r  )rk   rz   r>  lohis        r+   logit_backwardrC    s    
 2X{{dbj$"*543:./
 	
 {{dck43;743:./MM"eEl+
 	
r*   trainc                 d    |r|dk7  rt         j                  | ||      d   S | j                         S rr   )r   native_dropoutr  )r   rR  rD  s      r+   dropoutrG    s3     a""5!U3A66{{}r*   out0out1c                    |r|dk7  r|dk(  r:t        j                  |       t        j                  | t         j                        fS | j                  j                  st        d      t        j                  |       |kD  }|| z  t        dd|z
  z        z  }||fS | t        j                  | t         j                        fS )Nr   r!   r   z?result type Float can't be cast to the desired output type Longrf   )	r>   r	  r  r   is_floating_pointrt  	rand_liker  r[  )r   rR  rD  	bool_maskress        r+   rF  rF    s     a6$$U+U-=-=e5::-VWW{{,,Q  OOE*Q.	%%sQw"88YuuEJJ?@@r*   half_to_floatc                 B   ddl m} | j                         } |r6| j                  t        j
                  k7  rt        d| j                   d      t        j                  | t        j                  j                        \  }}| j                  |      }  || j                         dk(        rt	        j                  |       }n0t	        j                  | |d      }t	        j                  | |z
        }|t	        j                  ||d      z  }|s|j                  |      }|S Nr   guard_or_falsez%half_to_float is True but x.dtype is z, expected torch.halfr2   T)r  )rs  rS  r  r   r>   halfr,  rC   rD   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr7   r   rg   amaxr   )	r8   rN   rO  rS  r9   r<   unnormalizedx_maxr  s	            r+   _softmaxr[    s     E 	
A77ejj  7y@UV  ',&>&>	uDDLL'#| 	
Aaggi1n%yy|

1c40yyU+EIIlCFFF<(Mr*   )r  c                 F   ddl m} | j                         } |r6| j                  t        j
                  k7  rt        d| j                   d      t        j                  | t        j                  j                        \  }}| j                  |      }  || j                         dk(        r| }nt	        j                  | |d      }| |z
  }t	        j                  t	        j                  t	        j                   |      |d            }||z
  }	|s|	j                  |      }	|	S rQ  )rs  rS  r  r   r>   rU  r,  rC   rD   rV  rW  r7   r   rX  rI  r   rg   )
r8   rN   rO  rS  r9   r<   shiftedrZ  shifted_logsumexpr  s
             r+   _log_softmaxr_    s     E 	
A77ejj  7y@UV  ',&>&>	uDDLL'#| 	
Aaggi1n%

1c40e)		%))EIIg,>T"RS((F<(Mr*   r  padding_idxscale_grad_by_freqsparsec                     | j                         dk7  rt        d| j                          d      |j                  dk  r4| j                  d|      }|j                  dk(  r|j	                  d      }|S | |   S )Nr"   z'weight' must be 2-D, got z-Dr!   r   )rN   r,  r}  index_selectr  )r   r  r`  ra  rb  r   s         r+   	embeddingre    ss     zz|q9&**,rJKK||q!!!W-<<1++a.C
gr*   num_weightsc                 t   t        j                  | t         j                  j                        \  }}| j	                  |      } t        |t        j                        }|rZ|j                  |f      }t        j                  |      }t        j                  ||g|d      }||   }	| |	j                  d      z  } t        ||k(  | j                        }
| j                  |
d      }| j                  |f| j                   |j                  d  z         }t        j                  ||g|d      j	                  |      S )NrT  Tr$  rR   r   )rC   rD   rV  rW  r7   r   r>   longrm  r[  r   r'  rT   rV   r}  masked_fillr  )rk   r  rf  r`  ra  r9   r<   countsr  grad_weights_scaler  r   grad_weights                r+   embedding_dense_backwardrm    s,    ',&>&>)N)N)V)V'#| ..!23K%guzz:G""K>2w'''	4D'Q#G_!$6$@$@$DDW3[5E5EFD""4+D''	**7<<>::K !!+y$4!PSS r*   c                 "    d}| D ]  }||z  }	 |S r[   r)   )r8   rI   is      r+   r&  r&  #  s$    	A 	QHr*   tensors
num_chunksc                 Z   g }| D ]  }|j                         }||   |z   dz
  |z  |z  }|||   k7  r;dgdz  |j                  |z
  dz
  z  d|||   z
  gz   }t        j                  ||d      }|d | t	        j
                  |dg      z   }|j                  |j                  |              |S )Nr!   r   r"   rR   )r.  r}  r   constant_pad_ndr>   Sizeappendr&  )	rp  rN   rq  padded_tensorstensortensor_sizepad_along_dimr  	view_sizes	            r+   
_pad_chunkr{  *  s    
 N 9kkm$S)J6:zIJVK,,#'V[[3.23C 007 C ))&#q9F%

J3C(DD	fnnY789 r*   c                 R    | d   j                   }| D ]  }|j                   |k7  s y y)Nr   FTr}  )rp  r}  rw  s      r+   have_same_ndimsr~  ?  s2    1:??D ;;$ r*   c                     | d   j                         d | }| D ]-  }t        j                  |j                         d | |k(  d        / y )Nr   c                       y)NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr)   r)   r*   r+   r   z+leading_dimension_matches.<locals>.<lambda>L  r  r*   )r.  r>   r   )rp  rN   leading_dim_sizesrw  s       r+   leading_dimension_matchesr  G  sN    
)$3/ 
KKM$3#44]	

r*   c                    t        j                  |dk\  d        t        j                  t        |       dkD  d        | d   j                  }| d   j                  }| D ]r  }t        j                  |j                         dkD  d        t        j                  |j                  |k(  d        t        j                  |j                  |k(  d        t t        |       r(t        j                  | d   j                         |      }nEt        j                  |dk\  d        | D ]&  }t        j                  ||j                  k  d	        ( t        | |       |S )
Nr!   c                       y)Nz&_chunk_cat expects positive num_chunksr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>U  r  r*   r   c                       y)Nz0_chunk_cat expects a non-empty input tensor listr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>W  r  r*   c                       y)Nz#_chunk_cat expects non-empty tensorr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>\  r  r*   c                       y)Nz8_chunk_cat expects all input tensors with the same dtyper)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>_  r  r*   c                       y)Nz8_chunk_cat expects all inputs tensors on the same devicer)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>c  r  r*   c                       y)NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>j  r  r*   c                       y)Nz3_chunk_cat expects dim < ndim for all input tensorsr)   r)   r*   r+   r   z._preprocess_chunk_cat_inputs.<locals>.<lambda>o  r  r*   )r>   r   r  r   r  r   r~  rC   r-  rN   r}  r  )rp  rN   rq  expected_dtypeexpected_devicerw  s         r+   _preprocess_chunk_cat_inputsr  P  s-   
 
LLq"RS	LLGqT QZ%%Naj''O 	
V\\^a')VWLLN*N	
 	MM_,N	
	
 w$$WQZ^^%5s;1Ha	
  	FLLfkk!M	
 gs+Jr*   r   c                     t        | ||      }t        | ||      }|t        j                  ||dz         S t        j                  ||dz   |       |S )Nr!   )r   )r  r{  r>   r0  )rp  rN   rq  r   rv  s        r+   
_chunk_catr  u  sS     'wZ
@Cj9N
{yyq11		.#'s3
r*   split_sizesc                    t         j                  | ||      }|.|D cg c]"  }|j                  t        j                        $ c}S t        ||      D ])  \  }}t        ||j                         t        ||d       + y c c}w )Nr   rX  Tr  )	r   split_with_sizesr  r>   r\  r  r   r  r   )rz   r  rN   r   splitssr  splits           r+   split_with_sizes_copyr    s     ""4#">F
{HNO1e&=&=>OO f- 	NMFEfekk2UFM	N  Ps   'B
split_size.c                 D    t         j                  j                  | ||      S r4   )r   r  r   )r   r  rN   s      r+   unsafe_splitr    s    ::UJ44r*   c                 D    t         j                  j                  | ||      S r4   )r   r  r  )r   r  rN   s      r+   unsafe_split_with_sizesr    s       ((SAAr*   c                 .   | j                   }||   }|dk(  r%|dk7  rt        d| d      | j                         fS ||z   dz
  |z  }ddlm}  ||      }t        |      D cg c]  }| }}|||z  |z
  z
  |d<   t        j                  | ||      S c c}w )Nr   z split_size is 0 but dim_size is z, expected 0r!   )	guard_intrR   )r  r,  detachrs  r  rS   r>   r  )	rz   r  rN   rh  r  chunksr  ro  r  s	            r+   r  r    s    **K3HQq= 28*LI  #a'J6F @vF',V}5!:5K5 J$7($BCKO;;t[#.. 6s   "	Btensor_indices_or_sectionsc                   	 |j                   j                  dk7  rt        d|j                          |j                  t        j
                  k7  rt        d|j                         |j                         	t	        j                  	dk(  xs 	dk(  	fd       	dk(  rS|j                         }t        |t              s!t        dt        |      j                         | j                  ||      S t        }t        j                  j                         x}r|j                   x}r|j"                  } |       5  |D cg c]  }|j                          }}d d d        | j                  |      S c c}w # 1 sw Y    xY w)Ncpuz/tensor_indices_or_sections must be on CPU, got z.tensor_indices_or_sections must be int64, got r!   r   c                      d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr)   )	split_dims   r+   r   zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>  s     <<E;eM r*   z%Expected sections to be IntLike, got )r  typer,  r   r>   r  rN   r   itemr6   r   r#   tensor_splitr   _guardsdetect_fake_mode	shape_envignore_fresh_unbacked_symbols)
rz   r  rN   sectionsctx	fake_moder  ro  r  r  s
            @r+   /tensor_split_tensor_indices_or_sections_py_implr    s    "((--6=>X>_>_=`a
 	
 "''5;;6<=W=]=]<^_
 	
 +..0I	LLQ()q.	M
 A~-224(G, 7X8O8O7PQ    3//7799I9",,,I,99C
 U 	E)CDAqvvxDGD	E   #.. E	E 	Es   :E7?E2E72E77F mat1mat2c                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S ||| z  z   S rr   )rK  
is_complexr  r>   mm)rz   r  r  rb   rl   r   s         r+   addmmr    s]     !!#DOO,=4yE

%((4&
&Cqy
 r*   use_geluc                     t        | ||||      }|r8| j                  rt        j                  |d      S t        j                  |      S t        j	                  |      S )Nr   )r   )r  is_cudar   gelurelu)rz   r  r  rb   rl   r  r   s          r+   _addmm_activationr    sO     dD$
.C<<99Sf95599S>!99S>r*   vecc                     | j                         s&| j                         st        |      }t        |      }|t        j                  ||      z  }|dk(  r|S |j                         dk(  r|| z  S ||| z  z   S rr   )rK  r  r  r>   mvr   )rz   r  r  rb   rl   r   s         r+   addmvr    ss     !!#DOO,=4yE

%((4%
%Cqy

yy{ad{r*   r   rstdgammaNCHxWgroupoutput_maskc
           	      D   t        j                  | ||d       t        j                  || d       t        j                  |d       t        j                  |j                         z  z  k(  fd       t        j                  j                  fk(  fd       t        j                  d u xs j                         k(  fd       z  }
t        j                  |
z  k(  fd       t        j                  | |      j                        j                  dg      }| j                        j                  dg      }d }d }d }|	d	   r*d
|
z  z  }t        j                  |j                  d	            j                  |
      j                  d      }t        j                  |j                  d	            j                  |
      j                  d      }t        j                  |j                  d      j                  d|
            }n|j                  |
      j                  d      }|j                  |
      j                  d      }t        j                  |j                  d      t        j                  d|
f|j                              }|z  |z
  |z  |z  |z  |z  }| z  ||z  |z  z
  }|j                  d      }t        |d      }t        |d      }t        j                  | j                  |
      |      t        j                  |j                  |
      |      z   |z   }|j                  |j                        j                  |j                         }|	d   rk|j                  |
      |j                  |
      j                  d      z  z
  |j                  d      z  j                  d	g      j                        }|	d   r|j                  d	g      }|||fS )NF)allow_cpu_scalar_tensorsc                      d z  z   dS )NzExpect input to have z	 elementsr)   )r  r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda><  s    'A}I> r*   c                  .    d  d dj                    S )NzExpect mean to have shape (, z
, but got r  )r  r  r   s   r+   r   z,native_group_norm_backward.<locals>.<lambda>@  s    -aS5'DJJ<P r*   c                  <    d  dj                          S d S )NzExpect gamma to have z elements but got rR   )r   )r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda>D  s-    's*<eN_U[[]<hi eg<hi r*   c                      d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   )r  r  s   r+   r   z,native_group_norm_backward.<locals>.<lambda>J  s    ,QC/[\a[bc r*   r"   r   r   rf   rR   r!   r  rD  )rC   check_same_devicecheck_same_shaper>   r   r   r  r   r  r   rT   r&  r  r  rV   r7   r   )rk   r   r   r  r  r  r  r  r  r  cpgdsdbd_inputd_gammad_biasr  ds_valdb_valc1c2c3s     ` `````             r+   native_group_norm_backwardr  '  s    
UD$ 
5+N	4F	LLQ$> 
LL

q%j P 
LL+!+i
 u*C	LL	S5[c 
;	&	+	+Aq#	6	:	:s	:	CB			!Q	$	(	(aS	(	1B!G!G F1~39YYr5??1#56>>q%MQQRSTFYYr5??1#56>>q%MQQRSTFr"a,B
 ZZ5#.2215FZZ5#.2215Fr"

Auc?4;;?B tmf$,t3d:Q>S4Z&4-!++\\"r1%r1%IIk))!UC=rBiiaS92>? 	
 //%++.11%++>1~ E3'"''!UC*@4>>RTCU*UU..$% SaSS\WQZ 	 1~QCWf%%r*   out2c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  	enumerater   r  r   )rk   r   r   r  r  r  r  r  r  r  rH  rI  r  r  rW   ro  rI   s                    r+   native_group_norm_backward_outr    sz    " (UD$q!S%F d#J&! Q1=jmQWW5Q
14PQ
 r*   c                 ,    | | j                  |      S | S r4   r7   )r8   r   s     r+   _maybe_castr    s    }ttE{Hr*   grad_outnormalized_shapebiasc                 ~  " |j                   }|j                         }	t        j                  |j                        ""fd| |||fD        \  }
}}}|
t        d      |	t        |      z
  }||d  }|d | }g }g }t        |	      D ]*  }||k\  r|j                  |       |j                  |       , t        |      }t        |      }ddl
m}  ||dk(        s ||dk(        rN|d   r|j                  |      nd |d   r|j                  ||d        nd |d   r|j                  ||d        fS d fS t        ||j                               }t        ||j                               }|t        d      ||z
  |z  }||
|z  }n|
}||z  }t        j                  ||d      }t        j                   ||      }t        j                  ||d      }t        j                   ||      }||z
  |z
  }d }d } d }!|d   r||z  |z  }|d   r0|.t        |      dkD  rt        j                  |
|z  |d	      } n|
|z  } |d   r8|6t        |      dkD  rt        j                  |
|d	      }!n|
j#                         }!t%        ||j                        t%        | ||j                  nd       t%        |!||j                        fS d       fS )
Nc              3   h   K   | ])  }|!|j                  t        j                        n| + y wr2  )r7   r>   r\  r  r8   r9   s     r+   r  z-native_layer_norm_backward.<locals>.<genexpr>  s>      9  = 	
e.E.EF	9s   /2 grad_out_cast should not be Noner   rq  r!   r"   zinput_cast should not be NoneTF)r  rN   rC   get_computation_dtyper   r,  r  rS   ru  r&  rs  rr  rm  rV   r>   r   r   r  r  )#r  r   r  r   r  r   r  r  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesro  r  Mrr  x_hat
grad_x_hatabr  r  r  rJ   r  d_weightr  r9   s#                                     @r+   native_layer_norm_backwardr    s    ++KJ33EKK@9 E640	95M:{I ?@@,--DTU#JUd#J#%#%: (9$$Q'$$Q'	( 	ZAZAKQ!V$(=a1f(E,7NEOOK(3>q>EOOK./t3>q>EOOK./
 	
 HL
 	

 T:>>#34DT:>>#34D<==$$&E"[0
"
QA		*/6A	:u	%B	2($	/B	5"	BEBJE!G"H F1~!8u$1~+1 !A%yy!68I5QH$u,H1~)/ !A%YY}.?GF"((*F 	GU[[)Hf.@flldKF$*:DJJE  AEE r*   c          
          t        | |||||||      }||	|
f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )r  r  r   r  r   )r  r   r  r   r  r   r  r  rH  rI  r  r  rW   ro  rI   s                  r+   native_layer_norm_backward_outr    sw     (%)4vt[F d#J&! Q1=jmQWW5Q
14PQ
 r*   c                 &   g }t        t        |            D ]'  }|j                  | j                         |z
  dz
         ) t	        j
                  | j                        }| j                  |      }|~|t        j                  t        j                  fv r.t        j                  t        j                        j                  }n0t        j                  t        j                        j                  }n|}t        j                  t        j                  j                   j"                  j%                  t        j&                  |d      j)                  |d      |            }	|j+                  |	      }
||
j+                  |      }
| j,                  xs |d uxr |j,                  }t	        j.                  |       }|t        j0                  t        j2                  fv }|s"|s |
j5                         }
|	j5                         }	|
j7                  |       }||	fS )Nr!   r"   Tr  )rS   r  ru  rN   rC   r  r   r7   r>   r   r   finfor>  r   rsqrtopsr   addScalarr   r   r   	is_nestedr   channels_lastchannels_last_3dr  r3  )r   r  r   r>  dims_to_reducero  r9   upcasted_inputeps_valrqrst_inputupcasted_resultr  rY  is_channels_lastr  s                  r+   _fused_rms_normr    s    !#N3'() 3eiikAo123 33EKK@XX/0N { @@kk%--044Gkk%--044G++ 			!!IIna(--.$-OQX	
K %((5O)--f5 LF$$6$K6;K;KI//6M$) 
 -)446!,,. $$U+F;r*   c                 L   |j                   }|j                         }t        j                  |j                        }| j                  |t        j                        }	|j                  |t        j                        }
|!|j                  |t        j                        nd }|	t        d      |t        |      z
  }||d  }|d | }g }g }t        |      D ]*  }||k\  r|j                  |       |j                  |       , t        |      }t        |      }ddlm}  ||dk(        s ||dk(        r4|d   r|j                  |      nd |d   r|j                  ||d        fS d fS t!        ||
j                               }||	|z  }n|	}d }d }|
|z  }|d   r)t        j"                  ||z  |d      }|||z  |z  z
  |z  }|d   r0|.|	|z  }t        |      dkD  rt        j"                  ||d      }n|}t%        ||j                        t%        ||j                        fS )	NrX  r  r   rR  r!   Tr  F)r  rN   rC   r  r   r7   r>   r\  r,  r  rS   ru  r&  rs  rS  rm  rV   r   r  )r  r   r  r  r   r  r  r  r9   r  r  r  r  r  r  r  r  ro  r  r  rS  r  r  r  r  sum_vald_weight_full_shapes                              r+   _fused_rms_norm_backwardr  L  sa    ++KJ33EKK@KK)@)@   M +5;R;RSJ  			#53J3J	K 
 ?@@,--DTU#JUd#J#%#%: (9$$Q'$$Q'	( 	ZAZADa1fQ!7,7NEOOK(3>q>EOOK./
 	
GK
 	

 T:>>#34D"[0
"
!G"HE1~))EJ.4EtTg 55=1~+1+e3 !A%yy#):EH +H 	GU[[)Hekk* r*   running_meanrunning_varmomentum
functionalc	                 D   dgt        t        d| j                                     z   }	t        j                  | j
                        }
|}|}|r"t        j                  | j
                        }
| j                  |
      }t        j                  ||	dd      \  }}t        j                  ||z         }| |z
  |z  }t        j                  ||	      }t        j                  ||	      }|!||z  d|z
  |z  z   }|s|j                  |       |;| j                         | j                  d   z  }t        j                  ||	      }|||dz
  z  z  }||z  d|z
  |z  z   }|s|j                  |       n||t        d      |j                  |
d      }|}|j                  |
d      }|}|}dt        j                  ||z         z  }| j                   j"                  d	k7  r|}|}n"| j%                  d
      }| j%                  d
      }t'        || j                         dz
        }t'        || j                         dz
        }| |z
  |z  }|2|j)                         }t'        || j                         dz
        }||z  }|2|j)                         }t'        || j                         dz
        }||z   }| j                   j"                  d	k(  r8|j                  | j
                        }|j                  | j
                        }|j                  | j
                        ||||fS )Nr   r"   r   T)rN   
correctionr  r!   z:running_mean and running_var must not be None in eval mode)r   r   r  r   )ru  rS   rN   rC   r  r   r7   r>   var_meanr  r  copy_r   r  r,  r`  r  r  rm  rV   r;  )r   r   r  r  r  r   r  r>  r  reduction_dimsr9   new_running_meannew_running_var	input_acc
biased_varr   r  r  	save_mean	save_rstdnsqueezed_varunbiased_varinvstds                           r+   native_batch_norm_helperr'    s    S4a 566N33EKK@#!O!77DHH#4H5	 >>>a

D {{:+,$,$&MM$7	MM$7	#')3q8||6SS""#34"A.A !==^DL'1A;7L&5X8TTO!!/2;#6 L  $->TJ'!nn+<4nH%ejjs!234<<%$II-I-I uyy{Q7"6599;?;$,&(!"6599;?;&||~ uyy{Q7$||E!LLu{{L3	LLu{{L3				$ r*   r!  save_invstdc                 >    t        | |||||||d	      \  }}	}
}}||	|
fS NFr'  r   r   r  r  r  r   r  r>  r  r!  r"  rU   s               r+   native_batch_normr-    s=     *Bvt\;(CQV*&FIy!Q 9i''r*   c           
          ||t         j                  | |||||      S |t        d      |t        d      |rt         j                  | |||||||      S t         j                  | ||||||      S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitrt  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r>  s           r+   native_batch_norm_decompositionr1  	  s      3,,648S
 	
 <
 	
 <
 	
 ,,64{HhPS
 	
 8864{Hc
 	
r*   c                 ~   | j                  |      }||z   dz
  |z  }|dk(  rc|dk(  r^t        |      D cg c]  }| }}|||z  |z
  z
  ||dz
  <   t        j                  j                  j
                  j                  | ||      S t        j                  j                  j                  j                  | ||      S c c}w Nr!   r   )	r.  rS   r>   r  r   r  r  r  r   )rw  r  rN   r  r  rU   r  s          r+   unsafe_chunk_py_implr4  .  s    {{3HV#a'F2JQ8q=+0=9az99",
V0Ch0N"OFQJyy~~55==fkSVWW99>>&&--fj#FF :s   	B:c           
      N    t         j                  j                  | ||||d||      S r*  )r   r/  r  )r   r   r  r  r  r  r>  s          r+   r0  r0  :  s5     ((00	 	r*   c                 >    t        | |||||||d	      \  }}	}
}}||	|
fS r*  r+  r,  s               r+   r/  r/  P  s=     *Bvt\;(CQV*&FIy!Q 9i''r*   c                 >    t        | ||d d |||d	      \  }}}}	}	|||fS r*  r+  )
r   r   r  r   r  r>  r  r!  r"  rU   s
             r+   !_native_batch_norm_legit_no_statsr8  a  s<     *BvtT48S%*&FIy!Q 9i''r*   c                 v    t        | |||||||d	      \  }}	}
}}|t        d      |t        d      ||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be None)r'  r,  )r   r   r  r  r  r   r  r>  r  r!  r"  r  r  s                r+   #_native_batch_norm_legit_functionalr<  p  sl    " 	!vt\;(CQU	 BCCABB9i)9?JJr*   c           	      T   t         j                  j                  | ||||d|      }d}|t         j                  j                  j                  k(  r t         j                  j                  | |      }t        j                  |t         j                  | j                  | j                        S )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutr  )
r>   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r>  r  )	r   r   r  r  r  r>  r   backendreserve_sizes	            r+   _get_batch_norm_reserve_tensorrH    s      hh11vt\;cG L%((,,222xxHH8
 ;;EKKU\\ r*   c                 d    t        | ||||d||d	      \  }}}	}
}
t        | |||||d      }|||	|fS )NTFr   r'  rH  r   r   r  r  r  r  r>  r  r!  r"  rU   reserves               r+   _batch_norm_with_updaterN    sa     *B
*&FIy!Q -vt\;dG 9i00r*   c                     t        | ||||d||d	      \  }}}	}
}t        | |||||d      }|
t        d      |t        d      |||	||
|fS )NTrJ  r:  r;  )r'  rH  r,  )r   r   r  r  r  r  r>  r  r!  r"  new_rmnew_rvrM  s                r+   "_batch_norm_with_update_functionalrR    s      	!vt\;hT	 -vt\;dG ~BCC~ABBIy'66BBr*   c                 d    t        | ||||d||d	      \  }}}	}
}
t        | |||||d      }|||	|fS )NFrJ  rK  rL  s               r+   _batch_norm_no_updaterT    sa     *B
*&FIy!Q -vt\;eG 9i00r*   c                     |t        d|       t        j                  |       |k  j                  t        j                        }|j                  |       | z  d|z  z  }||fS )Nz=generator must be None for _fused_dropout decomposition, got r   rf   )r,  r>   rL  r7   rE  r3  )r   rR  	generatorr  rN  s        r+   _fused_dropout_decompositionrW    sn     KI;W
 	
 OOE"Q&***=D
,,u

%q
1C;r*   )r   r>  r  
pin_memorynon_blockingrY  r  rX  rY  rY  c                   |r!|t         j                  k7  rt        d|       |rt        d      t        | t         j                  t
        t        t        t        f      s!t        dt        |       j                         |0|.|,t        | t         j                        r| j                         S | S d}t        | t         j                        r| }nt        j                  |       }|c||j                  k7  rT|1|j                  dk(  r"t         j                  j                  ||      }d}t         j                  j!                  |||      }|$|s"t         j                  j                  ||      }d}|t        j                  ||      S |S )Nz*layout must be None or torch.strided, got z:pin_memory=True is not supported in _to_copy decompositionz x must be Tensor or scalar, got Fr  TrX  )r>   stridedr,  r6   r   r  r  r  complexr  r#   r  scalar_tensorr  _primsconvert_element_type
device_put)	r8   r   r>  r  rX  rY  rY  dtype_convertedx_tensors	            r+   _to_copyrc  	  sZ    &EMM)I&RSSH
 	
 a%,,UD'BC?Q@P@P?QRSS~%-M,Aa&779HO!U\\"&&q)f7!5||885IH"O<<**8V\J<<44XuE {{8=AAOr*   c                 ,    t         j                  |       S r4   )r   alias)r8   s    r+   nop_decompositionrf  ;	  s     ::a=r*   out3exponential_average_factorepsilonc           
         t         j                  | |||||||      \  }}	}
|r%||	|
| j                  dt        j                        fS ||j                  d      |j                  d      | j                  dt        j                        fS )Nr  r   )r   r-  rm  r>   rE  )r   r   r  r  r  r   rh  ri  r  r  r  s              r+   cudnn_batch_normrk  C	  s     $$"	GAq! 1aU[[ABB	EKK0	 r*   c                     t        |      D ]>  \  }}|dk(  s|| j                  k  r| j                  |   |k(  r.| j                  |      } @ | S r[   )r  r}  r  rT   )r8   broadcast_maskr  r  s       r+   _broadcast_batch_norm_backwardrn  e	  sO    / "
d19dQVVm0ED!A" Hr*   rM  c                 *    t        | |||||||||	
      S r4   )native_batch_norm_backward)r  r   r   r  r  r!  r(  rD  r>  r  rM  s              r+   batch_norm_backwardrq  l	  s/     & r*   c
                   & |j                   }
||j                   }n|
}t        j                  |j                         &&fd| ||||||fD        \  }}}}}}}|j                  }|j	                         }|dk  rt        d|       d}t        t        |            ||   z  }|}|}|r||4t        d      ||t        d      |}t        j                  ||z         }dg|z  }||   ||<   g }t        |      D ]  }||k7  s	|j                  |        t        ||      }d|z  }t        j                  ||      }t        j                  |||z
  z  |      }t        ||z  |      }t        t        j                  ||z  ||z        |      } |t        ||      dz  }!nt        ||z  |      }!|r||z
  | z  }"||"z
  |z
  |!z  }#n||!z  }#|	d   r||z  }$nd }$|	d   r|}%nd }%|#j                  |
      t!        |$|      t!        |%|      fS )Nc              3   H   K   | ]  }||j                        n|  y wr4   r  r  s     r+   r  z-native_batch_norm_backward.<locals>.<genexpr>	  s,      	 $%=a7	s   "r"   z*rank of the input must be at least 2, got r!   z1mean and invstd must not be None in training modezDrunning_mean_cast and running_var_cast must not be None in eval moderf   )r   rC   r  r  rN   r,  r&  ru  r>   r  rS   ru  rn  r   r   r7   r  )'r  r   r   r  r  r!  r(  rD  r>  r  r  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr  num_featuresr   r&  rm  reduction_axesro  r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrW   rl  	grad_biasr9   s'                                         @r+   rp  rp  	  s    ++K||"33EKK@	 
	 ++KJA~I*VWWD[)*[->>LDF<6> !TUU $(8(@ V  !-34!"j 0N&t,N4 "N: %9!!!$% *$?DDii~>OIImzD'89>JE./E~VI/		%$,0J
 3FNKcQ
3[ .

 T!Z/$t+y8JF
"Z/
1~fn1~#		 	k"K.I|, r*   c
                    t        | |||||||||	
      }|
||f}t        |      D ]2  \  }}|	t        ||   |j                         t	        |||   d       4 |S r  )rp  r  r   r  r   )r  r   r   r  r  r!  r(  rD  r>  r  rH  rI  r  r  rW   ro  rI   s                    r+   native_batch_norm_backward_outr  	  s    " (F d#J&! Q1=jmQWW5Q
14PQ
 r*   save_varc                 B    t         j                  || |||||d|g d
      S NT)TTTr   rp  )r   rk   r   r  r  r!  r  ri  s           r+   miopen_batch_norm_backwardr  
  s5     ** r*   reserveSpacec	                 B    t         j                  || |||||d|g d
      S r  r  )	r   rk   r   r  r  r!  r  ri  r  s	            r+   cudnn_batch_norm_backwardr  6
  s5     ** r*   c                    | j                   | j                  t              t        j                  dv fd       | j                  dd  D ]  }t        j                  |dk7  fd         d   |d   z  dk(  rxd   |d   z  dk(  rjt        d t        dd  |      D              }t        d t        dd  ||      D              }t        j                  j                  j                  | ||      S d	 d
 fd} |d   |d         \  }}}}	 |d   |d         \  }
}}}| dt        |d      |
f   }|	s|st        j                  |d      S d } |||||	d      \  }} |||||d      \  }}d }t        t        |j                  d         t        |j                  d               D ]!  \  }}||d|d d |f   }||d|d d |f   z   }# |||z  z  S )Nr  c                      d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r}  s   r+   r   z%adaptive_avg_pool2d.<locals>.<lambda>[
  s    KD6R r*   rZ  r   c                  "    dt                dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r  s   r+   r   z%adaptive_avg_pool2d.<locals>.<lambda>`
  s     99>uaI r*   rR   c              3   ,   K   | ]  \  }}||z    y wr4   r)   )r  ro  r.  s      r+   r  z&adaptive_avg_pool2d.<locals>.<genexpr>f
  s     G$!QqAvG   c              3   :   K   | ]  \  }}}||d z
  |z  z
    ywr!   Nr)   )r  ro  r.  r  s       r+   r  z&adaptive_avg_pool2d.<locals>.<genexpr>g
  s'      
 '1aAQ!O
s   c                 8    t        j                  | |z  |d      S )Ntruncrounding_moder>   divr  r  r  s      r+   start_indexz(adaptive_avg_pool2d.<locals>.start_indexl
  s    yyQ99r*   c                 J    t        j                  | dz   |z  |z   dz
  |d      S )Nr!   r  r  r  r  s      r+   	end_indexz&adaptive_avg_pool2d.<locals>.end_indexo
  s&    yy!a%1q1,awGGr*   c                    t        j                  |t         j                        } |||       }| |z  dz   }| |z  }|dk(  xs ||z  dk(   }|r|dz  }n
|dk(  r|dz  }t        j                  |t         j                        }|j                  d      |z   }|rUt        j                  | dz
  |j
                  |j                        }	t        j                  ||	      } |||       }
|
|z
  }n|}||||fS )Nr  r!   r   rR   r  )r>   r  r  rT   r]  r   r  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxr  maxvali1lengthr  r  r  s               r+   compute_idxz(adaptive_avg_pool2d.<locals>.compute_idxr
  s   hvU[[I73 x'!+	(#q(GH{,Ba,GHNIANILL6M	ll2* ((!399SZZF --V,C 68W5B"WFFFIx//r*   .rD  )r   rR   r   c                     t        |t              r| |fS |dk\  rt        d|       ||j                  d      k\  }|dk(  rt	        |d      }t        j                  | |d      } t	        ||       }| |fS )Nr   z)dim should be negative when masking, got rR   rZ  rD  r   )r6   r   r,  rT   rV   r>   ri  )valsr  r  r  rN   r  s         r+   
maybe_maskz'adaptive_avg_pool2d.<locals>.maybe_mask
  s    fg&< ax$'PQTPU%VWW 0 0 44Dby(q1$$T45D&vt4F<r*   )r  rN   r   )r  r  r  r>   r   r  r  nnr  
avg_pool2drV   r   r
   rS   )r   r  r  rv  kernelr  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr  r  retro  jr  r  r}  r  r  s                      @@@@@r+   adaptive_avg_pool2dr  Q
  s.   
 \\FKKEu:D	LLR [[ 
FI	

 Ry;r?"a'E"IB,G1,LG#eBCj+*FGG 
+.uRSz;+O
 
 xx""--eVVDD:H0@ /:%)[QS_.U+D(K.9%)[QS_.U+D(K'a0$67Djzz$H--    hjbND(  hjbND(
 Cdjjn-uTZZ^/DE +1;sAq!|$CS!Q\**C	+
 (X%&&r*   c                    t        j                  d| d       t        | j                  d |        t        |      z   }t	        d |D              r| j                  |      S t        t        j                  | j                  d |        }t        t        j                  |      }dg| j                  z  }| j                  d |  |d |  |t        j                  || j                        j                  |      |z  z   j                  d      }| j                  |      }	t        j                  |	j                  d      |g| j                  d      d      j                  |	j                        S )	Nmax_unpoolingd_forward_outc              3   &   K   | ]	  }|d k(    ywr  r)   )r  r  s     r+   r  z _max_unpoolnd.<locals>.<genexpr>
  s     
(a16
(r  r!   r  rR   Fr$  )rC   alert_not_deterministicru  r  anyrm  r   operatorr   r}  r   r  r  r  r&  r'  )
rz   r  r  rN   output_shapenchwindices_nc_shapeindices_flatr  s
             r+   _max_unpoolndr  
  s=    
!!M#m"DE

5SD)*T+->>L

(<
((~~l++	djj3$/	0B	k	*BsTYY"jj3$/Usd$++b+5::;KLrQQgbk  ^^L)F!!r\NDLL,< " 
d6<<r*   c                     t        j                  j                  t         j                  k(  fd       t        j                  t	              dk(  fd       t        j                   j
                  dv  fd       t        j                   j                  j                  k(   fd       t        d j
                        D ].  t        j                   j                        dkD   fd	       0 t         d      S )
Nc                  "    d j                    S )Nz2elements in indices should be type int64 but got: r   )r  s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s    DW]]OT r*   r"   c                  "    dt                dS )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.r  r  s   r+   r   zmax_unpool2d.<locals>.<lambda>
      ;'(
4 r*   r  c                  $    d j                    dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r}  r   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s    %%)YYK|= r*   c                  <    dj                    d j                    S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  )r  rz   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s,    PQUQ[Q[P\ ]229--B r*   r!   r   c                  *    dj                    d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.r  )ro  rz   s   r+   r   zmax_unpool2d.<locals>.<lambda>
  s%    ::,&6qcH r*   )
r>   r   r   r  r  r}  r  rS   r.  r  )rz   r  r  ro  s   ```@r+   max_unpool2dr  
  s     
LL$T 
LLKA	
 
LL		V	
 
LL

gmm#	
 1dii  
IIaL1	

 wQ77r*   c                     t        j                  j                  t         j                  k(  d        t        j                   j                  dv  fd       t        j                  t              dk(  fd       t        j                  t              dk(  fd       t        j                  t              dk(  fd       t        j                   j                  j                  k(   fd       t        d	 j                        D ].  t        j                   j                        d
kD   fd       0 t        j                  d
   d
kD  xr d	   d
kD  xr d   d
kD  fd       t         d      S )Nc                       y)Nz(elements in indices should be type int64r)   r)   r*   r+   r   zmax_unpool3d.<locals>.<lambda>  r  r*   rD  r  c                  $    d j                    dS )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with r  r}  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>  s    ^_d_i_i^jjvw r*   r   c                  "    dt                dS )NzVThere should be exactly three elements (depth, height, width) in output_size, but got r  r  r  s   r+   r   zmax_unpool3d.<locals>.<lambda>  r  r*   c                  "    dt                dS )NzRThere should be exactly three elements (depth, height, width) in stride, but got: r  r  rv  s   r+   r   zmax_unpool3d.<locals>.<lambda>  s    dehioepdqq{| r*   c                  "    dt                dS )NzSThere should be exactly three elements (depth, height, width) in padding, but got: r  r  )r  s   r+   r   zmax_unpool3d.<locals>.<lambda>"  s    efijqfress}~ r*   c                  <    dj                    d j                    S r  r  )r  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>&  s,    PQVQ\Q\P] ^229--B r*   r!   r   c                  *    dj                    d  dS )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got r  r  r  )ro  r   s   r+   r   zmax_unpool3d.<locals>.<lambda>/  s%     ;;-'7s-I r*   r"   c                      d  S )Nz5strides should be greater than zero, but got stride: r)   r  s   r+   r   zmax_unpool3d.<locals>.<lambda>8  s    GxP r*   )
r>   r   r   r  r}  r  r  rS   r.  r  )r   r  r  rv  r  ro  s   `````@r+   max_unpool3dr    sD    
LL$&X 
LL

fw 
LLKA	
 
LLFq| 
LLG~ 
LLw}}$	
 1ejj! 
JJqMA	

 
LLq	A9&)a-9F1IMP
 a88r*   )rl   rw  c                $    t        | |||d|      S )NTinplacerl   
_index_addr8   rN   r  rw  rl   s        r+   
index_add_r  >  s     aeVTGGr*   c                $    t        | |||d|      S )NFr  r  r  s        r+   	index_addr  J  s     aeVU%HHr*   r  c                6   t        j                  | j                        t        j                  j                  dk  fd       j                  dk(  rj                  d      nd|j                  dkD  r|j                        ndt        j                  k(  fd       dk7  ret        j                  | j                        t        j                  t        u xs t        j                  t                    fd       |z  }| j                  dk(  }|r| j                  d      n| }dz  fz   }|rt        j                  nt        j                  }	 |	|||d      }
|r| S |r|
j                  d      S |
j!                         S )	Nr!   c                  $    d j                    dS Nz(Index should have dimension 1 or 0 (got r?  r}  r  s   r+   r   z_index_add.<locals>.<lambda>c      :5::,aH r*   r   c                      d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r)   )rN   
index_sizerx  s   r+   r   z_index_add.<locals>.<lambda>i  s!    %j\1XYdXeeqmplrs r*   c                  (    dt                d dS )Nzalpha argument of type z cannot be safely cast to type !)r  )rl   python_types   r+   r   z_index_add.<locals>.<lambda>p  s    -d5k]:YZeYffgh r*   r4   Tr$  )rC   canonicalize_dimsr}  r>   r   r.  dtype_to_typer   r  is_weakly_lesser_typer  rT   r   
index_put_	index_putr  r  )r8   rN   r  rw  r  rl   zero_dimrU  r  r  r   r  r  rx  s    ``  `     @@@r+   r  r  W  sM    
!
!!&&#
.C	LL

aH #(**/AqJ&,kkAo&++c"1K	LLz!s z))!''24 E**4;Dh	

 %vv{H#QB
C-5(
"C#*I
BV
5C!)s{{1~?s~~/??r*   c           
         t        j                  t        |       dkD  d        t        |       }| d   j                         }|dd  }t	        d | D              }|r||f}n||f}||z   }| d   j                  ||      }dt        |      z  }	t        |      D ]j  }
| |
   }t        j                  ||	d||j                  d      z
  fz   |      }|rt        j                  ||d|
      }Rt        j                  ||d|
      }l |S )Nr   c                       y)Nz#received an empty list of sequencesr)   r)   r*   r+   r   zpad_sequence.<locals>.<lambda>  r  r*   r!   c              3   >   K   | ]  }|j                  d         ywr  r.  )r  r8   s     r+   r  zpad_sequence.<locals>.<genexpr>  s     /!&&)/s   )r   r   rN   r  )
r>   r   r  r.  r   r   rS   r   rs  r  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsro  currseqrows                r+   pad_sequencer    s#    
LLY!#%RS^N|  "HQRLM/Y//G"G,^,-'H
A,

-
8CC..L>" @A,""\Q',,q/(A$BBM
 %%c3AQ%?C%%c3AQ%?C@ Jr*   c                 "    t        | |||d      S )NTr  _index_copyr8   rN   r  rw  s       r+   index_copy_r    s    q#ufd;;r*   c                 "    t        | |||d      S )NFr  r  r  s       r+   
index_copyr    s     q#ufe<<r*   c                   t        j                  | j                  |      }t        j                  j                  dk  fd       | j                  dk(  }|r| j                  d      n| }j                  dk(  rj                  d      nd|z  fz   }|rt        j                  nt        j                  } ||||      }	|r| S |r|	j                  d      S |	j                         S )Nr!   c                  $    d j                    dS r  r}  r  s   r+   r   z_index_copy.<locals>.<lambda>  r  r*   r   r4   )rC   r  r}  r>   r   rT   r   r  r  r  r  )
r8   rN   r  rw  r  r  rU  r  r  r   s
     `       r+   r  r    s     
!
!!&&#
.C	LL

aH
 vv{H#QB"'**/EOOAuE
C-5(
"C#*I
BV
$C!)s{{1~?s~~/??r*   c                 *   t        j                  | j                  d      |       }t        j                  t        j                  |              }| j
                  s| j                  r| j                  d      }n|}|t        j                  |      z
  |fS )Nr)   r  )r>   r  rm  rg   r   r  is_xpurH  )rz   r   ri   r   s       r+   log_sigmoid_forwardr    sn     --r*D
1C		599T?"#A||t{{%Q''r*   lowhighrV  c           	          t        j                  | j                  | j                         t	        |      t	        |      | j
                  | j                  |      S )N)rv  r  r  r   r  rV  )prims_uniform_helperr  rv  r   r   r  )r8   r  r  rV  s       r+   uniformr"    sF       	xxzcNt_ggxx r*   c                 <    | j                  t        | |||            S r4   )r  r"  )rz   r  r  rV  s       r+   uniform_r$    s    ::gdCy9::r*   c                 "   t        |       dz
  }|>t        j                  |d u d        t        j                  t        |      |k(  d        |S |t        j                  |d u d        t        j                  t        |      |k(  d        g }t        |      D ]Z  \  }}t	        |      |k(  r$|j                  | |dz      t	        |      z         8|j                  t        | |dz      |z               \ |S t        j                  dd        y )Nr"   c                       yNz9Must specify exactly one of output_size and scale_factorsr)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r  r*   c                       yN r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r  r*   c                       yr'  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r  r*   c                       yr)  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r  r*   Fc                       yr'  r)   r)   r*   r+   r   z.upsample_compute_output_size.<locals>.<lambda>  r  r*   )r  r>   r   r  r  ru  r   )r5  r  scale_factorsspatial_dimensionsro  r  s         r+   upsample_compute_output_sizer0    s   Z1,T!O	
 	S%);;ZH 4O	
 	S'+==zJm, 	CDAq1v{"":a!e#4s1v#=>""7:a!e+<q+@#AB		C
 	LLRr*   c                     | y | |   S r4   r)   )scalesr  s     r+   get_scale_valuer3    s    ~#;r*   r.  c                 x    t        | j                         ||      }|r|nd gt        |      z  }t        | ||      S r4   r0  r.  r  _upsample_nearestr   r  r.  osizer2  s        r+   _upsample_nearest_vecr9    s@     ){MRE&TFSZ,?  UE622r*   c                 |    t        | j                         ||      }|r|nd gt        |      z  }t        | ||d      S NTexactr5  r7  s        r+   _upsample_nearest_exact_vecr>    sB     ){MRE&TFSZ,?  UE6>>r*   c                    g }t        |      }|rdnd}t        |      D ]  }||   }| j                  | |z      }	||   ||   dkD  r|	|	||   z  z  n|	|z  }
t        j                  |t        j
                  | j                        }||z   |
z  j                  t        j                        }t        |dz
  |z
        D ]  }|j                  d      } |j                  |        |S )Nr   r   r   r  r!   rR   )r  rS   r  r>   r  r   r  r7   r  rT   ru  )r   r  r2  r=  r  num_spatial_dimsr  r  r8  isizerm   output_indicesinput_indicesrU   s                 r+   !_compute_upsample_nearest_indicesrD  -  s    G;'SsF#$ & A--12
 ay$Q UVAY&' 	 e5==V(61U:>>u{{K'!+a/0 	8A)33B7M	8}%7&8 Nr*   )preserve_memory_formatr  r2  c                     t        | ||g      S r4   r6  r   r  r2  s      r+   upsample_nearest1drI  S  s     UK&::r*   c                 "    t        | ||gd      S r;  rG  rH  s      r+   upsample_nearest_exact1drK  _  s     UK&FFr*   scales_hscales_wc                      t        | |||g      S r4   rG  r   r  rL  rM  s       r+   upsample_nearest2drP  m  s     UK(H1EFFr*   c                 $    t        | |||gd      S r;  rG  rO  s       r+   _upsample_nearest_exact2drR  z  s     UK(H1ETRRr*   scales_dc                 "    t        | ||||g      S r4   rG  r   r  rS  rL  rM  s        r+   upsample_nearest3drV    s     UK(Hh1OPPr*   c                 &    t        | ||||gd      S r;  rG  rU  s        r+   _upsample_nearest_exact3drX    s!     {Xx:$ r*   r=  c                 D   t        | |||      }d d g|z   }t        j                  | |      }|j                  dk(  rdt	        j
                  |       }| j                  d   }| j                  j                  dk(  r|dk  rt        j                  }|j                  |      }|S )Nr<  rD  r!   cudarX  )rD  r   _unsafe_indexr}  rC   r   r  r  r  r>   r\  r  )	r   r  r2  r=  spatial_indicesr  r  rY  
n_channelss	            r+   r6  r6    s     8{F%O Tl_,Gw/F{{a33E: [[^
<<&:>!33M"""?Mr*   c           	          |r|rd}n|rd}n|rd}nd}t        |       |z  dk7  rt        dt        |        d|       t        dt        |       |      D cg c]  }t        | |||z           c}S c c}w )Nr  rD  r   r"   r   zlen(params)=z  is not divisible by group_size=)r  r,  rS   r  )params
has_biaseshas_projections
group_sizero  s        r+   gather_paramsrc    s    o
	
	


6{Z1$3v;-'G
|T
 	
 49CK3T./fQZ()  s   A2c                 ~    |r'| d|z     |d|z     }}| d|z  dz      |d|z  dz      }}n| |   ||   }}d\  }}||||fS )Nr"   r!   NNr)   )r_  hiddensro  bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddens           r+   params_hiddensrl    sk    !'AAJ
%+AEAI%6A	8Jl!'GAJJ
%/"lz<==r*   c                     ||k  rt        d| d| d      |j                  | j                  d|||z
               | j                  dd|      S )Nlast_batch_size (z) must be > batch_size (r?  r   )r,  ru  r/  )ri  last_batch_size
batch_sizerf  s       r+   update_hidden_for_packedrq    sc    *$00HTUV
 	
 NN:$$Q
Oj4PQRQ:..r*   c           	          ||k(  r| S ||k\  rt        d| d| d      t        j                  | |j                  d|||z
        f      S )Nrn  z) must be < batch_size (r?  r   )r,  r>   concatr/  )ri  ro  rp  
inp_hiddens       r+    update_hidden_for_packed_reverseru    sn     *$*$00HTUV
 	
 <<a*2NO	
 r*   c           	      X   |d   }|d   }|r|d   nd }	|r|d   nd }
g }g }|r|d   n|d   }|j                  dd|      }t        j                  | t        |            }|r|d d d   }|D ]V  } | j                  d   }||k(  rn|rt        ||||      }nt        ||||      } || |||	||
      }|}|j                  |       X |r|j                          n!|j                  |       |j                          t        j                  |d      }|st        j                  |d      n|}||fS )Nr   r!   r"   r   rR   )
r/  r>   r  ru  r  ru  rq  ru  reverser0  )inphiddenr_  r`  	hidden_fnbatch_sizesrw  	ih_weight	hh_weightih_biashh_biasstep_outputrf  ro  ri  	split_inpro  r   
hidden_outs                      r+   one_layer_rnn_datar    sR    q	Iq	I%fQi4G%fQi4GK"$G)0k"ok!nOq!_5JCk!23IddO	 'IIaLa9OQJ 2OQJ sJ	7IwW
:&#'& z"
))K
#C.57A&:J
?r*   c                       fd}|S )Nc                 D     t        j                  |||      | z         S r4   r   linearro  ri  r|  r~  r}  r  nonlinearitys         r+   rJ   zrnn_cell.<locals>.inner+  s     AHHZGDqHIIr*   r)   r  rJ   s   ` r+   rnn_cellr  *  s    J Lr*   c                       fd}|S )Nc                 r    t        j                  | ||      }  t        j                  |||      | z         S r4   r  r  s         r+   rJ   zrnn_cell_data.<locals>.inner2  s2    HHQ	7+AHHZGDqHIIr*   r)   r  s   ` r+   rnn_cell_datar  1  s    J Lr*   c           	      x   |d   }|d   }|r|d   nd }|r|d   nd }	t        j                  | ||      }
|r|
j                  d      n|
}
|j                  d      }g }|
D ]   } |||||||	      }|j	                  |       " |r|j                          t        j                  |d      }||j                  d      fS )Nr   r!   r"   r   )	r   r  fliprT   ru  rw  r>   r0  r  )rx  ry  r_  r`  rz  rw  r|  r}  r~  r  precomputed_inputri  r  ro  r   s                  r+   one_layer_rnnr  9  s    q	Iq	I%fQi4G%fQi4Gi95<)..q1BS!!!$JK 'q*i)WU
:&' 
))K
#C
""1%%%r*   c                    |d   }|d   }|r|d   }|d   }nFt        j                  |j                               }t        j                  |j                               }|d   j                  d      }	|d   j                  d      }
g }d}|	j                  d      }d}d}d}d}| j	                         } |	j	                         }	|
j	                         }
t         j
                  j                  j                  j                  | |||||	|
|||||||||      }|d   |d   |d   }}}||j                  d      |j                  d      ffS )Nr   r!   r"   r   F)
r>   r  r.  rT   r  r  r   mkldnn_rnn_layerr  r  )rx  ry  r_  r`  rw  w0w1w2w3hxcxr{  modehidden_size
num_layersrg  r  rD  outputsrY   hycys                         r+   mkldnn_one_layer_lstmr  O  sS   	B	BAYAY[[#[[#			Q	B			Q	BKD''!*KJ MKE ..
C	B	Biinn--55





!G$ 
GAJ
2rArzz!}bjjm,,,r*   c
                    |r| j                  dd      n| } g }
t        |      D ]  }t        ||||      \  }}}}|r
||dz
  k  r|nd} |	| |||      \  }}|
j                  |       |r! |	| |||d      \  }}|
j                  |       |r*t	        j
                  |g|j                         dz
        } n|} |dk7  s|s||dz
  k  st	        j                  | |d      }  |r| j                  dd      n| } | |
fS )Nr   r!   r   T)rw  )rD  )	transposerS   rl  ru  r>   r0  rN   rG  )r   ry  r_  r`  r  rG  rD  rg  r  layer_fnfinal_hiddensro  rh  ri  rj  rk  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddens                       r+   _rnn_helperr    s"    &1EOOAq!eEM: >=KFA}>
:
Jl $
QU(:'&uj*jQZ("*|\:t#GZ   ,IIw0'++-!2CDEEa<Ea*q.&8MM%=E)>, &1EOOAq!eE-r*   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Frz  )	unbindrc  r  r   r  r  r>   r   stackr   r  r_  r`  r  rG  rD  rg  r  ry  r   r  s               r+   rnn_tanh_inputr    t     YYq\F6:u5F$%**)=>C M1---r*   c	                     |j                  d      }	t        ||d      }t        | |	|||||||t        t        t        t        j                              
      \  }
}|
t        j                  |d      fS r  )	r  rc  r  r   r  r  r>   r  r  r  s               r+   rnn_relu_inputr    r  r*   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS Nr   Fr{  rz  )	r  rc  r  r   r  r  r>   r  r  datar{  r  r_  r`  r  rG  rD  rg  ry  r   r  s               r+   rnn_relu_datar    {     YYq\F6:u5F$##EJJ/	
C  M1---r*   c	                     |j                  d      }	t        ||d      }t        | |	||||||dt        t        |t        t        j                              
      \  }
}|
t        j                  |d      fS r  )	r  rc  r  r   r  r  r>   r   r  r  s               r+   rnn_tanh_datar    r  r*   c                 l   t        j                  |||      | z   }|j                  d|      }|d   j                         }	|d   j                         }
|d   j	                         }|d   j                         }|
|z  |	|z  z   }||j	                         z  }||nt        j                  ||d       }||fS )NrD  r   r!   r"   r   r   r  chunkr   r   )rx  r  r  r}  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater  r  s                  r+   	lstm_cellr  .  s    HHRG,s2EKK9-MA&&(G"**,Ka %%'IQ'')H	r	Wy0	1B	BGGI	B ahhr9d&CBr6Mr*   c           
      (   |d   }|d   }|r|d   nd }|r|d   nd }t        |      dk(  r|d   nt        |      dk(  r|d   nd }	|d   j                  d      }
|d   j                  d      }t        j                  | ||      }|r|j	                  d      n|}g }|D ](  } t        | |
||||	d      \  }
}|j                  |
       * |r|j                          t        j                  |d      }||
j                  d      |j                  d      ffS )Nr   r!   r"   r   r  rD  r  )r  rT   r   r  r  r  ru  rw  r>   r0  r  )rx  ry  r_  r`  rw  r|  r}  r~  r  r  r  r  r  r  r   s                  r+   one_layer_lstmr  <  s*   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  
		Q	B			Q	Bi95<)..q1BSK  3B	7IQRSB2 
))K
#CA

1...r*   c           
         |d   }|d   }|r|d   nd }|r|d   nd }	t        |      dk(  r|d   nt        |      dk(  r|d   nd }
g }g }|r|d   n|d   }t        j                  | t        |            }|r|d d d   }|d   }|d   }|j	                  dd|      |j	                  dd|      }}|D ]  } | j
                  d   }t        j                  | ||      } ||k  ra|j                  |j	                  d|||z
        |j	                  d|||z
        f       |j	                  dd|      |j	                  dd|      }}||kD  rXt        j                  ||j	                  d|||z
        fd      }t        j                  ||j	                  d|||z
        fd      }t        | ||||	|
d      \  }}|}|j                  |        |r|j                          ||f}nZ|j                  ||f       |j                          t        | \  }}t        j                  |d      t        j                  |d      f}t        j                  |d      }||fS )	Nr   r!   r"   r   r  rD  rR   r  )r  r>   r  ru  r/  r  r   r  ru  rs  r  rw  r  r0  )rx  ry  r_  r`  r{  rw  r|  r}  r~  r  r  r  rf  ro  r  orig_hxorig_cxr  r  ro  r  hidden0hidden1r   s                           r+   one_layer_lstm_datar  W  sy   q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  KG)0k"ok!nOCk!23IddO	QiGQiGq!_-q!_- 	B
  IIaLhhsIw/ NNIIaOa$78IIaOa$78 YYq!Q'1a);B W^^AO8KLMqB W^^AO8KLMqB 3B	7IQRSB236 "X
Bx =YYw*EIIgq,AA

))K
#C
?r*   c                 4    d } || ||      rt         S t        S )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c                 N   t         j                  j                         sy| gt        |      z   t        t	        j
                  |            z   }|D ch c]  }|j                   }}t        |      dk7  ry|j                         }|t        j                  d      k7  ry|D ch c]  }|j                   }}|D ]&  }|t         j                  t         j                  fvs& y | j                  ry|d   j                  d      |d   j                  d      k7  }	|	ryyc c}w c c}w )NFr!   r  r   r"   T)r>   r?  _get_mkldnn_enabledru  r	   from_iterabler  r  popr   r  bfloat16requires_gradr.  )
r   r  r_  rp  tdevicesr  dtypesr   ra  s
             r+   
use_mkldnnz2select_one_layer_lstm_function.<locals>.use_mkldnn  s    xx++-'DH$tE,?,?,G'HH%,-188--w<1U\\%((#*+a!''++ 	EU[[%..99	 Q%**Q-2a5::a=8) . ,s   D#D")r  r  )r   r  r_  r  s       r+   select_one_layer_lstm_functionr    s!    : %V$$$r*   c	                    t        |      dk7  rt        dt        |             t        |||d   j                  d      |d   j                  d      k7        }t	        t        |d   |d               }	t        | ||      }
t        | |	||||||||

      \  }}t	        t        |       }|t        j                  |d   d      t        j                  |d   d      fS )Nr"   $lstm expects two hidden states, got r   r!   )
r  r,  rc  r.  ru  r  r  r  r>   r  )r   r  r_  r`  r  rG  rD  rg  r  ry  r  r   r  s                r+   	lstm_implr    s     2w!|CCG9MNN6:r!uzz!}1

1/MNF#beRU#$F-eR@H$C m,-MM!,a0%++mA>NPQ2RRRr*   c	                    t        |      dk7  rt        dt        |             t        |||d   j                  d      |d   j                  d      k7        }t	        t        |d   |d               }	t        | |	||||||dt        t        |      
      \  }
}t	        t        |       }|
t        j                  |d   d      t        j                  |d   d      fS )Nr"   r  r   r!   F)r{  )r  r,  rc  r.  ru  r  r  r   r  r>   r  r  s               r+   lstm_data_implr    s     2w!|CCG9MNN6:r!uzz!}1

1/MNF#beRU#$F$#=C m,-MM!,a0%++mA>NPQ2RRRr*   c                 &   | j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r!   r"   r   )r  r   r  r   r   rx  ri  r|  r~  r}  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gates              r+   gru_cellr    s    YYq!_NXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r*   c                 P   t        j                  | ||      j                  dd      }t        j                  |||      j                  dd      }|d   |d   z   j                         }|d   |d   z   j                         }	|d   |d   |z  z   j	                         }
||
z
  |	z  |
z   S )Nr   r!   r   r"   r  r  s              r+   gru_cell_datar    s    XXc9g6<<QBNXXj)W=CCAqIN #nQ&77@@BJ #nQ&77@@BJq!^A%6%CDJJLH!Z/(::r*   c	                     t        ||d      }t        | |j                  d      ||||||dt        t        |t
              
      \  }	}
|	t        j                  |
d      fS )NFr   r  )rc  r  r  r   r  r  r>   r  )r  r{  r  r_  r`  r  rG  rD  rg  r   r  s              r+   gru_impl_datar     si     6:u5F$
		!"}UC M1---r*   c	                     t        ||d      }t        | |j                  d      |||||||t        t        t
              
      \  }	}
|	t        j                  |
d      fS )NFr   r  )rc  r  r  r   r  r  r>   r  )r   r  r_  r`  r  rG  rD  rg  r  r   r  s              r+   gru_implr  >  sf     6:u5F$
		!2C M1---r*   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S Nr   r!   )r0  r.  r3  r>   r  r   _upsample_bilinear2d_aar   r  align_cornersr.  r8  scale_hscale_ws          r+   upsample_bilinear2d_aa_vecr  \  sV     ){MREmQ/GmQ/G99>>11umWg r*   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S r  )r0  r.  r3  r>   r  r   _upsample_bicubic2d_aar  s          r+   upsample_bicubic2d_aa_vecr  h  V     ){MREmQ/GmQ/G99>>00umWg r*   c                     t        | j                         ||      }t        |d      }t        |d      }t        j                  j
                  j                  | ||||      S r  )r0  r.  r3  r>   r  r   _upsample_lanczos2d_aar  s          r+   upsample_lanczos2d_aa_vecr   t  r  r*   c                 z    t        | j                         ||      }|r|nd gt        |      z  }t        | |||      S r4   )r0  r.  r  _upsample_linear)r   r  r  r.  r8  r2  s         r+   _upsample_linear_vecr    s=     ){MRE+]$#e*1DFE5-@@r*   r  c                      t        | |||g      S r4   r  )r   r  r  rM  s       r+   upsample_linear1dr    s     E;zJJr*   c                 "    t        | ||||g      S r4   r  )r   r  r  rL  rM  s        r+   upsample_bilinear2dr    s     E;(?STTr*   c                 $    t        | |||||g      S r4   r  )r   r  r  rS  rL  rM  s         r+   upsample_trilinear3dr
    s!     {MHh+I r*   c                 L    |r|dkD  r| dz
  |dz
  z  S dS |
|dkD  rd|z  S | |z  S )Nr!   rf   r   r)   )r  r  r  rm   s       r+   _compute_scaler    sB    5=\#(S.1HqH#/EAIsU{U7XCUUr*   c                 &    |r| |z  S | |dz   z  dz
  S Nr   r)   )rm   	dst_indexr  s      r+   _compute_source_indexr    s$    y  	C(3..r*   weightsweights_precisionc                     t        d t        | |      D              d|dz
  z  z   }||z	  }t        j                  |dd      j	                  t        j
                        S )Nc              3      K   | ]F  \  }}|j                  t        j                        |j                  t        j                        z   H y wr4   )r7   r>   r9  )r  r  r  s      r+   r  z%_sum_tensors_uint8.<locals>.<genexpr>  s8      26!QU[[ADD--s   AAr!   r      )_sum_tensorsr  r>   r   r7   rE  )r  r  r  r  s       r+   _sum_tensors_uint8r    sd      :=c7:K 	
 1$	%'F ((F;;vq#&))%++66r*   c                     t        j                  |       j                         }d}t        j                  ||j                        }d|d|dz   z  z  z   }|dk\  }||j                         z
  S )N   r  r   r!   i   )r>   r  r   r  r  r   )r  
max_weightmax_weight_precision
precisionsvaluesr  s         r+   _compute_weight_precisionr    si    W%))+J2:;L;LMJ:zA~!677FgD$((*,,r*   c                 L     j                   d   } j                   dd  }t        |      }t        j                   t        j                  j
                        \  } fd}t        t        |||            D 	
cg c]  \  }	\  }
}} ||
|||dz
  |	z
         }}}
}	}t        t        |       \  }}}g }t        ddgg|z   D ]c  }d d gt        |      D cg c]  }||   dk(  r||   n||    c}z   }t        j                   |      }t        |      }|j                  |       e t        t        |            D ]p  }	||	   ||	   z
  j!                  dd      j#                        }t        |d d d   |dd d         D cg c]!  \  }}|t%        j&                  ||z
  |      z   # }}}r t        |      dk7  rt)        dt        |             |d   }t        j*                         } j,                  j.                  d	k(  r|d
k  rt$        j0                  }t3        |t$        j4                        s!t)        dt/        |      j6                         |j9                  |      } j;                         s|j=                         }|S c c}}}
}	w c c}w c c}}w )Nr!   r"   rT  c                 |   t        | |	|      }t        j                  |j                        j	                  
      }t        ||	      j                  d      } |j                  |j                  d   gdg|z   }|j	                  t        j                        }|dz   j                  | dz
        }|||fS )Nr  r   r   r   r   r!   r   )
r  r>   r  r  r7   r  r   r&  r  r  )inp_sizer  r2  nsqueezescale_factorro  x_f32r8   xp1r  r   r   s            r+   
get_valuesz$_upsample_linear.<locals>.get_values  s    %h-P LL%,,7:::G%lA}EKKPSKTekk!n@sh/?@HHU[[!1umm1m-a}r*   r   r   rf   z+Expected vs to have exactly 1 element, got rZ     z$Expected result to be a Tensor, got rX  )r  r  rC   rD   rV  INT_TO_FLOATr  r  ru  r
   rS   r   r[  r   ru  reversedr   r7   r>   r   r,  r   r  r  r\  r6   r   r#   r  rK  round)r   r  r  r2  r]  	inp_sizesn_dimsrU   r&  ro  r!  r  r  xs_f32xsxp1svsr  kr  vxscalev1v2r  rY  r   s   ` `                       @r+   r  r    s    QJABI^F''!AANNHAu
 09	;/0
 +A+(F 	8XvvzA~>F  CL)FB	B1vh'( TluV}U!qtqybed1g=UUuc*#Au-
		!	 eFm$ 
)be#**3477> b1gr!$Q$x0
 B 27F++
 

 2w!|J3r7)TUUUF //6M ||F"zB//fell+24<3H3H2IJ
 	
 ];F""$MY V
s   J
J&J r  r  c                 4    | j                   |j                   k(  S r4   r  )r  r  s     r+   is_same_sizer7  (  s    77aggr*   c                 .    t         j                  | |      S r4   )r   r  )r8   r  rE   s      r+   _reshape_aliasr9  -  s     99Qr*   c                 .    t         j                  | |      S r4   )r   r  )r8   r  s     r+   r[  r[  3  s    ::a!!r*   c                 2    t         j                  | |||      S r4   )r   r  )r8   r  r{   r%  s       r+   r'  r'  8  s    >>!WeZ88r*   c                    |D ]F  }|t        j                  |j                  t         j                  t         j                  fv d        H t        j                  |j                  t         j
                  k(  d        ddlm}  || j                         dk(        r<t         j                  j                  | |      }| j                  |j                  |      S t        t        |            D ]2  }||   }||j                  d| j!                  |      dz
        ||<   4 t"        j%                  | |      j'                  | |      S )Nc                       yNz3tensors used as indices must be long or int tensorsr)   r)   r*   r+   r   z&_unsafe_masked_index.<locals>.<lambda>C  r  r*   c                       yNz*tensors used as masks must be bool tensorsr)   r)   r*   r+   r   z&_unsafe_masked_index.<locals>.<lambda>H  r  r*   r   rR  r!   r  )r>   r   r   rh  r  r  rs  rS  r   _meta_registrationsmeta_index_Tensorr   r  rS   r  r   r.  r   r[  ri  )r8   r  r  fillr  rS  meta_resultro  s           r+   r  r  =  s    LL

EII66M 
LL

ejj <
 Eaggi1n%//AA!WMzz+++T223w<  ?
q	A>GAJ?
 a)55teTBBr*   c                 L   |D ]F  }|t        j                  |j                  t         j                  t         j                  fv d        H t        j                  |j                  t         j
                  k(  d        | j                         dk(  r| j                         S t        t        |            D ]B  }||   }||j                  | j                  |       | j                  |      dz
        ||<   D |j                  | d      }t        j                  | ||d      S )Nc                       yr>  r)   r)   r*   r+   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>_  r  r*   c                       yr@  r)   r)   r*   r+   r   z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>d  r  r*   r   r!   r  Tr$  )r>   r   r   rh  r  r  r   r  rS   r  r   r.  ri  r   r'  )r8   r  r  r  r  ro  masked_values          r+   #_unsafe_masked_index_put_accumulaterI  Y  s     LL

EII66M 
LL

ejj <
 	wwyA~wwy3w<  H
!&&)QGGAJH
 %%teQ/L!!!Wlt!LLr*   c                    | j                         }d}|dk  rd}|6|dkD  r*dg|z  }|j                  d   ||<   |j                  |      }n|}| |z  } t        j                  ||k7  |d      }	|	j                  |      }
t        j                  | ||
      j                  |       }t        j                  ||k7  |d      }|t        j                  j                  k(  r|dkD  r| j                  dd      }||fS |lj                  | j                        }t        j                  |||
      j                  |      }t        j                  ||k7  |d      }|j                         }n"||k7  j                         j                  |       }|t        j                  j                  k(  r|j                         }||fS |t        j                   j                  k(  r|j                         |z  }||fS )Nr!   r"   r   r)   r   )rN   r  r  r>   rh   rT   gatherr  r    r&   r{   r   r  r   r7   r(   r'   )rz   r   r   r   r"  r,  r'  r  wr(  safe_target_r  r#  wsums                 r+   _nll_loss_forwardrO  s  s    XXZFKzA:E "(aE+E"AAax++f4fa@K((5L ll4l;CCKPPF[[</;FINN(((VaZ}}R-|##HHTZZ ||A{L9AA+N{{6\14;xxz,.33588>IMM''' < 
inn**	*,<r*   c                 p     j                         dkD  r j                         dk  st        d j                          d      j                         dkD  rt        dj                          d       j                         dk(  xr j                         dk(  }|s7t        j                   j                  d   j                  d   k(   fd        j                  d   }|A|j                         dk(  r|j                         |k(  st        d	| d
|j                         t         |||      S )Nr   r"   r:  r;  r!   r<  c                  >    d j                    dj                    dS )Nr=  r>  r?  r  )rz   r   s   r+   r   z"nll_loss_forward.<locals>.<lambda>  s     0Jv||nTUV r*   rR   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rN   r,  r>   r   r  r   rO  )rz   r   r   r   r"  rA  	n_classess   ``     r+   nll_loss_forwardrS    s#    HHJNtxxzQDTXXZLPQRSSzz|aOPVPZPZP\~]^_
 	
 88:?8vzz|q'8LJJqMV\\!_,V	

 

2I6::<1#499T=i[ I//5||n>
 	

 T669lKKr*   c                      t        | ||||      S r4   )rO  )rz   r   r   r   r"  s        r+   nll_loss2d_forwardrU    s     T669lKKr*   Ac                 0    |dz   | z  |dz   z
  | z  | z  dz   S )Nr"   r   r!   r)   r8   rV  s     r+   _upsample_cubic_convolution1rY    s(    UaK1q5!Q&*Q..r*   c                 <    || z  d|z  z
  | z  d|z  z   | z  d|z  z
  S )Nr     rD  r)   rX  s     r+   _upsample_cubic_convolution2r\    s0    UQU]a!a%'1,q1u44r*   r  c                    d}| j                   t        j                   d      k(  rt        j                  | d| z
  gd      }t        j                  | dz   d| z
  gd      }t        ||      }t	        ||      }t        j
                  |d      \  }}t        j
                  |d      \  }}	|||	|fS t        | dz   |      t	        | |      t	        d| z
  |      t        d| z
  |      fS )Ng      r  rf   r   r   r   )r  r>   r  r\  rY  r  )
r  rV  tt1tt2w03w12r  r  r  r  s
             r+    _upsample_get_cubic_coefficientsrb    s    Axx5<<&&kk1cAg,A.kk1s7C!G,!4*32*32cq)Bcq)B2r2~ )S!4(A.(q!4(q!4	
 	
r*   coeffstsc                 P    t        |      }t        d t        | |      D              S )Nc              3   ,   K   | ]  \  }}||z    y wr4   r)   r  r  r  s      r+   r  z+_upsample_cubic_interp1d.<locals>.<genexpr>  s     EHRREr  )rb  r  r  )rc  rd  coeffs2s      r+   _upsample_cubic_interp1dri    s$    .r2GEFG0DEEEr*   c                 6    t        t        j                  |       S r4   )r   r>   r  )rd  s    r+   r  r    s    %))R  r*   	num_stepsc                     | dk  rt        j                  d||      S |s| dz
  | z  nd}t        j                  | || ||      S )Nr!   r   r  )stepsr  r   )r>   rw  linspace)rk  r  r   r  r  s        r+   _linspace_from_neg_onero    sI     A~||AfE::-:)a-9	$A>>1"ayuMMr*   thetahrL  c                    | j                   }| j                  }t        ||||      j                  d|d      }t        ||||      j                  |dd      }t	        j
                  d||      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |d	dd      }||z   |z   S )
Nr!   )r!   r!   r!   r  )r   r"   constantr   r  r  r{   r!   r!   )r"   r   	r   r  ro  r  r>   r  r  r  r  )	rp  rq  rL  r  r   r  grid_xgrid_ygrid_ones	            r+   _make_base_grid_4drz  	  s    KKE\\F $A}eVDII!QPQRF#A}eVDII!QPQRFzz)5@H XX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?X%%r*   r  c                    | j                   }| j                  }t        ||||      j                  dd|d      }t        ||||      j                  d|dd      }t        ||||      j                  |ddd      }	t	        j
                  d||      }
t        j                  j                  j                  |ddd      }t        j                  j                  j                  |ddd      }t        j                  j                  j                  |	d	dd      }	t        j                  j                  j                  |
d
dd      }
||z   |	z   |
z   S )Nr!   )r!   r!   r!   r!   r  )r   r   rs  r   rt  )r!   r"   )r"   r!   )r   r   rv  )rp  r  rq  rL  r  r   r  rw  rx  grid_zry  s              r+   _make_base_grid_5dr}    s5   KKE\\F#A}eVDII!QPQSTUF#A}eVDII!QPQSTUF#A}eVDII!QPQSTUFzz,eFCH XX  $$VjPQ$RFXX  $$VjPQ$RFXX  $$VjPQ$RFxx""&&xV*TU&VHF?V#h..r*   c                     |\  }}}}t        | |||      }|j                  ddd      | j                  j                  d      z  j	                  d      }|j                  |||d      S )Nr  rR   r   r!   rZ  r"   )rz  r  r^  rT   r   )	rp  r.  r  r#  rU   rq  rL  	base_gridgrids	            r+   _affine_grid_generator_4dr  +  sg    JAq!Q"5!QmLI NN2q!$uxx'9'9!'<<AA"ED99Q1a  r*   c                     |\  }}}}}t        | ||||      }|j                  ddd      | j                  j                  d      z  j	                  d      }	|	j                  ||||d      S )Nr  rR   rD  r!   rZ  r   )r}  r  r^  rT   r   )
rp  r.  r  r#  rU   r  rq  rL  r  r  s
             r+   _affine_grid_generator_5dr  5  sm    MAq!Q"5!QOI NN2q!$uxx'9'9!'<<AA"ED99Q1a##r*   c                     t        j                  t        |      dv d        t        |      dk(  rt        | ||      S t	        | ||      S )Nr  c                       y)NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r*   r+   r   z'affine_grid_generator.<locals>.<lambda>E  r  r*   rD  r  )r>   r   r  r  r  )rp  r.  r  s      r+   affine_grid_generatorr  ?  sJ     
LLD	VU 4yA~(MRR(MRRr*   r  interpolation_modepadding_mode_expand_gridc           	          !"#$%&'()*+,- t        j                  dv fd       t        j                  dv fd       dt        dt        dt        ffd-dt        dt        d	t        dt        fd
+dt        dt        dt        f+fd dt        dt        dt        f -fd} j                  \  $%|j                  \  })*}|dk7  rt        d|       r(|j                  d)*|      j                  )*d      }dt        dt        dt        f$%fd&t        j                   j                        j                  ddd      t        j                   j                        j                  ddd      dt        dt        dt        dt        f&)*fddt        dt        dt        f fd"|d   }	|d   }
dk(  r ||	%      } ||
$      }|j                         |j                         c'('dz   (}}'(dz   }}||}}||z
  ||z
  z  }||z
  ||z
  z  }||z
  ||z
  z  }|'z
  |(z
  z  }t        "fd'(|f|||f|||f|||ffD              S dk(  r< ||	%      } ||
$      }|j                         }|j                         } "||d      S  -|	%      } -|
$      }|j                         '|j                         (|'z
  ,|(z
  }s",j                  d      ,|j                  d      }dt        dt        dt        f "$%fd#dt        dt        f#'(,fd!t        !fd t!        d!      D              }t#        ||      S )"N)r   r!   r"   c                      d  S )NzInvalid interpolation mode r)   )r  s   r+   r   z"_grid_sampler_2d.<locals>.<lambda>^  s    -.@-AB r*   c                      d  S )NzInvalid padding mode r)   )r  s   r+   r   z"_grid_sampler_2d.<locals>.<lambda>a  s    -B<.+Q r*   coordsr.  rO   c                 B    r|dz  dz
  n|dz  }|dz  dz
  }| |z  |z   S r  r)   )r  r.  r   ofsr  s       r+   unnormalizez%_grid_sampler_2d.<locals>.unnormalized  s8     %2tczCs
Sj3|c!!r*   	twice_low
twice_highc                 P   ||k(  rt        j                  |       S |dz  }||z
  dz  }| |z
  j                         }t        j                  ||      }||z  j	                         j                  t         j                        }t        j                  |dz  dk(  ||z   ||z   |z
        S )Nr"   r   r!   r   )r>   r	  r   fmodfloorr7   int8rh   )r  r  r  
coords_mincoords_spancoords2extraflipss           r+   reflect_coordinatesz-_grid_sampler_2d.<locals>.reflect_coordinateso  s    
"##F++]
!I-2J&++-

7K0;&--/222D{{AINEJ.j0H50P
 	
r*   c                     dk(  r| S dk(  rt        j                  | d|dz
        S r | dd|dz
  z        }n | dd|z  dz
        }t        j                  |d|dz
        S )Nr   r!   r"   rR   r   )r  r.  coords_reflectedr  r  r  s      r+   compute_coordinatesz-_grid_sampler_2d.<locals>.compute_coordinates{  sx    1MQ;;vq$(33#6vq!tax.#Q #6vr1t8a<#P ;;/D1H==r*   c                 (     | |      } ||      S r4   r)   )r  r.  	coords_unr  r  s      r+   compute_source_indexz._grid_sampler_2d.<locals>.compute_source_index  s    -	"9d33r*   r"   z4grid last dimension must be 2 (for x,y coords), got r!   r.  ysc                     t        j                  d| k  t        j                  | k  t        j                  d|k  |k                    S rr   r>   r  )r.  r  iHiWs     r+   in_bounds_condz(_grid_sampler_2d.<locals>.in_bounds_cond  sF      GU&&rBw0A0A!r'2PR70ST
 	
r*   r  wsc                      | |      rndt        	
fd| j                  t        j                        |j                  t        j                        |fD              S )Nr!   c              3   p   K   | ]-  }t        j                  |d       j                         / ywr  )r>   rh   r  )r  r  r  r  r  oHoWs     r+   r  z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>  s7      
 KKa#((Ar26
s   36r   )r  r7   r>   r  )r.  r  r  r  r  r  r  r  r  r  r  s      @@r+   clipz_grid_sampler_2d.<locals>.clip  sY    b"%
 A1 
ee%++e.EKK0H"M
 
 	
r*   ixiyc                 8     	| ||      \  }}}||f   |z  S r4   r)   )
r  r  rL  idx_xidx_yw_C_idxN_idxr  r  s
         r+   get_summandz%_grid_sampler_2d.<locals>.get_summand  s0    B?ubue+,r11r*   ).r   ).r!   r   c              3   :   K   | ]  \  }}} |||        y wr4   r)   )r  r  r  rL  r  s       r+   r  z#_grid_sampler_2d.<locals>.<genexpr>  s(      
R B"
s   c                 <     |       } |      } ||d      S r[   r)   )r  r  r8   rY   r  r  r  r  s       r+   get_value_boundedz+_grid_sampler_2d.<locals>.get_value_bounded  s*    #B+A#B+Aq!Q''r*   r  c                     | dz
  z   } dz
  |       |       dz   |       dz   |      f}t        |      S )Nr!   r"   )ri  )r  iy_ofscsr  ix_nwiy_nwtxs      r+   	get_coeffz#_grid_sampler_2d.<locals>.get_coeff  s[    cAg&F!%!)V4!%0!%!)V4!%!)V4	B ,B33r*   c              3   .   K   | ]  } |        y wr4   r)   )r  r  r  s     r+   r  z#_grid_sampler_2d.<locals>.<genexpr>  s     :#y~:   rD  )r>   r   r   r  r  r,  r  r  r  r  r   r  r  r*  rT   r  rS   ri  ).r  r  r  r  r  r  r  rU   twor8   rY   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyrc  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  s.   ` ````                     @@@@@@@@@@@@@@@@@@@r+   _grid_sampler_2dr  M  s    
LLi'B 
LL	!#Q"F "# "& "

F 

s 

 

PV 


>F 
># 
>& 
>4V 43 46 4 77LAq"bZZNAr2s
axB3%H
 	
  yyAr2s+221aRC
6 
v 
& 

 LL188,11!Q1=ELL188,11!Q1=E

 

V 

 

4F 

 

2 2F 2& 2 2
 	VAVAQ!!R(!!R(xxz288:uqy%ueaiueu
urz*U
urz*
rEz*U
rEz* 
 t$t$t$t$	 
 
 	
 
q	 !!R(!!R(XXZ
XXZ
:z155BB

%Z%ZaBaB	(& 	(f 	( 	( 	(
	43 	46 	4 	4 :q::'33r*   c                 "    t        | ||||      S )N)r  r  r  r  )r  )r  r  r  r  r  s        r+   grid_sampler_2dr    s      	-!# r*   c                 &    t        j                   j                         dk(  xr j                         dk(   fd       t        j                   j                  d      j                  d      k(   fd        z  j	                  d      S )Nr"   r!   c                  L    d j                          dj                          S )Nzmatrix @ vector expected, got r  r   rz   r  s   r+   r   zmv.<locals>.<lambda>  s!    0BswwykJ r*   r   c                  v    d j                  d       d j                  d       dj                  d       dS )Nzsize mismatch, got input (r   r8   r!   z), vec (r?  r  r  s   r+   r   zmv.<locals>.<lambda>  s<    ,TYYq\N!DIIaL>RURZRZ[\R]Q^^_` r*   r   )r>   r   rN   r.  r   r  s   ``r+   r  r  	  ss     
LL
a*CGGINJ 
LL		!#` 3J""r*   c                     |-|dz
  |z  dz   }d|z
  | z  |t        j                  |       z  z
  }nd|z
  | z  t        j                  |       z
  }|||z  }|j                  |j                        }t	        ||      S r[   )r   
logsigmoidr7   r   r   )rz   r   r   
pos_weightr   
log_weightr   s          r+    binary_cross_entropy_with_logitsr    s    
  1n.2
F
d"j1<<3E&EFF
d"Q\\$%77f} 776<< Di00r*   tensor1tensor2is_outc                   	 | j                   |j                   k\  r| |fn|| f\  }}ddlm	 |j                   dk\  r|j                   dk  sy|j                  r|sy| j                   dk(  ry 	|j	                         dk(        ry|j
                  }|j                         }dg}t        |dd        D ]  }|j                  ||d   z          t        	fd	t        |t        t        |            |      D              S )
Nr   rR  r   r"   FTr!   rR   c              3   V   K   | ]   \  }}} |d k(        xs  ||k(         " ywr  r)   )r  r   r   r.  rS  s       r+   r  zshould_fold.<locals>.<genexpr>D  s9      D% 	tqy!B^DEM%BBs   &))r}  rs  rS  r  r   r  rv  r)  ru  r  r  ru  )
r  r  r  t1t2t1_shape	t1_strideexpected_strider.  rS  s
            @r+   should_foldr  ,  s     $+<<7<<#?gwgwEWFBDGGqLRWW\	||qbhhjAo&xxH		I cO!"& ;tob&99:; !$tH_56"
  r*   )pass_is_out)r  c                	   ddl m}m} | j                         }|j                         }|dk(  s|dk(  rt	        d| d|       |dk(  r|dk(  rt        j                  | |      S |dk(  r|dk(  rt        j                  | |      S |dk(  rC|dk(  r>t        j                  t        j                  t        j                  | d      |      d      S |dk(  r|dk(  rt        j                  | |      S t        | ||      r>||kD  }|r|j                  n| }|s|n|dk(  r| j                         n| }	|j                  }
t        |
d d       }t!        t"        j$                  |      }|	j                         dk(  }|r|j'                  |	j                  d          |j)                  ||
d         }|rWt
        j*                  j,                  j/                  |j                  |	      |      }|r|j                  j1                         S |S t
        j*                  j,                  j/                  |j                  |	      |      S |dk\  rp|dk\  rj|dkD  r| j3                  d      nd}| j3                  d      }| j                  d d }|dkD  r|j3                  d      n|j3                  d      }|dkD  r|j3                  d      nd}g }t5        |dz
        D ]"  }|j'                  |j3                  |             $ |d	k(  r|d	k(  r{ ||d   |d   k7        rj ||d   dk(        r'| j6                  rt9        | j                  d      |      S  ||d   dk(        r'|j6                  rt9        | |j                  d            S t        t        j:                  ||            }|||gz   }t=        |      }| j?                  |      j)                  |||      }|dk(  }|r7||gz   }|j?                  |      j)                  ||      j                  d      }n)|||gz   }|j?                  |      j)                  |||      }|}|dkD  r|j'                  |       |dkD  r|j'                  |       |r/|jA                  |      j                  d      jC                  |      S |jA                  |      jC                  |      S t        jD                  d
d        y )Nr   )rS  guard_or_truez9matmul does not support 0-dimensional tensors, got dims: z and r!   r"   rR   rZ  r   Fc                       y)Nz/both arguments to matmul need to be at least 1Dr)   r)   r*   r+   r   zmatmul.<locals>.<lambda>  r  r*   )#rs  rS  r  rN   r,  r>   dotr  r  r  rT   r  r^  r  r  ru  r   r  r   ru  r&  r  r   _unsafe_viewr  r.  rS   r  r]  broadcast_shapesr&  r  bmmr  r   )r  r  r  rS  r  dim_tensor1dim_tensor2r  r  r  sizes_1r  folded_dim1t2_is_matrix	t1_foldedr  r#  m1batch_tensor1m2rR  batch_tensor2ro  expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandeds                                 r+   r]  r]  L  s    T++-K++-Ka;!+G}TYZeYfg
 	
 aK1,yy'**		kQ.xx))		kQ.}}UXXeoogq&A7KQOO		kQ.xx))	Wgv	.  +-	$WZZ'$G+:J799;PW 	 ((GCRL)X\\<8 vvx1}, JJ{GBK8	 YY^^00b1A<PF-6699'')BFB99>>..y||B/?NN		kQ. !,aGLLQ\\"cr*!,qW\\"gll26F +aGLLQ#%{Q' 	2A  a1	2 1q mA.-2BBCmA.!349N9Ngooa0'::mA.!349N9Ngwq'9::  $""=-@ 
 3aW<#$89 #>>*=>FF !R
 !A%
"6""=23-r21  #7"a"@&~~.ABJJ$b!  ,?"?"#''(89AA"EJJ<XX#''(89>>|LLUUVr*   r  r  c                 n     j                   \  }}t        |d   ||      }t        |d   ||      }t        j                   t        j                  j
                        \  }}t        j                  |d    j                        j                  |      }	t        j                  |d    j                        j                  |      }
t        ||
|      }t        ||	|      }|j                  d      }|j                         }|j                         }||z
  j                  dd      }||z
  j                  dd      }|j                  t        j                        }|j                  t        j                        }|dz
  ||dz   |d	z   f}|dz
  ||dz   |d	z   ft        |      t        |      }d
\  } j                   t        j"                  k(  rt%              t%        |      }D cg c]@  }|dz  z  t        j&                  |      dz  z   j                  t        j(                        B c}|D cg c]@  }|d|z  z  t        j&                  |      dz  z   j                  t        j(                        B }} fd fdt+        fd|D              } j                   t        j"                  k(  r|t-        d      t/        |||      }nt1        d t3        ||      D              }t        j4                         }|j7                  |      }|S c c}w c c}w )Nr   r!   rT  r  r   rR   r   rf   r"   re  r   c                     t        j                  | ddz
        }t        j                  |ddz
        }t        j                  d d ||g      }|S r  )r>   r   r   r[  )r  r.  y_idxx_idxr2  in_hin_wr   s        r+   load_boundedz0upsample_bicubic2d_default.<locals>.load_bounded  sO    B4!8,B4!8,utT5%&@Ar*   c                      t         fdD              }j                  t        j                  k(  rt	        d      t        |      S t        d t        |      D              S )Nc              3   0   K   | ]  } |        y wr4   r)   )r  x_ofsr	  rY   s     r+   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s     Bl1e,Bs   z4weights_precision_x must not be None for uint8 inputc              3   ,   K   | ]  \  }}||z    y wr4   r)   rg  s      r+   r  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>  s     JRBGJr  )r  r   r>   rE  r,  r  r  r  )rY   src_xr   ixs_ofsr	  weights_precision_x	weights_xs   ` r+   get_x_interpz0upsample_bicubic2d_default.<locals>.get_x_interp  sc    B'BB;;%++%"*$J  &eY8KLLJCy4IJJJr*   c              3   .   K   | ]  } |        y wr4   r)   )r  y_ofsr  s     r+   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s     ;%,u%;r  z4weights_precision_y must not be None for uint8 inputc              3   ,   K   | ]  \  }}||z    y wr4   r)   rg  s      r+   r  z-upsample_bicubic2d_default.<locals>.<genexpr>  s     L(2rb2gLr  rX  )r  r  rC   rD   rV  r(  r>   r  r  r7   r  rT   r  r   r  rb  r   rE  r  r   int16r  r,  r  r  r  r   r  )r   r  r  r  r  rU   h_scale_factorw_scale_factorr   ro  r  x_floaty_floatr8   rY   yscaler3  iys_ofs	weights_yweights_precision_yrL  src_yr  rY  r  r  r  r  r	  r  r  s   `                       @@@@@@@r+   upsample_bicubic2d_defaultr     s    {{Aq$ $D+a.-QN#D+a.-QN''5#H#H#U#UHAu 	[^ELL9<<5<IA[^ELL9<<5<IA#NA}EG#NA}EG#GAA k  c*Fk  c*F	U[[A	U[[A1uaQA&G1uaQA&G08I08I/9,,{{ekk!7	B7	B 
 !**+ejjmc.AAEEekkR
	 
 !**+ejjmc.AAEEekkR
	 

K K ;7;;E{{ekk!& !WXX#E96IJLc%6KLL //6M];FMI

s   9AL-AL2c                 6   t        j                  t        |      t        |      z   dk(  d        |Q|t        d      t	        t
        t        t        f   t        d t        | j                  dd  |      D                    }|r|nd\  }}t        | ||||      S )Nr!   c                       y)Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r*   r+   r   z(upsample_bicubic2d_vec.<locals>.<lambda>3  r  r*   z7scale_factors must not be None when output_size is Nonec              3   P   K   | ]  \  }}t        t        |      |z           y wr4   )r   r   )r  rL  rm   s      r+   r  z)upsample_bicubic2d_vec.<locals>.<genexpr><  s*      Au 	!u,-s   $&r"   re  )
r>   r   r  r,  r   r  r  r  r  r   )r  r  r  r.  r  r  s         r+   upsample_bicubic2d_vecr$  &  s     
LL[D//14L   I  #s(O  #AGGABK ? 
 )6}<GW%amWgVVr*   c                 (      fd}t         ||      S )Nc                     t        j                  |  ||z   j                        }|dz
  |dz
  |j                         z
  j                         z
  S )Nr  r!   )r>   r  r  r   r   middler   dim_idxr  s       r+   r  z_reflection_pad.<locals>.idxK  sF    ,,ufunQXXFzVaZ'++-7<<>>>r*   _reflection_or_replication_padr  r  r  s   `  r+   _reflection_padr-  E  s     ? *	 r*   c                 (      fd}t         ||      S )Nc                     t        j                  |  ||z   j                        }t        j                  |d|dz
        S )Nr  r   r!   )r>   r  r  r   r'  s       r+   r  z_replication_pad.<locals>.idx\  s6    ,,ufunQXXF{{7Avz22r*   r*  r,  s   `  r+   _replication_padr0  V  s     3 *	 r*   idx_fnc                 n   t        |      dz  t        j                  | j                         dz   dz   fv fd       | j                   d  }| j                         z
  }t              D cg c]  }|ddz
  |z
  z      }}t              D cg c]  }|ddz
  |z
  z  dz       }}| }t              D ]E  }d g|j                         z  }	 |||   ||   ||         |	||z   <   t        j                  ||	      }G t        j                  |      }
|j                  |
      }|S c c}w c c}w )Nr"   r!   c                  (    d  d dz    d dz    dS )Nreflection_padzd requires r!   zD or r"   zD inputr)   r   s   r+   r   z0_reflection_or_replication_pad.<locals>.<lambda>o  s$    .[q	sQwiwO r*   rX  )r  r>   r   rN   r  rS   r   r[  rC   r   r  )r  r  r1  	inp_shapenc_dimro  padding_leftpadding_rightr  r  rY  rN   s              @r+   r+  r+  g  sH   
 g,!
C	LL	C!GS1W%%O IUUWs]F8=c
C1GAq1-.CLC=B3ZHWQ#'A+.23HMHF3Z 1&**,. a)A,a@PQAJ##FC01 //7M];FM DHs   1D-D2c           
          t        |      dz  |j                   d  D cg c]  }|dz
  	 }}t              D cg c]  }|ddz
  |z
  z      }}t              D cg c]  }|ddz
  |z
  z  dz       }}g }t        |j                        D ]c  }dg|j                  z  }	d|	|<   |j	                  t        j                  |j                  |   |j                        j                  |	             e |d   | d  }
d t              D cg c]  }|
|   ||   z    }}t              D cg c]  }||   |
|   z
   }}t              D cg c]  }d||   z  ||   z   |
|   z
   }}t              D cg c]  }||   d||   ||   z   ||   z   f }}t        j                  t        j                  t              D cg c]  } ||          c}      }t        j                   ||z   d      } fd}t        j                  t              D cg c]  }g d	 c} D ]  }|t!        dgz        k(  rg }g }t              D ]t  }||   dk(  r||   }||   }n=||   dk(  r||   }|
|   d||   f}n$||   dk(  r||   }|
|   ||   ||   z
  ||   dz
  f}|j	                         |j	                         v  ||||      } |S c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )
Nr"   r!   rR   r  c                 F    | \  }}}t        j                  ||k\  ||k        S r4   r  )index_rangero  lbubs       r+   index_range_conditionz7_reflection_pad_backward.<locals>.index_range_condition  s(    	2r  b!r'22r*   r   r   c           	      .   t        	      D ]*  }||   d   ||   d   k  }t        |t              s%|s(| c S  t        j                  t
        j                  |D cg c]
  } |       c}      }t
        j                  
||z   d      }| |z   S c c}w )Nr"   r!   r   )rS   r6   r  rK   r   r   r  r  )r   r   index_rangesro  upper_less_than_lowerr;  r  gr  rN   rk   r>  s           r+   r%  z,_reflection_pad_backward.<locals>.accumulate  s     s 	A$0OA$6a9K$K!/6;P	
 COPK";/P
 %%k4S#Fax Qs   B
)rR   r   r!   )r  r  rS   r}  ru  r>   r  r  r  rK   r   r   r  r  	itertoolsr
   r  )rk   r8   r  rq  dhwro  r7  r8  r  
view_shapexyzcenterleft_reflectright_reflectrange_cr  r   r%  rU   areaoutsr@  r   r;  r  rN   r>  s   `                       @@@r+   _reflection_pad_backwardrM    s`   
 g,!
C''3$%.
)Q1q5
)C
)8=c
C1GAq1-.CLC=B3ZHWQ#'A+.23HMHG166] SS166\

1u||AGGAJqxx@EEjQRS
 	#A
3$%.C3 16c
;1c!f|A&;F;6;CjALOc!f,ALADI#JOqQQZ,q/1CF:OMO
 NSSVZHIAs1vQ/-2BBCG  eCjQ0<QD $$[$F
CHD !!c
#C1J#CD 45!s##s 	-AAw!|Qi%ajaB"1o"1vq,q/:aA#A&"1vs1va0@'@#a&1*MKK,	- $l3-40 Kc *CH2 <AO R& $Ds5   K K$$K)K.8K3K8K=L
&Lr   r   r  c                j    t        j                  | ||      }t        j                  | ||      }||fS )Nr  )r>   aminrX  )rz   rN   r  rO  rX  s        r+   aminmaxrP    s2     ::dW5D::dW5D:r*   r   c                    t         j                  t        j                  t        j                  |       d|       |||      S )Nr   r   )r   r   r>   rh   isnan)rz   rN   r  r   s       r+   nansumrS    s2     88EKKD 11d;S'QV8WWr*   r   r>  r  rX  r>  c          	      N    t         j                  j                  d| d||||      S )Nr   r!   rT  r   r  
start_step)rj  r   r>  r  rX  s        r+   arange_defaultrX    s/     ;;!!	3vf "  r*   c          	      N    t         j                  j                  | |d||||      S )Nr!   rT  rV  )ri  rj  r   r>  r  rX  s         r+   arange_startrZ    s/     ;;!!sAU6&Z "  r*   c                      ddl m}  || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyper\  )rE   rF   r\  s      r+   out_dtype_decompr^    s    AD+F++r*   marginc                 t   	
 t        j                          t        j                         j                  d   
 j                  d   	t        j                  |dk(  xs |dk(  d        t        j                   j
                  dk(  xr 	dk7   fd       t        j                  j
                  dk(  xr j                         
k(  
fd       Qt        j                        t        j                  j
                  dk(  xr j                         	k(  	fd       j                  d      t        j                   d      }||z
   z   }|j                  d      }|dk(  r|n||z  }|   z  }t        j                  	 j                  	      }t        j                  |k7  |d      }|t        j                  j                  k(  r|j!                         S |t        j"                  j                  k(  r |j%                         |j                  d   z  S |j!                  d
      S )Nr   r!   r"   c                       y)Nz only p == 1 and p == 2 supportedr)   r)   r*   r+   r   z#multi_margin_loss.<locals>.<lambda>"  r  r*   c                  "    d j                    S NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r  s   r+   r   z#multi_margin_loss.<locals>.<lambda>%  s    _`e`k`k_lm r*   c                  (    d  dj                    S )Nz#inconsistent target size, expected r  r  )nframer   s   r+   r   z#multi_margin_loss.<locals>.<lambda>)  s    5fXYv||nU r*   c                  (    d  dj                    S )Nz#inconsistent weight size, expected r  r  )rN   r   s   r+   r   z#multi_margin_loss.<locals>.<lambda>/  s    9#i~V r*   r  r  r   )r>   
atleast_2d
atleast_1dr  r   r}  r   rT   rK  r_  r  r  rh   r    r'   r{   r   r(   r   )r   r   rR  r_  r   r   uri   r  rN   re  s   ``  `    @@r+   multi_margin_lossrj    s    U#Ef%F[[^F
++a.C	LLa!16#MN	LL

a$C1Hm 
LLq5V\\^v5U !!&)KK163!6V	
 a FU0A
UA	AA!VQAv
,,s5<<
0CC6M1a(AINN(((vvx	imm))	)uuw##vv!v}r*   	is_targetc                    | j                   |j                   t        j                  |       } t        j                  |      }| j                   d   }t        j                  t	              dk  xr |dk7  fd       t        j                  t	              dk  xr k(  fd       t        j
                  ||j                        }|dk(  }t        j                  t        j                  |||      dd	      }||k  }t        j                  ||d      }t        j                  | d|
      }	t        j                  ||d      }
t        j                  ||
j                  d      k(  d      }d|	j                  j                  d      z
  | z   }|j                  d      }||z  }t        j                  |d|      }|t        j                  j                   k(  r!|j#                  d      j%                         }n@|t        j&                  j                   k(  r|j#                         }n|j#                  d      }|j)                  | j*                        j-                        }||fS )Nr!   r"   r   c                      d  S rc  r)   )orig_input_shapes   r+   r   z0multilabel_margin_loss_forward.<locals>.<lambda>Q  s    _`p_qr r*   c                      d d  S )Nzinconsistent target size: z for input of size: r)   )rn  orig_target_shapes   r+   r   z0multilabel_margin_loss_forward.<locals>.<lambda>U  s    ,->,??STdSef r*   r  rR   Tr  r  r   rf   )r   rR   )r  r>   rg  r   r  r  r  rO  rh   rK  r  rT   Tr_  r    r'   r{   r   r   r(   r7   r   r&  )r   r   r   rN   r  is_endend_idxtarget_masktidx0ri  tidx1rk  ri   rn  rp  s                @@r+   multilabel_margin_loss_forwardrw  B  s    {{U#Ef%F
++a.C	LL"/saxr 
LL!#M(9=M(Mf
 ,,s6==
1Cr\FjjVS#6BMG-KKKVQ/EU%0AKKVR0E		#R!88a@Iaccmmm##e+A	AA	CAIq!$AINN(((EEgE##%	imm))	)EEGEEgEU[[)112CDIi<r*   )	attn_maskrm   querykey	dropout_p	is_causalrx  c                    t        j                  t        j                          fd       t        j                   j                         dk(  xr( j                         dk(  xr j                         dk(   fd       t        j                  dk(  fd       t        j                   j                  d   j                  d   k(  xr j                  d   j                  d   k(  d        t
        j                  j                   ||d | j                  d      j                  d      k7  		      \  }}|j                  d
ddd      j                  t         j                        j                  dd
dd      }||fS )Nc                  "    d j                    S )Nz-query must be FP32, FP64, BF16, FP16 but got r   )ry  s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    ?}M r*   rD  c                  n    dj                          d j                          dj                          S )Nz,q, k, v must be a 4 dimensional tensor, got r  r   )rz  ry  r{   s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s3    >uyy{m2cggi[XZ[`[d[d[fZgh r*   r   c                      d  S )Nz&dropout probability must be zero, got r)   )r{  s   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  s    $J9+"V r*   r   c                       y)Nz&q, k, v should have the same head sizer)   r)   r*   r+   r   z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>  r  r*   r!   )rx  r{  r|  dropout_maskrm   
enable_gqar"   r   rX  )r>   r   rK  rN   r  r   "_scaled_dot_product_attention_mathr  r.  r  r  r\  )	ry  rz  r{   r{  r|  rx  rm   r  attns	   ````     r+   *scaled_dot_product_flash_attention_for_cpur    sV    
LL&M 
LL		q@SWWY!^@		q0@h 
LLSV 
LLA%++a.(KSYYq\U[[^-K8
 ::BB::a=CHHQK/ C 
LFDF 	q!Q"	%"9"9	:	Aq!	 
 4<r*   c                 .    t        |       fd       }|S )Nc                  <     | i |}| d   j                  |      S rr   )r  )rE   rF   r   outplace_ops      r+   
inplace_opz$register_inplace.<locals>.inplace_op  s%    4*6*Aw}}S!!r*   r   )aten_opr  r  s    ` r+   register_inplacer    s"    G$" %" r*   c                 B   | j                         s&| j                         st        |      }t        |      }t        j                  ||      }t        |t        j                        r|dk7  r||z  }|dk(  r|S t        |t        j                        r|dk7  r| |z  } | |z   S r3  )rK  r  r  r>   r  r6   numbersNumber)rz   batch1batch2rb   rl   r  s         r+   baddbmmr    s     !!#DOO,=4yE
YYvv&FeW^^,
%qydGNN+tqyd{&=r*   c                 2    t        j                  | |d      S )Nr  r  r  )rz   r   s     r+   floor_divider    s     99T588r*   c                 `    t        j                  t        j                  | j                  d      S r[   )rK   r   r  r   r  )r  s    r+   	sym_numelr    s    HLL!''155r*   r   r   c                    |"t         j                  j                  | g |      S t         j                  j                  | g ||      S )Nr   r  )r   r   dim_IntListIntList_out)rz   r   r   s      r+   sum_defaultr    sC     {xx##D"E#::xx##D"Es#CCr*   c           	          t        | t        j                        s| S |@t        j                  j                  | t        t        | j                                           S t        j                  j                  | |g      S r4   )	r6   r>   r   r   r  dimsru  rS   rN   )rz   rN   s     r+   squeeze_defaultr    s\     dELL)
{||  tE$((*,='>??||  u--r*   c                 2   t        fdt        t        | j                              D              }|j                  t
        j                  k(  rt
        j                  nd }| j                  d|d|      }| ||j                  |j                        z  z  |fS )Nc              3   .   K   | ]  }|k7  s	|  y wr4   r)   )r  ro  rN   s     r+   r  z)_weight_norm_interface.<locals>.<genexpr>  s     @1qCxQ@s   
r"   T)r  r   )
r  rS   r  r  r   r>   r  r  r   r7   )r2  rB  rN   keep_dim
norm_dtyper   s     `   r+   _weight_norm_interfacer    st     @c!''l 3@@H !5>> 9tJ66!Xt:6>DDGGAGG$$%t++r*   assume_uniqueinvertc                   t        | t        j                        s!t        j                  | |j                        } t        |t        j                        s.|rt        j
                  | |      S t        j                  | |      S ddlm}  ||j                         dt        | j                         d      z  k        rt        | ||      S t        | |||      S )Nr  r   rR  g      $@g(\?r  r  )r6   r>   r   r]  r  ner  rs  rS  r   r   isin_defaultisin_sorting)elementstest_elementsr  r  rS  s        r+   isinr    s     h-&&x8L8LMmU\\288Hm4488Hm44Dm))+dS9I55Q.QQRHmFCCm=
 	
r*   )rV  c                B   |?t        j                  | j                         t         j                  | j                        }n?t        j                  | j                         |t         j                  | j                        }|| k  j                  | j                        }|S )Nr  )rV  r   r  )r>   randr.  r   r  r7   r   )rz   rV  raw_prR  s       r+   	bernoullir  $  sq     

499;emmDKKP

IIK--;;	
 
$**%AHr*   r  c                   | j                         dk(  r%t        j                  | t        j                        S |j                  dk(  r| |k(  }|r| S |S | j
                  d|j                  z  z   }| j                  |      }t        t        d|j                   dz
  d            }||k(  j                  |      }|r| S |S )Nr   r   r!   rR   r!   r   )
r   r>   
empty_liker  r}  r  r  r  rS   r  )r  r  r  rN  expanded_elem_shaper8   rN   s          r+   r  r  7  s    ~~1

;;Q-'t&3&"..4-2D2D+DD)*A
b=---126
7C
"
"s
"
+CC4"s"r*   c                   | j                         }|j                         }|rt        j                  ||g      }t        j                  |d      \  }}|dd  |d d k(  }	t        j                  |	ddgd      }	|r|	j                         }	t        j                  |	      }
|
j                  d||	      }
|
d| j                          j                  | j                        S t        j                  |      \  }}t        j                  ||      }t        j                  ||j                         k  |d      }||   |k(  }|r|j                         n|}|j                  | j                        S )NT)stabler!   rR   r   F)r;  r>   r0  sortrs  logical_notr  r  r   r&  r  searchsortedrh   )r  r  r  r  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskr  sorted_test_elementsrU   r  test_idxcmps                   r+   r  r  D  sN   $$&M&..0 yy-1C!DE(-

<(M%(,0DD..~1vuM+779N/q,?A()11(..AA"'**-?"@a  !5}E;;s%9%?%?%AA3J"8,=#)coos{{8>>**r*   c                 .    | j                  d      }||   S rQ   )r&  )rz   r  	flatteneds      r+   taker  a  s     R IUr*   c                     |t         j                  }|t         j                  k(  rt        |      }t        j                  | |j                  |      S r2  )r>   r\  preserve_formatr   r   resizer  )rz   r   rY  s      r+   	resize_asr  h  sD    //----e4;;tU[[;FFr*   	ceil_modec                 X   t        j                         rt        S | j                  j                  dk(  rt        S |j                  d      }|j                  d      }	| j                  d      }
| j                  d      }|j                         dk(  }|s3|j                  d      }| j                  d      } |j                  d      }|j                  d      }|j                  d      }| j                  t         j                  t         j                  fv }|rt         j                  n| j                  }t        j                  ||z  ||	z  || j                        }| j                  ||z  |
|z        }|j                  ||z  |
|z        }|r|j                  t         j                        }|j                  d||      }|j                  ||||	      }|r|j                  | j                        }t!        j"                  |      }|j%                  |      }|s|j'                  d      }|S )	u  
    Decomposition of max_pool2d_with_indices_backward using scatter_add.

    This replaces the native implementation with a high-level decomposition
    that uses scatter_add for gradient accumulation. The scatter-based approach
    provides automatic optimization opportunities for Inductor and handles all
    pooling configurations without requiring specialized fallback paths.

    Algorithm:
        For each output gradient position, use the corresponding index from the
        forward pass to scatter the gradient to the input position. When multiple
        output positions select the same input position as max, scatter_add
        automatically accumulates their gradients.

    Complexity: O(B * C * H_out * W_out)
        Independent of kernel size, unlike traditional O(B * C * H_in * W_in * K²)
        approaches that iterate over input positions and kernel windows.

    Known Limitations:
        - FP16/BF16: Uses FP32 accumulation internally to preserve precision when
          many gradients accumulate to the same position (overlapping pooling windows).
          This adds slight overhead but ensures numerical stability.
        - Deterministic mode: Falls back to native implementation to ensure
          consistent results across runs

    Args:
        grad_output: Gradient w.r.t. pooling output [B, C, H_out, W_out]
        self: Original input tensor (for shape) [B, C, H_in, W_in]
        kernel_size: Pooling kernel size
        stride: Pooling stride
        padding: Pooling padding
        dilation: Pooling dilation
        ceil_mode: Whether to use ceil for output size calculation
        indices: Indices from forward pass (per-channel linear positions)

    Returns:
        Gradient w.r.t. input [B, C, H_in, W_in]
    mpsrZ  rR   rD  r   r!   r  rX  )r>   $are_deterministic_algorithms_enabledNotImplementedr  r  r.  rN   rT   r   r   r  r   r  r&  r7   scatter_addrC   r   r  r  )rk   rz   r  rv  r  r  r  r  	in_heightin_width
out_height	out_width
is_batchedrp  channelsuse_fp32_accumaccum_dtypegrad_input_flatgrad_output_flatr  rW   rY  s                         r+    max_pool2d_with_indices_backwardr  q  s	   d 113 %' 		"Iyy}H!!"%J  $I qJ~~a !++A.##A&1Jyy|H
 !&&5==%..*IIN#1%--{7H7HK kkXH!!	O #**XzI5 ??:#8*y:PQL +..u}}= &11!\CSTO !((Xy(SJ ]];#4#45
 //5M&&]&CJ ''*
r*   window_lengthc                L    t         j                  j                  | d||||      S )a  hann_window(window_length, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length` with ``periodic=True``.

    Equivalent to :func:`torch.hann_window` with ``periodic=True``.

    Args:
        window_length (int): the size of returned window.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    TrT  )r   hann_windowperiodic)r  r   r>  r  rX  s        r+   r  r    s4    2 $$ %  r*   r  c                   ||nt        j                         }| dk(  rt        j                  d||||      S | dk(  rt        j                  d||||      S t	        j
                  |      }|r| dz   n| }t        j                  |||||      }|dt         j                  z  |dz
  z  z  }t        j                  |      }|dz  dz   }|r|j                  dd|       n|}	|	j                  |      S )	a@  hann_window(window_length, periodic=True, *, dtype=None, layout=None, device=None, pin_memory=False) -> Tensor

    Returns a Hann window of size :attr:`window_length`.

    .. math::
        w[n] = 0.5 - 0.5 \cos\!\left(\frac{2\pi n}{N-1}\right)

    where :math:`N` is ``window_length + 1`` when ``periodic=True`` (for spectral analysis),
    or ``window_length`` when ``periodic=False`` (symmetric window).

    Low-precision dtypes (``bfloat16``, ``float16``) are computed in ``float32`` then cast.

    Args:
        window_length (int): the size of returned window.
        periodic (bool, optional): if ``True``, returns a periodic window for use with STFT.
            Default: ``True``.

    Keyword args:
        dtype (:class:`torch.dtype`, optional): desired dtype. Default: global default.
        layout (:class:`torch.layout`, optional): desired layout. Default: ``torch.strided``.
        device (:class:`torch.device`, optional): desired device. Default: current device.
        pin_memory (bool, optional): if ``True``, pins the returned tensor. Default: ``False``.
    r   r  rT  r!   r  r   r   r   )r>   get_default_dtyperD  r  rC   r  r  picosr/  r7   )
r  r  r   r>  r  rX  compute_dtyper#  r  windows
             r+   hann_window_periodicr    s    D &EE,C,C,EE{{fV

 	
 zzfV

 	
 //6M%=A		A 	
S588^q1u%&A		!A	D3A.6QXXaM*AF99Ur*   num_classesc                    |dk(  r*t        | j                         j                               dz   }t        j                  j                  t        j                  | dk\        d       t        j                  j                  t        j                  | |k        d       | j                  d      t        j                  || j                  | j                        k(  j                  t        j                        S )NrR   r!   r   z+one_hot: Class values must be non-negative.z7one_hot: Class values must be smaller than num_classes.r  )r  r   r  r   _assert_asyncmsgr>   r  rT   r  r   r  r7   r  )rz   r  s     r+   one_hotr  b  s    b$((*//+,q0		$!)5 			$$%A
 	r<<4::dkkJ	Kbor*   )FF)noner4   )r"   )r   NNr!   )rR   FFr  r  ru  )r!   r!   F)Fr   )r   rf   N)r   r!   N)Fre  )NNN)r   r   FT)r   r   Fr*  )r   Fr  )rR   (  rK   rC  r  r  rw  collections.abcr   r   
contextlibr   enumr   r   r   r	   r
   typingr   r   r>   torch._meta_registrationstorch._primsr^  r   torch._prims_common_prims_commonrC   torch.nn.functionalr  r  r   r   r   r   torch._decompr   r]  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   rA   torch.utils._pytreer   r?  DispatchKeyr   ru  str__annotations___opsr  r   r    rV  r  rM   rW  compute_only_pw_cast_for_opmathpw_cast_for_opmath"pw_cast_for_opmath_non_tensor_argsr(  pw_cast_for_int_to_realr  rV   r_   ra   rj   r  rv   rC  r  r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r'   r{   r  _safe_softmaxr  r  r  r  rW   r  r  r   r!  r*  r8  rB  rE  rJ  rM  rO  rQ  rT  rg  ro  slicer  r  rn  r  r  r  r  r  r  r  r0  r4  r=  rC  rG  py_implCompositeImplicitAutogradAutogradrF  r[  r_  re  rm  r&  r{  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r'  r-  r1  unsafe_chunkr4  r0  r/  no_statsr8  r<  rH  rN  rR  rT  _fused_dropoutrW  rc  r  rY  r  lift
lift_freshrf  rk  rn  rq  rp  r  r  r  _adaptive_avg_pool2dr  r  r  r  r  r  r  r  r  r  r  r  r"  	Generatorr$  r0  r3  rI  r  rP  rV  r9  _upsample_nearest_exact1drR  rX  r>  rD  rK  r6  rc  rl  rq  ru  r  r  r  r  r  r  rnn_tanhr   r  rnn_relur  r  r  r  r  r  r  r  lstmr  r  r  r  grur  r  r  r  r  r  r  r   r  r
  r  r  r  r  r  r  r  r7  r9  r  r[  r'  r  rI  rO  rS  rU  rY  r\  rb  ri  r  ro  rz  r}  r  r  r  r  r  r  r  r  r]  upsample_bicubic2dr   r$  reflection_pad1dreflection_pad2dreflection_pad3dr-  replication_pad1dreplication_pad2dreplication_pad3dr0  r+  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardrM  rP  rS  r  r[  r>  rX  ri  rZ  r^  rj  rw  +_scaled_dot_product_flash_attention_for_cpur  r  r  r  r  r   r  r  rN   r  r  r  r  r  r  r  r  r  r  r  periodic_outr  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__r  r  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__ldexp_leaky_relu_
leaky_relulogit_logitrelu_r  renorm_renormround_r*  scatter_r%  scatter_add_r  scatter_reduce_scatter_reducesilu_r  r)   r*   r+   <module>rE     sP       
 . "  % $      #   , , 0 7   * ( hh"" c zz~~   %$)	''99' ' "	'T #*88@@# 
 uDDLL  &-88@@ & "
 "uDDQQ  c f  **+\2F 2v 2   ,2 --.\4v 4& 4   /4 ../\S S6 S S5 S   0S
 ))*\


 
 	

 
 
   +
2 ))*+( ,( ))*+"V " ," (()@f @ @   *@ 112\f F    3 ../\PP%P05P@EP  0P 'GF Gv G   (G //0F & V    1 //0\:F :& :U :  1: 001\LL%L7<LNRL   2L **+\) )f )3 )   ,)< **+5v 5f 5  ,5 		"&v && &   #& **+\>v >V > >   ,>
 **+6 6 66 6 ,6 334%%
% % 66>	% 5% 667



 
 	

 
 
 
 
   8
$ 112\<f <F <F <v <   3< 

#/ /v /& /  $/v #   &3<>>3G3G1
1 1-011   '1 ../\11 &1061CF1   01 **+0 ,0 ++, ^^))		1
	1	1 	1 		1   -	1 44<<=%/5BEMR  > 44??@
R
R

R 
R 	
R
 
R 
R  A
R 00889		%	/5	BE	NS	  :	 00445
R
R

R 
R 	
R
 
R 
R  6
R#$#$
#$ #$ TM	#$
 #$ #$ #$ #$L ))*\Nf NF N N N   +N( ../\..
. . TM	.
 . . . .  0.b 001\$$
$ $ TM	$
 $ $ $ $  2$N 112 !^^))	1
11 TM1 	1
 1   31& ::;\
 !^^))
  TM	
     <  --. ^^))111 1 	1   /1 667\
 ^^))			
	 	 		
 	   8	 		") )v )% )  #) ,,-& &F &v &  .& ++,	O	Oc	O 
	O 		O
 
	O 	O  -	O 

))* 3?
3? 
3? :	3?
 
t3? 3? +3?l #d
14t
38_. **+ $U$U	$U 
$U :	$U
 
t$U $U  ,$UN ,,-E Ed3i Ec ERU E  .E
 ../OO&*3iO9<ODGOORO  0O%+:?++ 334\ XX!'X.1X@EX !  5X 778 KK!'K.1K@EK !  9K*& $MMcM 3iM #Y	M
 IM M  %M` $]]c] c] 3i	]
 #Y] I] ]   %]@ 445
 
v 
e 
  6
 ,,-Y
Y"3iY47Y?BYJMYY  .Y" ++334;?

%
,1DL

  5
& %kCCDk2236 e D4K  4 E & ++,VVA& AU A4$; A  -A & S    '4 ))*F  T   +6 ' $  	
    (( 556  	
   7:DI &\	  
&\	*T&\ 
tF| 
# 
"&\"	" "J 00$//2E2EFG
 	&\	  
$	
  H  	'')C)C)G)GH #	
c 
 
f		
 
&\D  ))0015 5C 5c 5%PSBT 5 25 44<<=67BB $S	B03B
63;B >B 

))*/ /C /c /%:L / +/( --55)) //
// &// 
// 63;	////f 

# f F # #    $" ../
 

  	
     0" 

#	 	f 	6 	 	 	   $	 77??@S&S&S& S& 	S&
 D=S& S& S& 
S& S& dS& 6D=&4-$67S&  AS&n 77;;<  	
 D=   
  d ,, ,, ,, 6D=&4-$67 =86D= FTM  77??@QQQ 3iQ 	Q
 Q TMQ 4-Q dQ 6D=&4-$67Q AQj 77;;< 3i 	
  TM 4- d ,, ,, ,, 6D=&4-$67 =4 ,,445333i3 TM3 
	3
 66>3 63l 55==>HHH 3iH 	H
 TMH dH 6D=&4-'(H ?HVRRTMR 4-R 4-	R
 $R R R 
R R 6666D=&4-?@Rj ../UK/((TM( 4-( 4-	(
 $( ( ( 
( 666!"( 0 0(4 ''(<(<=''(M(MN 
 
TM 
 4- 
 4-	 

 $ 
  
  
 
 
 666!" 
 O > 
F "";#H#HIG4< G JG AAIIJTM 4- 	
   
 666!" K* 55==>((TM( 4-( 	(
 ( ( ( 
( 666!"( ?(  55>>?((TM( 4-( 	(
 ( 
( 666!"( @( @@HHIKKTMK 4-K 	K
 K K K 
K 6666612K JK4TM 4- 	
  
  : 44<<=11TM1 4-1 	1
 1 1 
1 6666)*1 >14 ??GGHCCTMC 4-C 	C
 C C 
C 6666669:C IC8 22::;11TM1 4-1 	1
 1 1 
1 6666)*1 <14 ++,VV   - & !%"&04+
+ ;;+
 LL4+ + + &&-+  '+b diiAB  C &&{';';<--.VVVV, 4- 4-	
 $  !&  - / => 00889 TM 4-	
 $ } $  
 d  66D=&4-/0 :6 77??@iii TMi 4-	i
 $i }i $i i 
i di 66D=&4-/0i AiZ 77;;<""" TM" 4-	"
 $" }" $" " 
" d" ,," ,," ,," 66D=&4-/0" ="J 778VVV$  4-	
 $ } tm  % 90 667VVV$  4-	
 $ } tm   % 82 112d'v d'E#s(O d'   3d'N
)8<S	HK6 ))**8
*8*8 c*8  +*8Z ))*494949 c49 I	49
 #Y49  +49n ( HH	H H 	H H )H ' II	I I 	I I  (I$ %@%@	%@ %@ 	%@ %@ %@P ))112"";#H#HI J 32 (()<: <C <
 <J < *< (=* =3 =z =: =  )=@@@$.@8B@PT@, 001Xx (f (vv~)> (  ! 2( % ""(,		e	 *u
 %	  &" &; ';
8 //334//334//334$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9:$$[%J%JK$$[%9%9:	3	3cT!	3 ;%	3 		3 ; L ; L ; L 5 5 5	3 66::;66::;66::;##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A##++K,Q,QR##++K,@,@A	?	?cT!	? ;%	? 		? B S B S B S < < <	?#L 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd;  ;;c; DL; 	; < ? P X; 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd;  GGcG DLG 	G < F WG 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd; "!	GGcG dlG dl	G
 G < ? P XG 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd; "!	SScS dlS dl	S
 S < F WS 0088$:Q:Q:U:UVW  (()N)NO  (()=)=>Dd; "!!QQcQ dlQ dl	Q
 dlQ Q < ? P XQ 	##++T-K-K-O-OP ''//0U0UV''//0D0DEDd; "!!		c	 dl	 dl		
 dl	 	 < F W	 
 	c  	
  4&>/$ FK+\&,/-d& R ++,[BBC[112. 3 D -.8 ++,[BBC[112. 3 D -.8 **+KAABK001. 2 C ,.@ **+KAABK001. 2 C ,.@/6=@/d 		(>>?--.S / @ )S@ 		'==>,,-S . ? (S>;; &{<<={++,. - > '.6 '==>,,-. . ? (.6 44889!!))+*O*OP!!))+*>*>? @ Q : 33778  (()N)NO  (()=)=> ? P 9 33778  (()N)NO  (()=)=> ? P 9 0044511556##K$I$IJ##K$8$89%%k&K&KL%%k&:&:;&&{'L'LM&&{';';<A = N < M : K 7 6A //779O9O9S9STU
 "	KKcK K dl	K
 K  VK 	%%t'?'?'C'CD !!))+*>*>?
 "!UUcU U dl	U
 dlU U  @
U 	&&(A(A(E(EF 
 "!!

c
 
 dl	

 dl
 dl
 
 
V/7	&	7$,V$47IO77-'9 -f - IIcI I 	I
 I IZ ))112F v $  3 ,,d.?.?@A  B ++,-" ." //019 29 2234C 5C6 AABCM DM23 
3 3  TM3  	3 
 3  66>3 l --.X~&L
LL TML 	L
 L 66>L ' /L@ //0X~&L
LL TML 	L
 L 66>L ' 1L/F /u / /5F 5u 5 5
 
3E 
(F%7 FV F F!Xf% !& !NN#'N05NEJ\\N&f & & &T &"/f / / / /T /"!V !49 !T !$V $49 $T $ 223S StCy S S   4S  f4f4
f4 f4 	f4
 f4 f4 f4R ,,-  
  	
     .   	#   !	# ==>$)..:N:N1  ?1$  d t @ [BBC>>?', yW  @ DyWx 0088$:Q:Q:U:UVW  (()=)=>
 ! TTsCxT T T\	T
 T\T T   ? XTn //334$$[%J%JK$$[%9%9:
 15	WWsCx4'W W &-	W
 W   ; L 5
W4 --.--.--.	v 	c3h 	F 	   / / /
	 ../../../	 	sCx 	V 	   0 0 0
	38_ c3_f,- 	8 667667667\T  8 8 8Tn %UEu   & $X4 X  %X ,,dkkoo>? !% =="&
	
 ;;
 LL	

 LL4
 
  @
 **+,
 !% =="&	 ;;	
 LL LL4  - 	", #, ../''(<(<=  ^^))))) ) 	)
 TM) ) )  > 0)X ;;<$$,,44[5I5IJX{#--- - 66>	- $ K =-x HHPPQ
 A  $AA	A A 	A
 A }A 4<A 66>A RAH '   ( ))*9  +9 '6 (6 ))488<<89 !%		D
	D ;;	D 
$		D
 	D :	D --t||/?/?@A.& .sTz . B. 		==>, ?, 		"38 
  #
( ../ )-
,, % \\	 0$ 5: 
# <A +: 		"  #
 'G (G ==>ss
s s s ?sl ))1143C3C3G3GHI !%"&"&" ;; LL4	
 LL4 t   JB ))22D4D4D4Q4QRS 6 !%"&"&"666 ;;	6
 LL46 LL46 t6 6  T6r t{{ + djj ) djj )  - TYY ' TYY ' $.. 1  / ""D$4$4 5  - !!4?? 3 $.. 1 ##T%6%6 7 t{{ + !!4?? 3  - djj ) !!4?? 3 djj ) TYY ' t{{ + djj )  - ""D$4$4 5 %%t':': ; TYY ' kCCD& s F  Er*   