
    9j                     l   d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlZddlZddlmZ ddlmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z7 ddl8m9Z9 erddl:m;Z; ddl<m=Z= ddl m>Z>  ej~                  e@      ZAej                  j                  e@d      ZD G d dej                        ZF G d de
      ZGdedeFdz  fd ZHd!d"d#ed$eIeJef   deGfd%ZKd#edeGfd&ZLd!d"d#ed$eIeJef   deGfd'ZMd(eGdeNe4   fd)ZOe G d* d+             ZPe G d, d-             ZQe G d. d/             ZRe G d0 d1             ZSePeQz  eRz  eSz  ZTd2edej                  dz  fd3ZVd!d"d4e9e.   deWfd5ZX	 	 dYd!d"d6ed(eGd7d8d4e9e.   dz  d9eWdeWfd:ZYd!d"d(eGd4e9e.   de,dz  fd;ZZd<eNe.dz     d=eNe.dz     deIe.e.f   fd>Z[d!d"d?d@d(eGdAe-deWf
dBZ\d!d"d2edeWfdCZ]d!d"d2ed(eGde-dz  fdDZ^	 	 	 dZd!d"d2ed(eGdEeJdFej                  jd                  dGe0dz  dHe`edIf   d6edJedKead?dLdMeadz  ddfdNZbd!d"dOed#dPd$eIeJef   deaf
dQZcd!d"d2edMeade-dz  fdRZdd!d"d(eGdSe-defdTZed!d"dHe`edIf   dUeNe`eFef      deNeT   fdVZf G dW dXe!      Zgy)[z
This module contains the InvokeSubgraphHigherOrderVariable class and its
supporting helpers for subgraph reuse (auto-cache) in Dynamo's invoke_subgraph
higher-order operator.
    N)	dataclass)Anycast
NamedTupleTYPE_CHECKING)graph_break_hints)unimplemented)extract_tensor_metadataGUARD_VALUE_DISPATCHGuardCheckSpec
SKIP_GUARDUnsupportedGuardCheckSpec)SyntheticLocalSource)VariableTracker)ConstantVariable)UserFunctionVariable)WrapHigherOrderVariable)ListVariableTupleVariable)UnspecializedNNModuleVariable)SymNodeVariableTensorVariable)GuardInvokeSubgraphReuseConditionInvokeSubgraphReuseEntrySource)NestedCompileRegionOptions)GraphModule)Proxy)_pytree)
OrderedSet)Sequence)InstructionTranslator)SubgraphTracingInfohierarchical_compilec                       e Zd ZdZdZdZdZy)InputTagtensorsymnodeconstantmoduleN)__name__
__module____qualname__TENSORSYMNODECONSTANTMODULE     g/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_dynamo/variables/invoke_subgraph.pyr'   r'      s    FGHFr4   r'   c                   t    e Zd ZU eeeef      ed<   eedz     ed<   dZ	e
ed<   dZej                  dz  ed<   y)InputFingerprintflat_vtsNarg_sourcesFhas_unknowntreespec)r,   r-   r.   listtupler'   r   __annotations__r   r:   boolr;   pytreeTreeSpecr3   r4   r5   r7   r7      sC    5?2344ftm$$K'+Hfoo$+r4   r7   vtreturnc                    t        | t              rt        j                  S t        | t              rt        j
                  S t        | t              rt        j                  S t        | t              rt        j                  S y)z5Return the tag for a leaf VT, or None if unsupported.N)

isinstancer   r'   r/   r   r0   r   r1   r   r2   )rB   s    r5   classify_vtrF      s[    "n%	B	(	B(	)   	B5	6r4   txr#   
fn_args_vtkwargsc                 j    |s%d}|D ]  }t        |      d} n |rt        |      S t        | ||      S )a  Build an InputFingerprint by flattening (args, kwargs) via pytree.

    Uses _make_inlined(tx, pytree.tree_flatten) to recursively flatten
    the argument structure into leaf VTs, classifying each leaf as
    tensor/symnode/constant/module. Also records the TreeSpec so that
    cache lookups can verify structural equivalence.

    Fast path: when kwargs is empty and all args are already leaf VTs
    (tensor/symnode/constant/module), skip the expensive pytree flatten.
    TF)rF   build_fingerprint_fastbuild_fingerprint_with_pytree)rG   rH   rI   all_leafrB   s        r5   build_input_fingerprintrN      sL       	B2& 	 )*55(Z@@r4   c                     g }g }| D ]@  }t        |      }|J |j                  ||f       |j                  t        |dd             B t        ||      S )zCBuild fingerprint for the common case of flat leaf args, no kwargs.Nsource)rF   appendgetattrr7   )rH   r8   r9   rB   tags        r5   rK   rK      sf    79H')K 8"ob	" 	72x678 Hk22r4   c                    ddl m} ddlm} |j	                  | t        |      |f      }  || t        j                        |      j                  |       \  }}|j                         }g }	g }
d}|j                  |       D ]B  }t        |      }||	j                  ||f       nd}'|
j                  t        |dd             D t        |	|
||      S )z=Build fingerprint via pytree flatten for nested/kwargs cases.r   SourcelessBuilder_make_inlinedFNTrP   )torch._dynamo.variables.builderrV   (torch._dynamo.variables.higher_order_opsrX   creater<   r@   tree_flattenunpack_var_sequenceas_python_constantrF   rQ   rR   r7   )rG   rH   rI   rV   rX   container_vtflat_list_vttreespec_vtr;   r8   r9   r:   rB   rS   s                 r5   rL   rL      s     BF$++Bj1A60JKL Fb&2E2E F!" L+ --/H79H')KK..r2 	8"o?OOS"I&K 	72x67	8 Hk;IIr4   fingerprintc                 "   t               }g }| j                  D ]s  \  }}|t        j                  t        j                  fv s)|j                         }|j                  |vsH|j                  |j                         |j                  |       u |S )z8Collect deduplicated proxies from tensor/symnode leaves.)	setr8   r'   r/   r0   as_proxynodeaddrQ   )rb   seenflat_proxiesrS   rB   proxys         r5   get_flat_proxiesrk     s{    "uD "L'' +R8??H$4$455KKMEzz%$##E*+ r4   c                       e Zd ZU dZeed<   y)LiftedUserArgzVLifted arg that came from a user argument (intermediate activation or explicit input).indexN)r,   r-   r.   __doc__intr>   r3   r4   r5   rm   rm   "  s
    `Jr4   rm   c                       e Zd ZU dZeed<   y)LiftedCapturedSourcezRLifted arg that is a captured variable (e.g. a weight or parameter) with a Source.rP   Nr,   r-   r.   ro   r   r>   r3   r4   r5   rr   rr   )  s
    \Kr4   rr   c                   J    e Zd ZU dZeed<   eedf   ed<   eedf   dz  ed<   y)LiftedSyntheticObjectzCLifted arg that is a TorchScriptObject with a SyntheticLocalSource.ctor_fn.	ctor_argsNctor_arg_sources)r,   r-   r.   ro   r   r>   r=   r3   r4   r5   ru   ru   0  s+    MLS#XCHo,,r4   ru   c                       e Zd ZU dZeed<   y)LiftedBoundSymbola  Lifted arg that is a SymInt already bound as a graph input.

    SymInt graph inputs are created during tensor wrapping (not through
    VariableBuilder.wrap_symint), so they aren't registered in
    unspec_variable_map or variable_tracker_cache. Using LiftedCapturedSource
    for these would resolve the source to a concrete Python int via
    source.get_value() instead of reusing the existing symbolic proxy.
    exprNrs   r3   r4   r5   rz   rz   9  s     Ir4   rz   fn_varc                     t        | t              r| j                         j                  S t        | t              r*| j
                  j                  j                  j                  S y N)rE   r   get_function__code__r   valueforward__func__)r|   s    r5   get_fn_coder   L  sP    &./""$---	F9	:LL  ))22	
 r4   traced_sourcesc                 |    | j                   j                  j                  |z  }|rt        j	                  d|       yy)a  Check if any source accessed by the subgraph has been mutated.

    SideEffects.mutated_sources records the exact AttrSource for every
    store_attr call. A simple set intersection with traced_sources tells
    us whether any source the subgraph read was later written to.
    z.subgraph_reuse: mutated sources detected -- %sTF)outputside_effectsmutated_sourceshc_logdebug)rG   r   overlaps      r5   has_mutated_varsr   V  s;     ii$$44~EG<	
 r4   body_rtracing_infor$   has_reuse_hash_fnc                 h   |sc|j                   Hddj                  t        j                  |j                               z   }t        j                  d|       y|rt        | |      ryt        |t              rnt        |t        t        f      rU|j                  D cg c]'  }t        |t              st        |      j                  ) }}|rAt        j                  d|       yt        j                  dt        |      j                         y|j                  rt        j                  d       yyc c}w )	uA  Best-effort check for whether a traced subgraph result can be reused.

    It is possible that a subgraph is morally reusable but does not fall
    into the limited support that Dynamo has today. Current limitations:
      - The subgraph must not have side effects.
      - No sourceful variable accessed by the subgraph may have been
        mutated, because guards are snapshotted on source values at trace
        time — if the underlying object changed since then, the cached
        guards would silently evaluate against stale values.
      - Output must be a single tensor, or a tuple/list of plain tensors.
      - All flattened inputs must be one of: tensor, symnode, constant,
        unspecialized NN module — for sourceless or other input types we
        rely on the treespec and tags for structural matching, so only
        types with well-defined comparison semantics are supported.

    When ``has_reuse_hash_fn`` is True, side-effect and mutation checks are
    skipped because the hash key replaces guards — there are no guards to
    go stale from mutations.
    
 z;subgraph_reuse: not eligible -- subgraph has side effects%sFzDsubgraph_reuse: not eligible -- output contains non-tensor types: %szJsubgraph_reuse: not eligible -- output type %s is not tensor or tuple/listz:subgraph_reuse: not eligible -- unsupported input VT typesT)side_effect_stackjoin	tracebackformat_listr   r   r   rE   r   r   r   itemstyper,   r:   )	rG   r   rb   r   r   r   	stack_msgitem
non_tensors	            r5   is_reuse_eligibler   j  s   6 ))5rww%%l&D&DE  I LLM .r>B&.)	F]L9	: 
dN3 J

 

 LLV XL!!	
 H	
 1
s   ,D/c           	         ddl m} g }|j                  D ]  \  }}|t        j                  k(  rt        |t              sJ |j                  j                  j                  j                  dd      }|t        j                  d        y|j                  t        j                  t        |      f       |t        j                  k(  r>t        |t               sJ |j                  t        j                  |j"                  f       |t        j$                  k(  r?t        |t&              sJ |j                  t        j$                  |j(                  f       C|t        j*                  k(  r#|j                  t        j*                  df       yt-        d| dt/        |      j0                   d       t3        |      }|j5                  d	 |j6                  D               t3               }	|D ]6  }
|	j5                  | j8                  j:                  j=                  |
             8 g }|	D ]  }|j>                  }
|jA                         }tC        j                  |      }|tD        u r=|t        |tF              rtI        d
| d|
jJ                   d      	 | j8                  jM                  |
      }tQ        tR        |      }|jU                  ||      }|j                  |
|||f        t        j                  dtW        |              ||||jX                  |      S # tN        $ r tI        d|
jJ                   d| d      dw xY w)a  Build an InvokeSubgraphReuseCondition from a traced subgraph.

    A reuse condition is a mix of two kinds of checks:

    1. **Input tag checks** (from flat_vts): For each flattened leaf VT,
       we record its tag (_VtTag.TENSOR/SYMNODE/CONSTANT/MODULE) and
       metadata (e.g. tensor shape/stride/dtype/device/requires_grad).
       At lookup time, the treespec ensures structural equivalence, and
       then we compare tags and metadata leaf-by-leaf.

    2. **Guard checks** (from traced_sources): During the subgraph trace,
       every source accessed via VariableBuilder is recorded. We look up
       all guards installed on those sources (and on the arg_sources) to
       build the set of guards that must be re-evaluated on cache hit.
       This is more robust than guard diffing because it catches guards
       that were already installed before the subgraph trace began.

    Raise if any guard type is unsupported, as a feedback for compiler
    developers to support that guard type.
    r   )r   example_valueNzKsubgraph_reuse: cannot build condition -- tensor input has no example_valuezUnexpected input tag 'z' for z/ -- is_reuse_eligible should have rejected thisc              3   &   K   | ]	  }||  y wr~   r3   ).0ss     r5   	<genexpr>z(build_reuse_condition.<locals>.<genexpr>  s     KQQ]qKs   z(subgraph_reuse: unsupported guard type 'z' on source ''z*subgraph_reuse: failed to resolve source 'z guardzNumber of guards %s)input_checksguardsr;   r   )-torch._guardsr   r8   r'   r/   rE   r   rj   rf   metagetr   r   rQ   r
   r0   r   sym_numr1   r   r   r2   AssertionErrorr   r,   rd   updater9   r   r   get_guards_for_sourceoriginating_sourcecreate_fn_namer   r   r   RuntimeErrornameresolve_source_value	Exceptionr   r   get_metadata_fnlenr;   )rG   rb   r   r   r   rS   rB   exampleall_sourcesall_relevant_guardsrP   guard_tuplesguardtype_strhandlerr   expecteds                    r5   build_reuse_conditionr     s   2 ;24L'' R(//!b.111hhmm((,,_dCGa 2I'2R STH$$$b/222
 !1!12:: >?H%%%b"2333!2!2BHH =>HOO#$ 78 (VDH4E4E3F G> ? /< n%KK+"9"9KK&)e S""299#3#3#I#I&#QRS HJL$ @))'')&**84j ?j2KL:8*MRXR]R]Q^^_` 	II226:E ~w/**5%8VWh>?7@: LL&L(9:'!%%%	 !  	<V[[MPXzY_`	s   L'Mold_arg_sourcesnew_arg_sourcesc                 `    t        | |      D ci c]  \  }}|
|||k7  r|| c}}S c c}}w )zOMap old arg sources to new arg sources for remapping captured variable sources.)zip)r   r   oldnews       r5   build_source_replacementr     sB     O_=C?s3#: 	S  s   *	conditionr   cached_entryc                    |j                   /|j                   |j                   k7  rt        j                  d       yt        |j                        t        |j
                        k7  r>t        j                  dt        |j                        t        |j
                               yt        t        |j                  |j
                              D ]  \  }\  \  }}\  }}||k7  rt        j                  d|||        y|t        j                  k(  rt        |t              sJ |j                  j                  j                  j                  dd      }	|	t        j                  d|        yt!        |	      }
|
|k7  st        j                  d|        y|t        j"                  k(  r#t        |t$              sJ |j&                  |us y|t        j(                  k(  st        |t*              sJ |j,                  |k7  s5|t        |j.                        k  r|j.                  |   nd}|t        |j.                        k  r|j.                  |   nd}|| y t1        |j.                  |j.                        d	t2        d
t2        ffdrt5        fd|j6                  D              }n|j6                  }t9        | |      rysy| j:                  j<                  j>                  | j:                  j<                  j@                  d}i }i }|jB                  D ]  \  }}}}|jE                        }||k(  r	 |jG                  |||      }|jU                  ||      rFt        j                  d|jK                         ||jL                  |||jN                  r/djQ                  |jN                  jS                                       yd        y y# tH        $ rj t        j                  d|jK                         ||jL                  |jN                  r)djQ                  |jN                  jS                               nd       Y  yw xY w)u  Check if a cached subgraph can be reused for the current call.

    Three-phase check:
    (1) Verify that intermediates (tensor metadata, symnode types, constant
        values) match the cached input_checks — these are lightweight
        structural comparisons that don't require source resolution.
    (2) Check for mutations on the remapped traced_sources — if any source
        the subgraph read has been mutated since the original trace, the
        cached guards would evaluate against stale values.
    (3) Build a source replacement mapping (old sources → new sources) and
        re-evaluate the snapshotted guards under the new sources.
    Nz1subgraph_reuse: reuse failed -- treespec mismatchFzMsubgraph_reuse: reuse failed -- input count mismatch: cached %d vs current %dzRsubgraph_reuse: reuse failed -- input %d tag mismatch: cached '%s' vs current '%s'r   zDsubgraph_reuse: reuse failed -- input %d tensor has no example_valuezAsubgraph_reuse: reuse failed -- input %d tensor metadata mismatchr   rC   c                 (    j                  | |       S r~   r   r   source_replacements    r5   replacement_fnz#is_reusable.<locals>.replacement_fn  s    !%%a++r4   c              3   @   K   | ]  }|j                          y wr~   )clone)r   r   r   s     r5   r   zis_reusable.<locals>.<genexpr>  s     X!aggn5Xs   TGLzsubgraph_reuse: reuse failed -- cannot resolve source
  guard type: %s
  guard source: %s
  guard source name: %s
  user stack:
%sr   z
<no stack>zsubgraph_reuse: reuse failed --
  guard type: %s
  guard source: %s
  guard source name: %s
  expected: %s
  got: %s
  user stack:
%s)+r;   r   r   r   r   r8   	enumerater   r'   r/   rE   r   rj   rf   r   r   r
   r0   r   r   r1   r   r   r9   r   r   r!   r   r   r   root_tx	f_globalsf_localsr   r   	get_valuer   r   r   
user_stackr   formateval_fn)rG   r   rb   r   i
cached_tag
cached_valcur_tagcur_vtr   cur_meta
cached_srcnew_srcremappedresolve_globalsresolve_localsresolve_cacherP   r   r   r   
new_sourcer   r   r   s                          @@r5   is_reusabler   %  s   & %+*>*>)BTBT*T?	
  9!!"c+*>*>&??[	&&'$$%	

 <EI""K$8$89= /!88$Z&7w  LLd	 (fn555ll'',,00$GGZ .w7H:%W 8+++fo666~~Z/8,,,f&6777||z)
 3|7788 !,,Q/  3{6677  ++A. 
 % _/!b 2  +"9"9,& ,V , Xy?W?WXX++H% 
 YY((YY'''O &(N')M,5,<,< +(5\\.1
 	((.-XE" uh/LL$ $$&## ((//12"  "" W+Z I  	LL$
 $$&## ((//12! 	s    OA/Q
Qc                     ddl m} | j                  j                  j                  j                  t        j                  j                        }t        ||      syt        |      }|duxr ||j                  v S )z?Cheap check: does the cache have any entries for this function?r   InvokeSubgraphCacheFN)r   r   r   tracing_contexthop_dispatch_set_cache	get_cachetorch_higher_order_opsinvoke_subgraphrE   r   subgraph_reuse_cache)rG   r|   r   invoke_subgraph_cachefn_codes        r5   has_reuse_entriesr     sn    
 2II55LLVV// +-@A&!G$X7.C.X.X#XXr4   c                 $    ddl m}  j                  j                  j                  j                  t        j                  j                        }t        ||      sy t        |      }|y dddt        dt        f fd}|j                  ||      S )Nr   r   condr   entryrC   c                      t        | |      S r~   )r   )r   r   rb   rG   s     r5   	evaluatorz#find_reuse_match.<locals>.evaluator  s     2t[%88r4   )r   r   r   r   r   r   r   r   r   rE   r   r   r?   find_reuse_entry)rG   r|   rb   r   r   r   r   s   ` `    r5   find_reuse_matchr     s    
 2II55LLVV// +-@A&!G
9,95M9	9
 !11'9EEr4   	body_name	body_gmodconfigp_args.r   max_reuse_entriesz#InvokeSubgraphReuseCondition | Nonehash_keyc           
         ddl m} |
du |du k7  sJ d       | j                  j                  j                  j                  t        j                  j                        }t        ||      syt        |      }|yt        | ||j                        }t        |t              }g t        j                  fd|       t!              }|D cg c]Y  }t        |t        j"                        r=|j$                  |j'                         |j(                  |j*                  |j,                  f[ }}t/        |||||||j0                  |      }|
|j3                  ||
||	       y|J |j5                  ||||	       yc c}w )a<  Save a traced subgraph into the reuse cache for future cache hits.

    Builds an InvokeSubgraphReuseEntry with the freevar mapping (how each
    lifted arg maps back to user inputs or captured variables), output
    metadata, and arg sources. On a future cache hit, stamp_out_subgraph
    uses this entry to emit a new invoke_subgraph call without re-tracing.

    Exactly one of ``condition`` or ``hash_key`` must be provided.
    ``condition`` stores the entry in the guard-based cache (linear scan);
    ``hash_key`` stores it in the hash-key cache (O(1) lookup).
    r   r   Nz5Exactly one of condition or hash_key must be providedc                 j    | j                         st        | t              rj                  |       S d S r~   )	is_tensorrE   r   rQ   )rB   user_output_vtss    r5   <lambda>z"save_reuse_entry.<locals>.<lambda>-  s/    <<>ZO< #))"-  r4   )r   r   r   subgraph_input_mappingsingle_tensor_outputoutput_metadatar9   num_user_outputs)r   r   r   r   r   r   r   r   r   rE   r   build_subgraph_input_mappingr8   r   r   visitr   Tensorshapestridedtypedevicerequires_gradr   r9   add_reuse_entryadd_reuse_entry_by_key)rG   r|   rb   r   r   r   r   r   r   r  r   r  r   r   r   r  r	  r  tr
  r   r  s                        @r5   save_reuse_entryr    s   2 28t#34 ?4 II55LLVV// +-@A&!G9
FK(( &fn=
 .0O	 		 ?+ a& 
!((*aggqxxAO  %51'  ++)E --Y'8	
 ###44Xu&7	
3s   AE8reuse_hash_fnSequence[VariableTracker]c                    ddl m} ddlm} | j                  j
                  j                  j                         5  	   || |      |i |}	 ddd       t        t              rt        |j                  t              st        d|       |j                  S # |$ r}t        d|       |d}~ww xY w# 1 sw Y   ixY w)u   Trace the user's reuse_hash_fn to get a constant integer hash key.

    Guards installed during the hash function tracing are skipped — the hash
    key itself is the reuse condition, not the guards.
    r   )UnsupportedrW   zAreuse_hash_fn must be fully traceable without graph breaks. Got: Nz2reuse_hash_fn must return a constant integer, got )torch._dynamo.excr  torch._dynamo.utilsrX   r   r   guards_contextskip_guard_installr   rE   r   r   rp   )rG   r  rH   rI   r  rX   resultes           r5   trace_reuse_hash_fnr"  X  s     .1		"	"	1	1	D	D	F 	5]2}5zLVLF f./z&,,PS7T@I
 	
 <<  	STUSVW	 s(   B5BB2B--B22B55B>c                     ddl m} | j                  j                  j                  j                  t        j                  j                        }t        ||      sy t        |      }|y |j                  ||      S )Nr   r   )r   r   r   r   r   r   r   r   r   rE   r   find_reuse_entry_by_key)rG   r|   r  r   r   r   s         r5   r$  r$  v  sm    
 2II55LLVV// +-@A&!G 88(KKr4   cachedc                     ddl m} ddlm}m} t        |      }|j                  }t        |j                  |       g }| j                  j                  j                  | j                  j                  j                  d}	i }
i }|j                  D ]+  }t        |t              r|j                  ||j                             3t        |t"              rddlm} | j                  j(                  j*                  |j,                     }t        ||      r4 |       }|| j                  j(                  j*                  |j,                  <   |j                  |       t        |t.              r|j0                  }|j2                  }|r} r{g }g }t5        ||      D ]R  \  }}|)|j7                   fd      }|j9                  |	|
|      }|}|j                  |       |j                  |       T t;        |      }t;        |      }| j                  j=                  |j>                  ||      }|j                  |jA                                t        |tB              s|jD                  } r|j7                   fd      }|j9                  |	|
|      }  || |      |      }|j                  |jA                                . | jF                  J | jF                  5  t;        d	 |jH                  D              }ddd        || |jJ                        }||jJ                  g|} || tL        jN                  jP                  t;        |      i |jR                        }|jT                  rD|jV                  }t        |d   tX              s J d
t[        |d         j\                          |d   S |jV                  }|j^                  }|dkD  r2|ta        |      k  r$ddl1m2} |jg                  | t;        |d|             S |S # 1 sw Y   xY w)aB  Emit a new invoke_subgraph call by stamping out a cached subgraph.

    Sources in the cached entry are parameterized: they refer to the original
    call's sources and must be rewritten to the current call's sources via
    source replacement before we can look up or create the corresponding
    graph placeholders.
    r   )VariableBuilder)add_call_function	make_attrr   )	LazyProxyNc                 (    j                  | |       S r~   r   r   s    r5   r  z$stamp_out_subgraph.<locals>.<lambda>  s    :L:P:PQRTU:V r4   c                 (    j                  | |       S r~   r   r   s    r5   r  z$stamp_out_subgraph.<locals>.<lambda>  s    8J8N8NqRS8T r4   c              3   \   K   | ]$  \  }}}}}t        j                  |||||        & yw))r  r  r  N)r   empty_strided)r   r  r  r  r  req_grads         r5   r   z%stamp_out_subgraph.<locals>.<genexpr>  sC      	
 7vufh & 	
s   *,zExpected tensor output but got    rU   )4rY   r'  rZ   r(  r)  rk   r9   r   r   r   r   r   r  rE   rm   rQ   rn   rz   torch._dynamo.output_graphr*  current_tracerbound_symbolsr{   ru   rw   rx   r   r   r   r=   synthetic_graph_inputrv   re   rr   rP   	fake_moder
  r   r   r   r   r   r	  r   r   r   r,   r  r   builderrV   r[   )!rG   rb   r%  r'  r(  r)  ri   r   new_lifted_argsr   r   r   subgraph_inputr*  rj   rw   rx   new_ctor_argsnew_ctor_arg_sourcesvalarg_srcr   rB   r   r   r   	body_noder   flat_variabler   nrV   r   s!                                   @r5   stamp_out_subgraphr@    s    @U#K0L!--O1&2D2DoVO YY((YY'''O &(N')M !77 (2nm4""<0D0D#EF(9:<II,,::>;N;NOE%+NS		((66~7J7JK""5)(=>&00I->>$6 "')$$'	3C$D 9LC*")--0V"W%//+^] #*!((-(//89 "-0	#()=#> 00&&	3CB ""2;;=1(<='..J!'--.TU
 ((.-XE0Z07B""2;;=1Q(2V <<###	 

 	
 ;A:P:P	
 	


 "f../I))<O<F%
//f
M ""##%(N3 	
-d58n.E.E-FG	
3 QxEA1uSZ. ''E%),<==Q

 

s   O;;Pr8   c                    i }d}|D ]Q  \  }}|t         j                  t         j                  fv s)|j                         j                  }||vsH|||<   |dz  }S g }|dd D ]  }	|j                  |	j                  d      }
|
dk\  r|j                  t        |
             @|	j                  j                  j                  dd      }||j                  nd}||j                  n%|	j                  j                  j                  dd      }t        |t        j                        r/|j                  t        |j                  j                               |6J d|	j                  j                    d	|	j                  j"                   d
       t        |t$              rK| j&                  j(                  j                  |      }|$|\  }}}|j                  t+        |||             |j                  t-        |              |S )aS  Build a mapping that records the origin of each lifted arg for a subgraph.

    On a cache hit, we stamp out a new invoke_subgraph call and need to
    reconstruct its argument list in the correct order. Each lifted arg
    (p_args[2:], skipping body_node and body_name) comes from one of:

    - LiftedUserArg: a user argument (intermediate activation or explicit input)
    - LiftedCapturedSource: a captured variable (e.g. a weight or parameter)
    - LiftedSyntheticObject: a TorchScriptObject with a SyntheticLocalSource
    - LiftedBoundSymbol: a SymInt already bound as a graph input
    r   r0     Ngraphargr   zFreevar has no source: node.op=z node.name=zP -- this likely means a function argument was not included in the proxy matching)r'   r/   r0   re   rf   r   rQ   rm   r   rP   r   rE   r   SymIntrz   r{   opr   r   r   synthetic_source_ctor_inforu   rr   )rG   r   r8   proxy_node_to_idxidxrS   rB   rf   r  outer_proxymatched_idxrD  rP   r   	ctor_inforv   rw   rx   s                     r5   r  r    s     35
C R8??H$4$455;;=%%D,,*-!$'q 57abz #H'++K,<,<bA!"))-*DE"'',,00TBH(0(<X__$F '    %%**..E 
 '5<<0&--.?@Q@Q.RS% 1+2B2B2E2E1F G(--223 4KL%
 &"67II@@DDVL	(;D8GY(8*11-gyBRS "))*>v*FGG#HH "!r4   c                        e Zd ZdZdZdZdZdZdZddde	ddd	e
ee	f   d
ededef fdZddddd	e
ee	f   de	fdZ xZS )!InvokeSubgraphHigherOrderVariablez&torch.ops.higher_order.invoke_subgraphFTrG   r#   fn_vtrH   r  rI   r   	attr_namerC   c           	         t        |t        t        f      s(t        dt	        |      dg t
        j                         |j                  j                  j                  j                  t        j                  j                        }t        |t              r5|j                         j                  }|j                         j                   }	n\t        |t              sJ |j"                  j$                  j&                  j                  }|j"                  j$                  j                   }	g }
|r|j)                  |      }
|}t+        |
      D ]b  }||j                  j,                  v sJ |j                  j,                  |   }|j.                  sJ ddlm}  ||	|||j.                        s`|c S  t4        | m  |||||d      }t8        j;                  d|	||	t=        |
      dz          |r|j?                  ||       |S )	NzIEncountered non user function variable during invoke_subgraph HOP tracingz;invoke_subgraph does not support non user function variablegb_typecontextexplanationhintsr   )are_same_graph_modulessubgraphzZ%s: Installing subgraph with identifier '%s', bringing total count for '%s' function to %sr0  ) rE   r   r   r	   strr   SUPPORTABLEr   r   r   r   r   r   r   r   r   r,   r   r   r   get_dynamo_installed_submodulesreversed
nn_modulesr5  rZ   rW  super install_subgraph_in_output_graphr   r   r   add_dynamo_installed_submodule)selfrG   rO  rH   rI   r   rP  r   r   fn_namepreviously_installed_submodulescurrent_modsubmodule_nameprevious_modrW  r   	__class__s                   r5   r_  zBInvokeSubgraphHigherOrderVariable.install_subgraph_in_output_graphK  s    %"?AU!VWcE
Y6)556	 II%%<<FF''77 	 e12((*33G((*33Ge%BCCCkk))22;;Gkk))22G*,' %EEgN , $K #++J"K *%)=)====!yy33NC||#| *\; *)* G<z69j
	 	h/014	
 !!@@)Tr4   argsc                    ddl m} ddlm} |d   }|dd  }d }d}	d }
t	        |d      r8	 |j                         }t        |dd       }t        |dd      }	t        |d	d       }
|j                  j                   }|r~|
| |d      5  t        ||
||      }d d d        t        ||      }|t        j                  d|||j                          t#        |||      } |d      5  t%        |||      cd d d        S |ryt'        ||      rm |d      5  t#        |||      }t)        |||      }d d d        @t        j                  d||j                           |d      5  t%        ||      cd d d        S | j*                  J  |d      5  | j-                  ||||| j*                        \  }}}}}}}}d d d        t/              dkD  r%t1        dd| d| dg t2        j4                         t7        |t8              r|j:                  d<   d   g|dd  }|rt#        |||      }|
?j<                  }t?        ||||d      stA        d      tC        ||||||||	       n@j<                  }t?        ||||      r%tE        |||      }|tC        ||||||||	|        ||tF        jH                  jJ                  tM        |      ||      S # t        $ r t        j                  d
d        w xY w# 1 sw Y   nxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nr   )dynamo_timed)*_call_function_with_auto_output_flatteningr0     r    __marked_compile_region_config__+__marked_compile_region_max_reuse_entries__'__marked_compile_region_reuse_hash_fn__zYFailed to extract nested_compile_region() config from InvokeSubgraphHigherOrderVariable. T)exc_infoinvoke_subgraph_reuse_hash_fnz?subgraph_reuse: hash key %d hit for '%s', reusing subgraph '%s'invoke_subgraph_reuse_stamp_outinvoke_subgraph_reuse_lookupz9subgraph_reuse: cache hit for '%s', reusing subgraph '%s'invoke_subgraph_tracez"invoke_subgraph: kwargs unexpectedzargs: z
, kwargs: z3kwargs should have been flattened into lifted args.rR  nested_region_config)r   zreuse_hash_fn was provided but the subgraph is not eligible for reuse. Check the logs with TORCH_LOGS='+hierarchical_compile' for details.)r  )r   )r   )'r  rj  rZ   rk  hasattrr   rR   r   logwarningr   exportr"  r$  r   r   r   rN   r@  r   r   	_HOP_NAMEcreate_wrapped_noder   r	   r   
DYNAMO_BUGrE   r   r   r   r   r   r  r   r   r   r   r=   )ra  rG   rh  rI   rj  rk  r|   rH   r   r  r  fnreuser  r%  rb   matchr   p_kwargsr   r   r   r   body_graph_output_vtsr   r   r   s                              r5   _call_functionz0InvokeSubgraphHigherOrderVariable._call_function  s&    	5	
 a!"X
6>*((* %GN$+Eq%! !(A4! II$$$ ].=> V.r=*fUV -RBF!U$$	 6b*fM!"CD G-b+vFG G (V4<= 5b*fM(  OOO
 ""CD F-b+uEF F ~~)))12 
	Y ((VZX	%
	Y x=1< j9Q&11	 f895;INN12 1I
 ABZ
 1"j&IK(!-!<!<( "&* 'J 
 !!%% ".!<!<$\> !6#&!I
 !,("'%%""")-&/ :##33&M!	
 		
S  o!   V VG G F F
	Y 
	YsG   7K K?!LLL&	)L3"K<?L	LL#&L03L=)r,   r-   r.   rz  _ALLOW_FALLBACK_TO_EAGERsupports_input_mutationsupports_aliasingallow_side_effectsfilter_aliased_intermediatesr   dictrY  r   r_  r  __classcell__)rg  s   @r5   rN  rN  ?  s    8I$" $( F#F F 0	F
 S/)*F F F 
FPm
#m
 *m
 S/)*	m

 
m
r4   rN  )NF)rl  NN)hro   enumloggingr   typesdataclassesr   typingr   r   r   r   r   torch._higher_order_opstorch._dynamor   r  r	   torch._dynamo.guardsr
   r   r   r   r   torch._dynamo.sourcer   torch._dynamo.variables.baser    torch._dynamo.variables.constantr   !torch._dynamo.variables.functionsr   rZ   r   torch._dynamo.variables.listsr   r   !torch._dynamo.variables.nn_moduler   torch._dynamo.variables.tensorr   r   r   r   r   r   r   'torch._higher_order_ops.invoke_subgraphr   torch.fx.graph_moduler   torch.fx.proxyr   torch.utilsr    r@   torch.utils._ordered_setr!   collections.abcr"   torch._dynamo.symbolic_convertr#   r$   	getLoggerr,   rw  _logginggetArtifactLoggerr   Enumr'   r7   rF   r  rY  rN   rK   rL   r<   rk   rm   rr   ru   rz   LiftedArgOriginCodeTyper   r?   r   r   r   r   r   r   r   fxr=   rp   r  r"  r$  r@  r  rN  r3   r4   r5   <module>r     sC       ! 7 7   + +  6 8 = B L E K J  O -   ) / (DLg!		)	)(4J	Kltyy ,z ,
C 
HtO 
AAA cNA 	A83s 3/? 3JJJ cNJ 	JB
"2 
tE{ 
       - - - 
 
 
 ((+@@CTT 
  5 v& 
2 15#DDD "D (	D
 v&-D D 
DNee!e v&e "D(	eP	&4-(	&4-(	 
&&.	gg-g "g +	g
 
gTYYY 
Y FFF "F $	FJ 7;[
[
[
 "[
 	[

 xx##[
 '-[
 #s(O[
 [
 [
 [
 5[
 Dj[
 
[
| , o%&	
 	<LLL L $	L$ss!s %s 	sl>">"#s(O>" 5?234>" 
/	>"BA
(? A
r4   