
    9j                    %   U d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZmZ erd dlmZ d dlmZ e j,                  j/                  dd	      d	k(  Zd
Zdedz  fdZdedz  fdZdedz  fdZdedz  fdZdedz  fdZdedz  fdZ defdZ!defdZ"e j,                  j/                  dd	      d	k(  Z#d
Z$dZ%d
Z& e       rdndZ' e(e j,                  j/                  dd            Z)e(e*d<    edddd      Z+ee*d<   d
Z,ee*d<    e       Z-edz  e*d <    e       Z.edz  e*d!<    ed"d#d$      Z/ee*d%<   dZ0ee*d&<    e       Z1edz  e*d'<    e       Z2edz  e*d(<    ed)*      Z3ee*d+<   d
Z4ee*d,<   i Z5e6e7e7f   e*d-<   dZ8e(dz  e*d.<   d/Z9ed0   e*d1<   e j,                  j/                  d2d3      d	k(  Z:ee*d4<   e j,                  j/                  d5d3      d	k(  Z;ee*d6<   e j,                  j/                  d7d3      d	k(  Z<ee*d8<   e j,                  j/                  d9 e       rd3nd	      d	k(  Z=ee*d:<   e j,                  j/                  d;d	      d	k(  Z>e j,                  j/                  d<d	      d	k(  Z?d
Z@dZAe j,                  j/                  d=d	      d	k(  ZBe j,                  j/                  d>      d	k(  ZCe j,                  j/                  d?      d	k(  ZDe j,                  j/                  d@d	      d	k(  ZEe j,                  j/                  dA e       rd3nd	      d	k(  ZFdZGdZHdZIe j,                  j/                  dBd3      d	k(  ZJe j,                  j/                  dC      d	k(  ZKe j,                  j/                  dDdE      ZLedF   e*dG<   dZMdZN e"       ZOd
ZPd
ZQdZRd
ZSdZTe	j                  j                  j                  e*dH<   dZXe	j                  j                  j                  e*dI<   dZYe	j                  j                  j                  e*dJ<   dZ[e	j                  j                  j                  e*dK<   dZ\e	j                  j                  j                  e*dL<   dZ]e	j                  j                  j                  e*dM<   dZ^ee_dN   ge_dN   f   dz  e*dO<   dZ`ee_dN   ge_dN   f   dz  e*dP<   dZad
Zbd
Zcd
ZddZei Zfe6e7e6e7ef   f   e*dQ<   i Zge6e7e6e7ef   f   e*dR<   dZhe j,                  j/                  dSd	      d	k(  Zid
ZjdZkdZld
dTdUddVZme6e7ef   e*dW<   dXZnedY   e*dZ<   d
Zog Zpe_e7ee_dN   ge_dN   f   z     e*d[<   dZqe(dz  e*d\<   dZrd
Zsd Zte(e*d]<   d^Zued_   e*d`<   dZvee(ge(f   dz  e*da<   dbZwedc   e*dd<   d^Zxede   e*df<   dZyee(ge(f   dz  e*dg<   dbZzedc   e*dh<   d^Z{ede   e*di<   dZ|ee(ge(f   dz  e*dj<   dbZ}d
Z~ee*dk<   dZdlZdmZ eddndop      Zee*dq<   e j,                  j/                  dr      d	k(  Ze j,                  j/                  ds      d	k(  Ze j,                  j/                  dt      d	k(  Ze j,                  j/                  du      d	k(  Ze j,                  j/                  dv      d	k(  Z e( e j                  dwdl            Z e( e j                  dxdy            Zde(dz  fdzZ e       Ze(dz  e*d{<   e j,                  j/                  d|d	      d	k(  Ze j,                  j/                  d}d3      d	k(  Ze j,                  j/                  d~d3      d	k(  Ze j,                  j/                  d e       sd	nd3      d	k(  Zee*d<   dZde*d<   g Ze_e7   e*d<   g Ze_e7   e*d<   d
Zee*d<    eddd
$      Zee*d<   g Ze_e(   e*d<   e j,                  j/                  dd      j/                         Zde(dz  fdZ e       Ze(dz  e*d<   e j,                  j/                  dd      j/                         Ze j,                  j/                  dd      j/                         Zed   e*d<   e j,                  j/                  dd      j/                         Zed   e*d<   d
Zee*d<   d
Zee*d<   dZee*d<   dZee*d<   dbZed   e*d<   dZee*d<   dZee*d<   e j,                  j/                  dd3      d	k(  Zee*d<   d
ZdZd
Ze j,                  j/                  d      d	k(  Ze j,                  j/                  d      d	k(  ZdZdZdZe j,                  j/                  d      d	k(  Z e(e j,                  j/                  dd            Z ee j,                  j/                  dd            Ze j,                  j/                  d      d	k(  Ze j,                  j/                  d      d	k(  Z e(e j,                  j/                  dd	            Zd Zd Z G d d      Z G d d      Ze j,                  j/                  dd3      d	k(  Zde7defdZde7defdZde7defdZe j,                  j/                  dd      Ze	j|                  j~                  sd	nd3Ze j,                  j/                  de      d	k(  Ze j,                  j/                  dd3      d	k(  Ze j,                  j/                  dd3      d	k(  Ze j,                  j/                  dd	      d	k(  Ze j,                  j/                  d      d	k(  ZdZdZdZdZe(dz  e*d<    eddë      Zee*d<   d
Zd
Zd
ZdZe j,                  j/                  dū      d	k(  ZdZeg df   dz  e*d<   d
Ze j,                  j/                  dȫ      d	k(  Zee*d<   e j,                  j/                  dʫ      d	k(  Zee*d<   e j,                  j/                  ddͫ      Ze j,                  j/                  d e       rd3nd	      d	k(  Zee*d<   dZee*d<   dZe j,                  j/                  dd	      d	k(  ZdUZdZdZdZdZe(dz  e*d<   dZd
ZdZd
Zd
Z e j                  dd	      d	k(  Z e j                  d׫      d	k(  Z e(e j,                  j/                  dd             Ze j,                  j/                  dd3      d	k(  ZdZd
ZdZd
Zd
Zd
Zd
ZdUZdUZdZdZd
Zd
ZdZd
Zde	j                  v xs de	j                  v Z e       xs eZe j,                  j/                  dd	      d	k(  ZdZe7dz  e*d<   dZe7dz  e*d<    eddͬë      Ze7e*d<   de7fdZ e       Ze7e*d<   dZe(e*d<    eddd$      Z ee*d<    edd
ë      Zee*d<   d
ZdlZddgZe_ed   e7z     e*d<   d
Zee*d<   e j,                  j/                  dd3      d	k(  ZdZe(e*d<   dZe(e*d<   dZ	ee*d<    G d d      Z
 G d d      ZdefdZde(fdZ e       rdn e       Ze(dz  e*d<    eddd$      Zee*d <    ed      Ze(e*d<    e!       Zee*d<    ed*      Zee*d<    eddd
$      Zee*d	<   e j,                  j/                  d
d3      d	k(  Zee*d<    ed*      Zee*d<   e7dz  e*d<    e       rv	 d dlmZ erQ ej2                  e j4                  j7                  ej9                  de j:                        d            Zn ej2                  d      ZndZdѐZ!e j,                  j/                  dd	      d	k(  Z"e j,                  j/                  dd	      d	k(  Z#d
Z$d
Z%dZ&d
Z'dZ(dZ)d
Z*dZ+d
Z,ee*d<   e j,                  j/                  dd3      d	k(  Z-d
Z.d
Z/d
Z0d
Z1 e        Z2ee*d<   e j,                  j/                  ddͫ      Z3e3dk7  Z4e3d	k(  rdne3Z5e j,                  j/                  dd      Z6e7dz  e*d<   e j,                  j/                  d      d	k(  Z7d
Z8e j,                  j/                  dd3      d	k(  Z9ee*d<   d
Z:ee*d<   d
Z;ee*d<   d
Z<ee*d <   d
Z=ee*d!<   d
Z>ee*d"<   d
Z?ee*d#<   d
Z@ee*d$<   d
ZAee*d%<   e j,                  j/                  d&d3      d	k(  ZBe j,                  j/                  d'd3      d	k(  ZCee*d(<   dZDee*d)<   i ZEe6e7e6e7ef   f   e*d*<    e(e j,                  j/                  d+d,            ZFe(e*d-<   d
ZGee*d.<   de7dz  fd/ZH ed0dͬë      ZIe7e*d1<    G d2 d3      ZJ G d4 d5      ZK G d6 d7      ZL G d8 d9      ZM G d: d;      ZN G d< d=      ZO eeO       G d> d?eO             ZP eeO       G d@ dAeO             ZQ G dB dC      ZRd5ZSedD   e*dE<   d7ZTedF   e*dG<   dHZUedH   e*dI<   d7ZVed7   e*dJ<    G dK dL      ZW G dM dN      ZX G dO dP      ZYg dQZZe_e7   e*dR<   g dSZ[e_e7   e*dT<   dgZ\e_e7   e*dU<   g Z]e_ee	j                  e	j                  e	j                  gdf      e*dV<    e j                  dWd	      d	k(  Z_ G dX dY      Z` G dZ d[      Zaerd d\lb  G d] d^      Zce j,                  j/                  d_d3      d	k(  Zdee*d`<    eej                  ef          y# ee f$ r dZY )w xY w(a      N)Callable)AnycastLiteralTYPE_CHECKING)	is_fbcode)Configget_tristate_envinherit_fields_frominstall_config_module)InductorChoices)CUDAGraphPolicyTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      t        d      S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
        V/media/conek/DATA/Code/OCR/venv/lib/python3.12/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      t         j                  j                  d      dk(  ryt         j                  j                  d      dk(  ryy )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr       s7    	zz~~01S8	zz~~01S8r   c                      t        d      S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr#   %   r   r   c                      t        d      S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr&   )   s    IJJr   c                  :    t        dt               sd      S d       S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr)   -   s&    9K %) r   c                      t        d      S )N&TORCHINDUCTOR_AUTOTUNE_AT_COMPILE_TIMEr   r   r   r    autotune_at_compile_time_defaultr,   4   s    DEEr   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r$t        j
                  j                  d      }|| k  S y)N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr6   8   s^    #$ /2::=zz~~FG3NN	''<<;
 666 r   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r&d}t        j
                  j                  |      }|| k  S y)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr0   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer5   s      r   prologue_fusion_enabledr;   G   s\    %&"&"**4zz~~=>#EE	<''<<WE888r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_rank,TORCHINDUCTOR_PRECOMPILATION_TIMEOUT_SECONDSi,  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)rB   rD   rE   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rT   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrapper*TORCHINDUCTOR_CPP_CACHE_PRECOMPILE_HEADERScpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAX$TORCHINDUCTOR_APPLY_GUMBEL_MAX_TRICKTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noneri   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)rE   tritonatenr   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyrj   )rj   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinatorrE   )rE   
custom_opscustom_ops_multidtypebucket_all_gathers_bucket_moderj   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinator"bucket_reduce_scatters_bucket_modebucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinator!runtime_estimations_mms_benchmark   g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)rE   rD   rB   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMM&TORCHINDUCTOR_PIPELINE_GEMM_AUTOTUNINGTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM%TORCHINDUCTOR_DEFAULT_AUTOTUNE_WARMUP"TORCHINDUCTOR_DEFAULT_AUTOTUNE_REPd   c                      t         j                  j                  d      } | y| j                         dv ry t	        |       S )N,TORCHINDUCTOR_AUTOTUNE_NUM_CHOICES_DISPLAYED
   r   r   r   r   lowerintenv_vals    r   '_autotune_num_choices_displayed_defaultr     s8    jjnnKLG}}/)w<r   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitionzCUDAGraphPolicy | Nonecudagraph_policycustom_should_partition_opscudagraph_unsafe_unbacked_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPc                  ~    t         j                  j                  dd      } | j                         dv ry t	        |       S )N*TORCHINDUCTOR_NVGEMM_MAX_PROFILING_CONFIGS5r   r   r   s    r   %_nvgemm_max_profiling_configs_defaultr   n  s2    jjnnI3OG}}/)w<r   nvgemm_max_profiling_configs(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passes)earlylaterE   pre_grad_pass_timinguse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUSc                  d    t         j                  j                  dd      j                  d      } | S )N#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT ,r   r   r   split)collect_envs    r    _parse_autoheuristic_collect_envr     s)    **..!FKQQRUVKr   c                  d    t         j                  j                  dd      j                  d      } | S )NTORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmr   r   )use_envs    r   _parse_autoheuristic_use_envr     s(    jjnn>
KQQRUVGNr   c                   4    e Zd ZdZd e       v Zd e       v Zy)autoheuristic_collectzT
    Config for which autoheuristic optimizations should collect training data.
    pad_mmr   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r     s%     9;;F=??Hr   r   c                   4    e Zd ZdZd e       v Zd e       v Zy)autoheuristic_usezU
    Config for which autoheuristic optimizations should use learned heuristics.
    r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r     s%     577F=??Hr   r   'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 2    t        |       xs t        |       S N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &A*;D*AAr   c                 n    | dk(  rt         j                  S | dk(  rt         j                  S | t               v S Nr   r   )r   r   r   r   r   s    r   r   r     s;    x$+++		$--- 7999r   c                 n    | dk(  rt         j                  S | dk(  rt         j                  S | t               v S r   )r   r   r   r   r   s    r   r   r     s;    x '''		 ))) 3555r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT#TORCHINDUCTOR_CACHE_SDPA_CONSTRAINT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold)TORCHINDUCTOR_DELAY_REALIZE_CHEAP_OUTPUTSrD   rE   delay_realize_cheap_outputs.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLESr   (TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusionr   'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   g?max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   devgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesTORCHINDUCTOR_DISABLED_PASSESdisabled_passesc                  r    dt         j                  v rt         j                  d   } nd} | dv s
J d|         | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr$    sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSElog_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tp)TORCHINDUCTOR_PARTITIONED_SCATTER_ENABLEDr.   "partitioned_scatter_min_partitions   "partitioned_scatter_max_partitionsg?!partitioned_scatter_memory_budgetc                   *    e Zd ZU dZeed<   dZeed<   y)_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)r   r   r   r6  bool__annotations__r7  r   r   r   r   r5  r5  f  s    K/9'9r   r5  c                      e Zd ZU dZdZeed<   dZedz  ed<   dZedz  ed<   dZ	e
dz  ed<   dZedz  ed<   dZeej                   j"                  gedz  f   dz  ed	<   d
Zed   ed<   dZed   ed<   dZedz  ed<   dZedz  ed<   dZedz  ed<   dZe
dz  ed<   dZeed<   dZedz  ed<   dZeed<   dZedz  ed<   dZed   dz  ed<   dZeed<   dZeed<   dZ ed   ed<   dZ!eed<   y) aten_distributed_optimizationszDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)rB  	benchmarkcollective_estimatorrC  compute_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance!log_final_collectives_estimationsbucket_exposed_firstbucket_only_internode_commsenable_fusion_regions)rE   r   r   	coalescedbucket_modeT&prioritize_bucketing_during_scheduling
spmd_checkwarn)rR  errorspmd_mismatch!overlap_scheduling_autofix_cycles)"r   r   r   r   r<  r8  r9  r=  r>  r?  r   r@  floatrA  r   r1   fxNoderD  r   rE  rF  rG  rH  rI  rJ  rK  rL  rM  rO  rP  rQ  rT  rU  r   r   r   r;  r;  k  sz   N ',t+ )-$+, (,+ )-3:,.2ut|2 QUx(EFMT @L'";<K =Hw89G ,0EDL/.2ut|2 &*edl) %)sTz(.3%t3 )-$+, ).-
 *.4$;- 	 MNQUU
 48*D7
 J
 /5M7?+4
 /4%t3r   r;  c                  R    d} d}t         j                  j                  |      }| |k\  S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    ry   z0pytorch/inductor:enable_parallel_compile_version)r1   r2   r3   )ENABLE_PARALLEL_COMPILE_VERSIONr:   r5   s      r   #parallel_compile_enabled_internallyr[    s0     '(#@G##88AG*g55r   c                     ddl } | j                  t              }dt        j                  v r0t        t        j                  d         }|j                  d|       |S t        j                  dk(  rd}|j                  d       |S t               rt               sd}|j                  d       |S t        j                  j                         }|sJ t        d	|      }|j                  d
|       |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32ry   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcode    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r[  r1   _utils	cpu_countmin)r`  logcompile_threadsrf  s       r   decide_compile_threadsrj    s      

H
%C&"**4bjj)HIJ4oF  
	 56  
@B56  LL**,	yb),,o>r   ri  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )rE   quiesce_async_compile_timeuse_static_cuda_launcherz/torch._inductor.config.use_static_cuda_launcheruse_static_triton_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherz2torch._inductor.config.strict_static_cuda_launcherstrict_static_triton_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   force_shape_padTORCHINDUCTOR_PERMUTE_FUSION_use_fp64_for_unbacked_floatsTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotc                      d } t               rGt        j                  j                  dd       }t        j                  j                  dd      }|d| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r=   )r   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr    sL    G{

':DAjjnn[#6$Ek]SGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                       e Zd ZU ej                  j                  d      dk(  Zeed<   dZ	e
ed<   dZe
ed<   ej                  j                  d      ! e
ej                  j                  d            nd	Ze
d	z  ed
<   y	)auto_chunkerTORCHINDUCTOR_AUTO_CHUNKERr   enablei   output_size_thresholdr   amplify_ratio_threshold TORCHINDUCTOR_CHUNKER_NUM_CHUNKSN	num_chunk)r   r   r   r   r   r   r  r8  r9  r  r   r  r  r   r   r   r  r    sz    ::>>">?3FFDF "-3, $%S$ ::>><=I 	BJJNN=>? sTz r   r  c                      e Zd ZU dZdZej                  j                  dd      dk(  Zej                  j                  dd      dk(  Z	dZ
edz  ed<    eej                  j                  d	d
            Zdej                  j                  dej                  dk(  rdnd      fZedef   ed<   ej                  j                  dd      dk(  Zej                  j                  dd      dk(  ZdZedz  ed<   dZedz  ed<    ed      Zedz  ed<   dZed   ed<    eej                  j                  dd            Zej                  j                  dd      dk(  Zej                  j                  dd      dk(  Zej                  j                  dd      Zej                  j                  dd      dk(  Z d Z! eej                  j                  d!d            Z"ej                  j                  d"d      Z#ej                  j                  d#d      Z$d$Z%d Z&ej                  j                  d%d      dk(  Z'd Z(ej                  j                  d&d      dk(  Z)ej                  j                  d'd      dk(  Z*d(Z+y))cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr1   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAYr{  ),r   r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr  r   r9  min_chunk_sizerc  rd  r  tuplestrenable_kernel_profileweight_prepackr  r  r
   r  r8  r  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayuse_two_step_variance_thresholdr   r   r   r  r    s    G
 	

=sCsJ  jjnn%H#NRUUOGS4Z(JERSN 	


u3<<8+CiOCtSy	  	

@#F#M 
 ZZ^^$FLPSSN
 04 #*304!3:4 //IJJtJ 	 wHI 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !
 	

=sCsJ 
  % 	

>DK  	

FLPSS  
 '+#r   r  c                   l   e Zd ZU dZej
                  j                  d      dk(  ZdZdZ	dZ
eeeedf   z     dz  ed<   d	ZdZdZdZ e        Zd
ZdZedz  ed<   dZdZ edd      Zeed<   dZeed<   dZdZdZdZdZ ej
                  j                  d e       sdnd      dk(  Z!eed<   dZ"edz  ed<   dZ#eed<   dZ$dZ% e&       Z'edz  ed<   dZ(eed<   dZ)eed<    ejT                  dd      dk(  Z+eed<   dZ,dZ-ej
                  j                  dd      dk(  Z.ej
                  j                  dd      dk(  Z/dZ0e1d   ed<   ej
                  j                  d d      dk(  Z2ej
                  j                  d!d      dk(  Z3eed"<   ej
                  j                  d#d      dk(  Z4dZ5 eej
                  j                  d$d            Z6e1d%   ed&<   ej
                  j                  d'd      dk(  Z7d(Z8dZ9e:jv                  jx                  rd)nd*Z=eed+<   dZ>dZ?dZ@dZAeBdz  ed,<   dZCej
                  j                  d-d      dk(  ZDej
                  j                  d.d      dk(  ZEej
                  j                  d/d      dk(  ZFdZG eej
                  j                  d0e:jv                  jx                  rdnd1            ZH eej
                  j                  d2d3            ZIej
                  j                  d4d      dk(  ZJej
                  j                  d5 e       rdnd      dk(  ZKd6ZLdZMedz  ed7<   ej
                  j                  d8d      dk(  ZNdZOej
                  j                  d9      dk(  ZPi ZQeReBef   ed:<   d;ZSej
                  j                  d<d      dk(  ZTeed=<   ej
                  j                  d>      ZUeBdz  ed?<   ej
                  j                  d@d      dk(  ZVeedA<   ej
                  j                  dBd      dk(  ZWeedC<   ej
                  j                  dDd      dk(  ZXeedE<   y)Fr   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   r1  r   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr  cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsTORCHINDUCTOR_NATIVE_MATMULnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS TORCHINDUCTOR_DECOMPOSE_SORT_OPSdecompose_sort_ops$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   ry   r.      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16   r_     spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STORETORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32TORCHINDUCTOR_ENABLE_PDL!TORCHINDUCTOR_MIX_ORDER_REDUCTIONry   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE4TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGESdebug_dump_kernel_inputsr  %TORCHINDUCTOR_TRITON_PROTON_PROFILINGproton_profiling&TORCHINDUCTOR_TRITON_PROTON_OUTPUT_DIRproton_output_dir'TORCHINDUCTOR_TRITON_PROTON_GROUP_BY_SMproton_group_by_sm-TORCHINDUCTOR_TRITON_PROTON_SPLIT_INVOCATIONSproton_split_invocations-TORCHINDUCTOR_TRITON_PROTON_PER_CTA_OCCUPANCYproton_per_cta_occupancy)Yr   r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r  r   r9  cudagraph_min_partition_sizeslow_path_cudagraph_asserts!cudagraph_trees_history_recordingcudagraph_trees_objgraphr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr	   r  r8  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr,   r  r  r  getenvr   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionsr  cooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr1   r5   hipr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_size#mix_order_reduction_non_strict_mode&mix_order_reduction_allow_multi_stagesr	  dictmax_kernel_dump_occurrencesr  r  r  r  r  r   r   r   r   r   y  sC   
  :;sBJ O %*! DHU3sCx#89D@G $%  #' ).%  % ,5;$ +.' 67&d
6 ! $  &9   37)46 #( "   N 	

4cRU	
 	 d  !IsTz  #d"   -M,NdTkN
 ).- "OT!$ $"))$A3G3NM4N (,$'+$ 	

:C@CG  	

?EL  	 wHI  	

<cBcI  	

93?3F   	

=sCsJ 
 $)  ),


3S9)L'*% 
 jjnn%DcJcQO   K "'!2!22OS: M "
 15-
 04 #*3 " 	

5s;sB !
 !#

/JC PTW WJJNN#:C@CGM 6;2 !


25==;L;LCRV	
  


<dC  :C@CGJ 	

:9;CTWX	  *+&15"C$J5


NPST	 , +0' 	

MNRUU + 02d38n1
 #$ 	

>DK d  %'JJNN0%sTz 
 	

@#F#M  
 	

FLPSS d  	

FLPSS d r   r   c                      e Zd ZU dZdZej                  j                  dd      dk(  Zej                  j                  dd      dk(  Z	ej                  j                  dd      dk(  Z
ej                  j                  dd	      Zej                  j                  d
d      Zed   ed<   ej                  j                  dd      ZdZdZdZeed<   dZeed<   dZdZeed<   dZedz  ed<   dZeed<   i Zeeef   ed<   ej                  j                  dd      dk(  Zeed<   dZeed<   ej                  j                  dd      dk(  Zeed<    eej                  j                  dd            Z eed<   i Z!eee"f   ed<   dZ#eed <   dZ$eed!<   ej                  j                  d"d      dk(  Z%eed#<   dZ&eed$<   dZ'edz  ed%<    e(        Z)eed&<   dZ*edz  ed'<   dZ+edz  ed(<   dZ,edz  ed)<   i Z-ee.j^                  j`                  e1e   f   ed*<   dZ2e1e   dz  ed+<   ej                  j                  d,d      dk(  Z3dZ4eed-<   dZ5edz  ed.<   dZ6ee1e   z  dz  ed/<   dZ7edz  ed0<   y)1aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS!AOT_INDUCTOR_ENABLE_FRAME_POINTER&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr.   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_path)8r   r   r   r   output_pathr   r   r   debug_compiledebug_symbolsenable_frame_pointercompile_wrapper_opt_levelrN  r   r9  filtered_kernel_namesserialized_in_specserialized_out_specrP  r8  rQ  use_consts_asm_buildrR  rS  rT  rU  rB  r  rW  rX  rZ  r   r\  r]  r   r^  r_  ra  rb  rc  r   rd  re  rf  rg  rh  r1   _ops
OpOverloadlistri  
enable_ltork  rl  rm  rn  r   r   r   rE  rE    s    KJJNN#?ELMJJNN#?ELM 	

:C@CG  !#

0$! EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$  GT$(dTk( !OT   "Hd38n! 	

H#NRUU (  "d!  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 	

BCHCO !$  %)T( 48$cDj7 $-;. (,+ +/D4K. 26"C$J5 EG4

 5 5tCy @AF'+NDI$+  93?3FJ M4 )-3:, 15sT#Y-4)-C$J-r   rE  c                       e Zd ZU dZeed<   y)aot_inductor_modeFcompile_standaloneN)r   r   r   r~  r8  r9  r   r   r   r}  r}    s      %$r   r}  c                      e Zd ZU dZdZed   ed<   dZdZe	j                  j                  e	j                  j                  de	j                  j                  e	j                  j                  ej"                        d                  ZdZedz  ed	<   g d
Zee   ed<    eeeeef    ed e	j                  j                  dd      j3                  d      D                    Zeeeef   ed<    eeeeef    ed e	j                  j                  ddj                  d eD                    j3                  d      D                    Zeeeef   ed<   e	j                  j                  dd      dk(  ZdZdZeed<   e	j                  j                  dd      dk(  Ze ed<   e	j                  j                  d      Z!e"dz  ed<   e	j                  j                  d      Z#e"dz  ed<   e	j                  j                  d d      Z$e"ed!<   e	j                  j                  d"d      dk(  Z%e ed#<   e	j                  j                  d$d      dk(  Z&e ed%<   e	j                  j                  d&d'      Z'e"ed(<   d)Z(e ed*<   dZ)e ed+<   dZ*e ed,<   d)Z+e ed-<   y).cutlassz-
    Config specific to cutlass backend.
    -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Ncutlass_max_profiling_configs)ry   r.   r   r   %cutlass_max_profiling_swizzle_optionsc              #   2   K   | ]  }t        |        y wr   r   .0xs     r   	<genexpr>zcutlass.<genexpr>  s      
 F
   +TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_SHAPEz2,1,1r   cutlass_dynamic_cluster_shapec              #   2   K   | ]  }t        |        y wr   r  r  s     r   r  zcutlass.<genexpr>  s      
 F
r  .TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_FALLBACKc              #   2   K   | ]  }t        |        y wr   )r  )r  vs     r   r  zcutlass.<genexpr>  s     GAQGr   cutlass_dynamic_cluster_fallbackCUTLASS_EPILOGUE_FUSIONr   r   ry   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen),r   r   r   r   r  r   r9  enable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer1   __file__cutlass_dirr  r   r  rz  r   r  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r8  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r  r    s    EJw@AI  M ''""


'GGLL/)	
K 15!3:4 8D)49C:>c3m 
ZZ^^=weCj	
 	
;!5c3#7  >Bc3m 
ZZ^^@G)FGG eCj
 	
	>$eCcM&: 	 	

0#6#= $
  *+!3*
 	

H#NRUU $  .0ZZ^^).d
  -/JJNN(-sTz  (*zz~~3S(  	

DcJcQ "4  	

;SASH $   "zz~~+U  
 %)T( +0!4/ -2#T1 $(D'r   r  c                   f    e Zd ZU dZedz  ed<   dZedz  ed<   dZedz  ed<   dZdZ	dZ
edz  ed<   y)cudaNarchr5   cuda_cxxF   r   )r   r   r   r  r  r9  r5   r  enable_cuda_ltoenable_ptxas_infor   r   r   r   r   r  r  	  sR    
 D#*
 GS4Z  HcDj O  01 #*0r   r  c                       e Zd ZU dZedz  ed<   dZedz  ed<   dZedz  ed<   ej                  j                  ej                  j                  dd            Zy)xpuNr  r5   oneapi_rootr  r   )r   r   r   r  r  r9  r5   r  r   r  r  r   r   r  r   r   r   r  r  5	  sV     D#* GS4Z #Kt"''""2::>>2Mr#RSKr   r  c                   z   e Zd ZU g Zee   ed<   g dZeed      ed<   dZ	ed   ed<   dZ
dZdZdZdZd	Zed	z  ed
<   ej"                  j%                  d      Zej"                  j%                  dd      dk(  Zeed<   d	Zed	z  ed<   d	Zed	z  ed<   d	Zed	z  ed<   dZeed<   d	Zee   d	z  ed<   dZeed<   dZeed<   y	)rocmr  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_threshold)r   r   r   r  rz  r  r9  r  r   r  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   ck_dirr  r8  r  r   r  r  r  r  r  r  r   r   r   r  r  D	  s%    D$s)FtG$@AB  	 wL 
 H J M O #( !IsTz  ZZ^^23F 	

FLPSS $ 
 +/S4Z. ,0cDj/ 15!3:4 ',t+ &*L$s)d")  s !##"r   r  )r  r   halidepallascpu_backend)r   r  r  cuda_backendr  tpu_backendxpu_backendc                       e Zd ZdZy)mtiaFN)r   r   r   disable_welford_reductionr   r   r   r  r  	  s     %r   r  c                   J    e Zd ZU dZdZdZed   ed<   dZed   ed<   dZ	dZ
dZy	)
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r9  r  assertsdebugscan_kernelsr   r   r   r  r  	  sQ    J J
 	 GRS  	 7QR 
 G E Lr   r  c            	          e Zd ZU ej                  j                  dd      dk(  Zej                  j                  dd      dk(  ZdZe	dz  e
d<   dZdZdZdZdZdZdZej                  j                  d	d      dk(  Zej                  j                  d
d      dk(  Zej                  j                  dd      Zej                  j                  dd      ZdZdZee	gdf   dz  e
d<   ej                  j                  dd      dk(  Z eej                  j                  dej                  j                  dd                  Zee
d<   y)traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r   r   r   r   r   r   enabledsave_real_tensorsr  r  r9  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr   r  r   r   r   r  r  	  s\   jjnn2C8C?G 

'FLPSS !IsTz  I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 04J#%,3ZZ^^,BCHCO &)


!2::>>2G#M	
&s r   r  )ztrace.upload_tarrq   rr   rs   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrn   ro   r-  rt   r   _save_config_ignore)r  zcuda.cutlass_dirzcutlass.cutlass_dirzxpu.cutlass_dirr%  ri  ro   rn   rq   rr   rs   r-  rt   r   r  r   rF   rH   rL   rM   _cache_config_ignore_prefix_cache_config_factory_keysexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   J    e Zd ZU dZeeeeeef      f   dz  ed<   dZ	e
ed<   y)lookup_tableNtableTcheck_src_hash)r   r   r   r  rB  r  rz  r   r9  r  r8  r   r   r   r  r  H
  s4    48E4T$sCx.))*T18  NDr   r  c                   r   e Zd ZU dZeed<   dZedz  ed<   dZeed<   dZe	dz  ed<   dZ
dZdZej                  j                  d      Zedz  ed<   ej                  j                  d	      Zedz  ed
<   dZdZed   dz  ed<   dZdZeed<    ej0                  d      dk(  Z ej0                  dd      ZdZdZy)test_configsF%force_extern_kernel_in_multi_templateNforce_custom_op_decompositionforce_no_impl_groupingmax_mm_configs(TORCHINDUCTOR_AUTOTUNE_CHOICE_NAME_REGEXautotune_choice_name_regex(TORCHINDUCTOR_AUTOTUNE_CHOICE_DESC_REGEXautotune_choice_desc_regex)assertrh  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   )r   r   r   r  r8  r9  r  r  r  r   runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr   r   r   r  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r&  force_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r  r  S
  s   27)47 26!4$;5 $)D(!%NC$J%"'"'# .0ZZ^^2.d
  .0ZZ^^2.d
  27.>BGO4t;B L .2$d1 			@ASH # #,"))3R# ".3+r   r  )*c                   r    e Zd ZU ej                  j                  dd      dk(  Zeed<   dZ	eed<   dZ
eed<   y)	eager_numerics'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGr   r   division_roundingFdisable_ftzuse_pytorch_libdeviceN)r   r   r   r   r   r   r+  r8  r9  r,  r-  r   r   r   r)  r)  
  sA    
 	

@#F#M t  K
 #(4'r   r)  %TORCHINDUCTOR_EMULATE_PRECISION_CASTSemulate_precision_casts(g  r   rc  collections.abcr   typingr   r   r   r   r1   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   torch._inductor.choicesr   torch._inductor.cudagraph_utilsr   r   r   inplace_paddingcan_inplace_pad_graph_inputr8  r   r    r#   r&   r)   r,   r6   r;   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr   r?   r9  rF   rG   rH   rI   rK   rL   rM   rN   rP   rQ   rR   rB  r  rS   rV   rX   rZ   r\   r^   online_softmaxapply_gumbel_max_trickdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rm   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_first*epilogue_fusion_user_defined_triton_kernelpattern_matcherb2b_gemm_passrn   	_inductorcustom_graph_passCustomGraphPassTypero   rp   CustomPartitionerFnTyperq   rr   rs   rt   rz  ru   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrv   rw   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_mul keep_addmm_fused_for_half_dtypesuse_mixed_mmr~   r    reorder_for_compute_comm_overlapr   r   reorder_for_peak_memoryreorder_for_peak_memory_debugr   r   r   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmpipeline_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr&   inductor_default_autotune_warmupinductor_default_autotune_repr   r   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   r   r   uppermax_autotune_gemm_backendsr   r   max_autotune_conv_backendsr   r   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsrV  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr5   r1  layout_opt_defaultlayout_optimizationforce_layout_optimizationcache_sdpa_constraintkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr  r  fallback_randomalign_random_eager"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr  aggressive_fusionr  r
  enabled_metric_tablesr  r  score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_sizemin_overlap_ratio)max_fusion_buffer_group_pairwise_attemptsr  max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingdo_not_emit_runtime_assertionscompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_args!combo_kernel_per_subkernel_blockscombo_kernels_pointwise_onlyjoint_graph_constant_foldingdebug_index_asserts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr  r  r  r$  r%  r&  r(  r*  _fuse_ddp_communication_fuse_ddp_bucket_sizer-  r.  partitioned_scatter_enabledr0  r2  r3  r5  r;  r[  rj  ri  rl  rn  ro  rp  rr  rt  ru  libfb.pyrw  __package__get_dir_pathr  r  replaceseprv  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler|  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testingr~  _profile_varprofile_bandwidthprofile_bandwidth_regexr  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r  r  r  r  r  enable_linear_binary_foldingr  r  r  r  r  r  r  r  r  r   rE  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingr)  r/  modulesr   r   r   r   <module>r     s   	 
 $ 4 4  ( (  7?**..!@#F#M# Ctd{ CD4K Ctd{ CKtd{ K4$; F$+ Fd 
 
 JJNN7=D 
 	     8A{3 
 '*JJNNA6J'  
 ?;1	  $) D ( &C%D td{ D ./ "4$;  )/MB)   " d ! &C%D td{ D .S-T td{ T $*VW d W 05 & 4 57 !4S> 6 &* d
 )  (1*  JJNN#>DKT K JJNN=sCsJ D  ::>>"<cBcI
D I JJNN4Y[cc  d   >DK JJNN93?3F 
    zz~~:C@CGjjnn89S@JJNN=>#E   >DK JJNN4Y[ccR
       **..!@#F#M 

<=D HJzz~~HWCD 
    *+   .3 *   TX 5??<<PP WTX EOO==QQ X TX u88PP W PT u88LL SPT 99MM T
 OS eoo77KK R 	 	;	<=:;	= 		" 	 	;	<=:;	= 		   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  # 
 $(    	+ c3h  FQAB P $)  *  (	;	<=:;	=*  &* d
 )   %  /0 & / >D w9: CNR .#0Dt0K R  4!  5; 7=1 :RV 2HcUCZ4H44O V  #G4% 
 17 w}- 6NR .#0Dt0K R   */ !4 /   
 &,?<& d  JJNN@ASH  JJNN;<C 
 zz~~:;sB (NOSVV  JJNN#DEL #&BII5r:$   !$BII2C8! t  .U-V d
 V JJNNDcJcQ " JJNNQSVW
 / JJNNBCHCO   JJNN2y{CPST
   .2 * 1
 *, T#Y + ,. tCy - -2 #T 1 $47 d  !# DI "  ZZ^^.0A%' sTz  ,Q+R cDj R  ZZ^^.%'  DF::>>2ID%' (? @ 
 DF::>>2ID%' (? @ 
 " T ! " T !   ! T   =F g89 E  $  #! d ! JJNN.4; 4 
 "      JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  !JJNN=tD 
  %JJNN?F  
 JJNN<=D  JJNNJKsR ( $'JJNN<cB$  

@ @@ @ JJNN<cBcI 
BC BD B: : :6C 6D 6 *I 
 !& 1 1Ss JJNN68JKsR  JJNN+KSQUXX  JJNN8#>#E  ZZ^^$FLPSS  **..!@ASH
       !#*  %+>% T     &+ "  JJNNCDK ! BF %6!67$> E   ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN29;CC  D   (,  + !#  JJNN<cBcI 
 $%      -/ ) ,0 cDj /          299=sCsJ  		78C? BJJNN#@!DE::>>"BCHCO  "&   %     "'        "# &* #  %* !$   $     1 11OUe>O>O5O [8$8  JJNNEsKsR #
 #' S4Z &%) d
 ) 2 
C 
 67 S 7 &. s - !'7:!    T       C Xi%83%> ? 
 ! D   JJNN>DK 
 +, "C + +. "C - ,0 !5 /: :
X4 X4v6T 6 B '0kd7M7Ot O $*:=$ D  #)# C  ">!? $ ? $*
;$ D  39IL3 )4  JJNN>DK T 
 '-
>' t 
 * ;
 $$3w33[00bff=zJ   4w33J?
    

<cBcI JJNN8#>#E        (- $  (          >DK # 
 $    !  +4+o t 5zz~~5r: B&  , 3"  (*zz~~"D( #* 
 JJNNHISP 0    8#>#E$ E %* T )  %  $ (-  ,
 $ t # $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 W 37 )4 6 46 tCc3h/0 5RZZ^^,MuUV 3 V %  %	S4Z 	 %+1% c  "C+ C+LU Up
t. t.t% %( (D W17 1 1< WT' T TF# F#T =BW89 A 7?g23 > "*WX ) "*WX )& &
 8K K\" T#Y "* T#Y D ) DI 
 UWhellELLI4OPQ V BIIH#NRUU +
   84 84v ,( (2 JJNN:C@CG  
 ckk(+ ,K, $   s   A4AK" K"AK2K1AK2