o
    ^i^                  	   @  st  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
l m!Z! ddl"m"Z" ddl m#Z# d	dl$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* d dl+m,Z, dZ-dZ.edZ/G dd dej0Z1dRddZ2G dd dZ3i Z4g Z5dd Z6dSdd Z7G d!d" d"ee/ Z8d#d$ Z9d%d& Z:d'd( Z;G d)d* d*Z<eG d+d, d,Z=d-d. Z>G d/d0 d0e<e8e/ Z?edTd3d4Z@edddddddd5dUd@d4Z@	dVdddddddd5dWdCd4Z@G dDdE dEZAG dFdG dGZBdHdI ZCdJdK ZDG dLdM dMe<ZEG dNdO dOe<ZFdPdQ ZGdS )X    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtype)get_cache_key)get_cache_invalidating_env_varsztriton.languagez"triton.experimental.gluon.languageTc                      s   e Zd ZdZd! fddZedd Zdd	 Zd
d Zd"ddZ	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z  ZS )#DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                   sV   t    || _t|d| _|| _|| _h d| _	t
tddh| _i | _d| _d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr2   r7   r8   src	__class__ M/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/runtime/jit.pyr1   .   s   


zDependenciesFinder.__init__c                 C  
   | j  S N)r6   	hexdigestr?   rC   rC   rD   retY      
zDependenciesFinder.retc                 C  s&   t |jrdS t|dd}|tS )NT
__module__ )inspect	isbuiltinfuncr,   
startswithr;   )r?   noderO   modulerC   rC   rD   _is_triton_builtin]   s   
z%DependenciesFinder._is_triton_builtinc                 C  s   t |tsJ | j |j @ D ].}|\}}| j| \}}|j| \}}||kr?td| d| d| j d|j d| dq| j|j |j}|t	t
|dd7 }| j|d	 d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr#   )r-   JITCallabler=   keysRuntimeErrorr2   __name__update	cache_keystrr,   r6   r5   )r?   rO   kvar_name_v1v2func_keyrC   rC   rD   _update_hashc   s   &zDependenciesFinder._update_hashNc                 C  s   ddl m} |d u st|tu rd S t|ddrd S t|dddkr$d S t|tr0| | d S t|rEt|tsEt||sEt	d| | j
rJd S |d ur\t||f| j|t|f< d S )	Nr   	constexpr__triton_builtin__FrK   rL   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corere   typer   r,   r-   rV   rc   callablerX   r>   r.   deepcopyr=   id)r?   valvar_dictr2   re   rC   rC   rD   record_referenceu   s"   

z#DependenciesFinder.record_referencec                   sd   t |jtju r|jS |j jv rd S  fdd}||j\}}|j jv r(|S  |||j |S )Nc                   sD    j | d }|d ur| j fS  j| d }|d ur | jfS dS )NNN)r7   getr8   )r2   rl   rH   rC   rD   name_lookup   s   

z2DependenciesFinder.visit_Name.<locals>.name_lookup)rh   ctxastStorerk   local_namesr9   rn   )r?   rQ   rq   rl   rm   rC   rH   rD   
visit_Name   s   	zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS rC   )visit).0eltrH   rC   rD   
<listcomp>       z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr?   rQ   rC   rH   rD   visit_Tuple   s   zDependenciesFinder.visit_Tuplec                 C  sl   |  |j}t|tjr|  |j}t|tjst|dd}|d u s'|| jv r)d S t||j}| | |S )NrY   rL   )	rw   valuer-   rs   	Attributer,   r<   attrrn   )r?   rQ   lhslhs_namerI   rC   rC   rD   visit_Attribute   s   
z"DependenciesFinder.visit_Attributec                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]}|j qS rC   arg)rx   r   rC   rC   rD   	<setcomp>       z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)argsru   generic_visitr}   rC   rC   rD   visit_FunctionDef   s   z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr|jgng |jD ]} | q||j |jd ur0 |j ||j	 d S )Nc                   sB   z j rJ d _ | D ]}|d ur | qW d _ d S d _ w )NTF)r>   rw   )defaultsexprrH   rC   rD   visit_defaults   s   

z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
	itertoolschainposonlyargsr   vararg
kwonlyargsrw   kw_defaultskwargr   )r?   rQ   r   r   rC   rH   rD   visit_arguments   s   (


z"DependenciesFinder.visit_argumentsc                 C  s:   |  |}t|tr|  jt|O  _d S | j| d S rF   )rw   r-   r(   ru   setadd)r?   rQ   targetrC   rC   rD   visitAssnTarget   s   

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )Nr   z2Simultaneous multiple assignment is not supported.r   )r%   targets	TypeErrorr   r   r}   rC   rC   rD   visit_Assign   s   zDependenciesFinder.visit_Assignc                 C     |  |j | | d S rF   r   r   r   r}   rC   rC   rD   visit_AnnAssign      z"DependenciesFinder.visit_AnnAssignc                 C  r   rF   r   r}   rC   rC   rD   	visit_For   r   zDependenciesFinder.visit_For)r!   r"   ro   )rY   rK   __qualname____doc__r1   propertyrI   rS   rc   rn   rv   r~   r   r   r   r   r   r   r   __classcell__rC   rC   rA   rD   r    "   s"    +

" 	r    r!   r\   c                 C  s  dd l m  m} t| trZ|  } | dr/| d} t| } | ds'J d| dd   S | 	dr>dt| d d  S | drMdt| dd   S | drYt| dS n%t| |j
rhdt| j S t| |jrr| j} nt| tr{| j} nt| } t| d	d
| S )Nr   zconst const**kr   ztl._trL   )triton.language.corelanguagecorer-   r\   striprP   removeprefix_normalize_tyendswithpointer_type
element_tydtyper2   rh   rY   r   rp   replace)tyr   rC   rC   rD   r     s.   






r   c                   @  sr   e Zd ZdZdd	d
Zedd ZedddZedddZedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr$   paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                 C  s   || _ || _|| _|| _d S rF   )r   _paramr   r   )r?   r   r   r   r   rC   rC   rD   r1   )  s   
zKernelParam.__init__c                 C     | j jS rF   )r   r2   rH   rC   rC   rD   r2   0     zKernelParam.namer!   r\   c                 C  s(   | j jr| j jtjjkrdS t| j jS )NrL   )r   
annotationrM   	Parameteremptyr   rH   rC   rC   rD   r   4  s   zKernelParam.annotationc                 C  sN   | j }|dr|dd  }n|dr|dd  }|tt v r%| j S dS )Nr   r   r   r   rL   )r   rP   r   r   values)r?   arC   rC   rD   annotation_type:  s   

zKernelParam.annotation_typec                 C  s
   d| j v S Nre   )r   rH   rC   rC   rD   is_constexprE  rJ   zKernelParam.is_constexprc                 C  s    | j rdS d| jv p| jdS )NFr   r   )r   r   rP   rH   rC   rC   rD   is_constI  s   zKernelParam.is_constc                 C  r   rF   )r   defaultrH   rC   rC   rD   r   O  r   zKernelParam.defaultc                 C  s   | j jtjjkS rF   )r   r   rM   r   r   rH   rC   rC   rD   has_defaultS  s   zKernelParam.has_defaultN)r   r$   r   r   r   r   r   r   r!   r\   )rY   rK   r   r   r1   r   r2   r   r   r   r   r   r   r   rC   rC   rC   rD   r   &  s"    





r   c                   s0   ddl m ddlm  d	 fdd	S )
Nr   rd   r   r   FTc                   s   d u rdS t  trdS t  trA|r d|dnd } dkr%|r%dS d kr1 dkr1d	|fS d
 kr= dkr=d|fS d|fS t  trHdS t dr} j|f}t|d }|d u rn|d rbdndt|d  }|t|< |rw d|dnd }||fS t  t	rd j
fS t  rd fS t  trfdd D } fdd}|dd |D }	|dd |D }
|	|
fS t  trt jdsJ t jj}d| t j dd fS t  rt jdsJ t jj}d| t j d jdd fS tdt  )N)re   N)u1Nr$   )alignr   )re   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorre   c                      g | ]} |qS rC   rC   rx   x)specialize_implrC   rD   rz         zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>c                   s   t  drt |  S t| S )N_fields)hasattrrh   tuple)valsr   rC   rD   <lambda>      zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>c                 S     g | ]}|d  qS r   rC   r   rC   rC   rD   rz     r   c                 S  r   r   rC   r   rC   rC   rD   rz     r   ztensordesc<>,zUnsupported type: %s)r-   r   r$   r)   r   r   	dtype2strrp   r   rV   r[   r   r   baser(   block_shapelayoutr   rh   )r   r   specialize_valuer   keydskresspec
make_tupletysrW   innerGluonTensorDescriptorre   specialize_extrar   r   rD   r   a  sT   










"z/create_specialize_impl.<locals>.specialize_impl)FTT)r   re   'triton.experimental.gluon.nvidia.hopperr   )r   rC   r   rD   create_specialize_impl\  s   /r   Fc                 C  s6   t tdkrttdd  td }|| |dd S )Nr   c                 [     d S rF   rC   )r_   kwargsrC   rC   rD   r     s    zmangle_type.<locals>.<lambda>)r   )r%   specialize_impl_cacheappendr   )r   
specializer   rC   rC   rD   mangle_type  s   r   c                   @  s    e Zd ZU ded< dddZdS )KernelInterfacer   runr!   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)r   )r   r   r   r?   rC   rD   r     r{   z-KernelInterface.__getitem__.<locals>.<lambda>rC   )r?   r   rC   r  rD   __getitem__  s   zKernelInterface.__getitem__N)r!   r   )rY   rK   r   __annotations__r  rC   rC   rC   rD   r     s   
 r   c           	   	   C  sl   dd |  D }dd l}| |dd | D t| dd | D t| |j|d}||}|S )Nc                 S  s*   i | ]\}}||j jd krt|n|qS )r   )rB   rY   r\   rx   r   r   rC   rC   rD   
<dictcomp>  s   * z1serialize_specialization_data.<locals>.<dictcomp>r   c                 S     g | ]}t |qS rC   r(   r   rC   rC   rD   rz     r   z1serialize_specialization_data.<locals>.<listcomp>c                 S  r  rC   r  r   rC   rC   rD   rz     r   )r2   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )itemsjsonrW   r(   r   __dict__dumps)	r2   r	  	constantsattrsr  r   r  objserialized_objrC   rC   rD   serialize_specialization_data  s   $
r  c              
   C  s  t | jt |ksJ g }t| j |D ]o\}}|jr&|d| d q|jr+dnd}|jr2dnd}|jr9dnd}d| d| d| d| d	}	|j	r~t
|j	trc|j	dksa|j	dd	 d
v rcd}|rs|d|j	 d|	 d q|d|j	 d q||	  qdd }
ddtt|
| j dg  dddd | j D  dd| d}dd | j D }t|d< t|j|d< t|| |d S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                 S  s0   | d j tjju r| d S | d  d| d  S )Nr   r   z	=default_r   rM   r   r   )r   rC   rC   rD   r     s   0 z0create_function_from_signature.<locals>.<lambda>z
def dynamic_func(z	**optionsz):
    params = {c                 S  s   g | ]
}d | d| qS )'z': rC   )rx   r2   rC   rC   rD   rz     s    z2create_function_from_signature.<locals>.<listcomp>z}
    specialization = [r   z-]
    return params, specialization, options
c                 S  s,   i | ]\}}|j tjjurd | |j qS )default_r  )rx   r2   r   rC   rC   rD   r    s
    z2create_function_from_signature.<locals>.<dictcomp>rV   r   dynamic_func)r%   
parametersziprW   r   r   r   r   r   r   r-   r\   joinr(   mapr  rV   r   get_arg_specializationexec)sigkparamsbackendspecializationr2   kpr   r   r   rI   r   	func_bodyfunc_namespacerC   rC   rD   create_function_from_signature  s@   
r/  c                 C  s   | j  d| j S )N.)rK   r   fnrC   rC   rD   get_full_name     r3  c                   @  s`   e Zd Zdd Zdd Zedd Zdd Zed	d
 Zdd Z	dd Z
dd Zeee
dZdS )rV   c              
   C  s   || _ t|| _zt|\| _| _W n ty% } ztd|d }~ww t|| _	t
 | _td| j}|td|tj d  }|| _d | _i | _|j| _|j| _|j| _|j| _|j| _d S )Nz1@jit functions should be defined in a Python filerL   z^def\s+\w+\s*\()r2  rM   r	  getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorr3  _fn_name	threadingRLock
_hash_locktextwrapdedentr$  research	MULTILINEstart_srchashr=   r   rY   r   __globals__rK   )r?   r2  er@   rC   rC   rD   r1     s(   


zJITCallable.__init__c                 C  s   | j t| jjB S rF   )rF  rM   getclosurevarsr2  r8   rH   rC   rC   rD   get_capture_scope     zJITCallable.get_capture_scopec                   s   | j q | jd ur| jW  d    S d| j | _t| jj}t| j| j|| j	d}|
|   |jt| j | _tt|j | _ddlm  |  jt fdd| j D 7  _t| jd | _W d    | jS 1 sww   Y  | jS )Nz
recursion:)r2   r7   r8   r@   r   rd   c                   s*   g | ]\\}}\}}t | r||fqS rC   )r-   )rx   r2   r_   rl   rd   rC   rD   rz   )  s    z)JITCallable.cache_key.<locals>.<listcomp>r#   )r=  rE  r:  rM   rH  r2  r8   r    rF  r@   rw   parserI   r\   r7  dictsortedr=   r  r   re   r3   r4   r5   rG   )r?   r8   dependencies_finderrC   rd   rD   r[     s*   

zJITCallable.cache_keyc                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )rs   rK  rD  r-   Moduler%   bodyFunctionDef)r?   treerC   rC   rD   rK  2  s
   zJITCallable.parsec                 C  s   ddl m} || S )Nr   )constexpr_type)r   rS  )r?   rS  rC   rC   rD   rh   9  s   zJITCallable.typec                 C  s   d| _ || _dS )a"  
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

        Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
        N)rE  rD  )r?   new_srcrC   rC   rD   _unsafe_update_src>  s   
zJITCallable._unsafe_update_srcc                 C     t d)NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrH   rC   rC   rD   _set_srcH     zJITCallable._set_srcc                 C  s   | j S rF   )rD  rH   rC   rC   rD   _get_srcM  s   zJITCallable._get_src)fgetfsetN)rY   rK   r   r1   rI  r   r[   rK  rh   rU  rX  rZ  r@   rC   rC   rC   rD   rV     s    "


rV   c                   @  s&   e Zd ZU ded< ded< ded< dS )JitFunctionInfor   rR   r\   r2   JITFunctionjit_functionN)rY   rK   r   r  rC   rC   rC   rD   r]  S  s   
 r]  c                 C  sD   t |t|f}| |d }|d ur|S t|t| }|| |< |S rF   )r   r\   rp   )kernel_key_cacher+  r  r   r[   rC   rC   rD   compute_cache_keyZ  s   ra  c                      s   e Zd Zdd ZdddZdd Zd	d
 Zdd Zdd Zdd Z			d fdd	Z
dd Zdd Zdd Zdd Zdd Z  ZS ) r^  c                 C     dS )NFrC   rH   rC   rC   rD   is_gluong  s   zJITFunction.is_gluonr!   bool | Nonec	                 C  s   |sd S | j j}	| j j}
ddd t| j|d D }|	 d|j d|j d|j d|j	 d	|j
 d
| d}t| j }t||||d ||}||||j|j|j|j	|j
|j|||d}|||t|
|	| d|i||ddS )Nr  c                 S  s    g | ]\}}|j  d | qS )z: r2   )rx   r   r   rC   rC   rD   rz   z       z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r  r   )r	  devicer  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr2  compileis_manual_warmupalready_compiled)r2  r   rK   r$  r#  paramsri  rj  rk  rl  rm  r3  r  rn  r]  )r?   hookr   r	  rh  r  r  ro  rq  r2   rR   	arg_reprsrr  	full_namerp  r   rC   rC   rD   
_call_hookj  s:    8


zJITFunction._call_hookc                 C  s   t |sJ | j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)ri   pre_run_hooksr   )r?   rw  rC   rC   rD   add_pre_run_hook  s   zJITFunction.add_pre_run_hookc                 C  sZ   ddl m}m}m}m} tj }||}|| _|| _|| _t| j	| j
|}i i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelrs  	ASTSourcemake_backend)compilerr}  rs  r~  r  r   activeget_current_targetr/  r	  rv  )r?   r}  rs  r~  r  r   r*  binderrC   rC   rD   create_binder  s   
zJITFunction.create_binderc                   s    |}dd | jD }dd |D }dd t||D }d|vs&J dd|vs.J d	d
|vs6J d|D ]}	|	|jvrI|	|vrItd|	 q8t|dd }
fdd|
D }
dd |D  t dd } fdd|D }|||
|fS )Nc                 S     g | ]}|j qS rC   re  r   rC   rC   rD   rz     r   z*JITFunction._pack_args.<locals>.<listcomp>c                 S  r   r   rC   r   rC   rC   rD   rz     r   c                 S  s   i | ]\}}||qS rC   rC   rx   r]   vrC   rC   rD   r    r{   z*JITFunction._pack_args.<locals>.<dictcomp>device_typez=device_type option is deprecated; current target will be usedrh  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                 S  s   |dkS r   rC   )r_   rl   rC   rC   rD   r     s    z(JITFunction._pack_args.<locals>.<lambda>c                   s    i | ]}|t t  |qS rC   )r   r(   r   )rx   path)
bound_argsrC   rD   r    rf  c                 S  r   r   rC   r   rC   rC   rD   rz     r   c                 S  s
   t |tS rF   )r-   r\   )r_   r   rC   rC   rD   r     s   
 c                   s   i | ]}| t |qS rC   )
parse_attrr   )rx   r]   )attrvalsr*  rC   rD   r    r   )parse_optionsrv  r#  r  KeyErrorr   )r?   r*  r   r  r+  r  sigkeyssigvalsr	  r]   
constexprsr  rC   )r  r*  r  rD   
_pack_args  s"   
zJITFunction._pack_argsc              
   O  s  | d| jp
tjj|d< tj }tj|}| jD ]	}||i | q| j	| \}}	}
}}||i |\}}}t
|	||}| |d }|d u rg| |||||\}}}}| |||||||}|d u rgd S t }| j D ]\\}}\}}| || }|krtd| d| d| qo|s|d usJ t|r||}t|}|d }|dkr|d nd}|dkr|d nd}t|dr| }|j||g| R  }|j|||||j|j|tjjtjjg	| R   |S )	NdebugrT   z1 has changed since we compiled this kernel, from z to r   r   r   result)rp   r  r   runtimer   r  get_current_deviceget_current_streamr{  device_cachesra  r  _do_compileobjectr=   r  rX   ri   r%   r   r  launch_metadatar   r   functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) r?   r   r  r   r   rh  r  rw  kernel_cacher`  r   r*  r  r  r+  r  r   kernelr	  r  r  not_presentr2   r_   rl   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  rC   rC   rD   r     sP   


zJITFunction.runc                 C  s   | j d u r| jS |  |S rF   )_reprr:  )r?   r_   rC   rC   rD   rr       zJITFunction.reprNc	                   s   |r|ng }|r
|ng }t  | |j| _|| _|| _|| _|| _|| _g | _	t
| jj D ]!\}	}
|	|v p<|
j|v }|	|v pE|
j|v }| j	t|	|
|| q0t| j| _d | _|| _|| _dd | j	D | _dd | j	D | _g | _d S )Nc                 S  r  rC   re  rx   prC   rC   rD   rz     r   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS rC   )r   r   r  rC   rC   rD   rz     s    )r0   r1   rK   rR   versionr   r   r  r  rv  	enumerater	  r"  r   r2   r   r   r   r  r  r  r  rU   	arg_namesr  r{  )r?   r2  r  r   r   r  rU   rr  r  ir   dnsdns_oarA   rC   rD   r1     s*   
zJITFunction.__init__c                O  s   | j ttj||dd|S )NTr   )r   r%  
MockTensor
wrap_dtype)r?   r   r   r   rC   rC   rD   r     s   zJITFunction.warmupc              	     s  dd l }dd lm  tj }||}|d | jkr(td|d  d| j t	t
|d }|d } fddt||D }t	t
|d	 }|d
 }	tt||	}
t|d  }dd |d  D }|d }| j| \}}}}}||}| j||||||
ddS )Nr   r2   zSpecialization data is for z but trying to preload for r
  r  c                   s,   i | ]\}}| j |r  |n|qS rC   )r   is_dtyper  tlrC   rD   r  -  s    z'JITFunction.preload.<locals>.<dictcomp>r  r  r	  c                 S  s(   i | ]\}}|t |trt|n|qS rC   )r-   r(   r   r  rC   rC   rD   r  5  s    r  r   T)r  )r  triton.languager   r   r  r  loadsr:  rX   r%  r   r#  rL  r  r  r  r  )r?   rp  r  rh  deserialized_objr
  r  r  r  r  r  r	  r  r   r_   r*  rC   r  rD   preload#  s@   




zJITFunction.preloadc              
     s   j  \}
}	}tjj grd S  	tj }
|
d urWt	 t
	|	}	
fdd} f	dd}|
|||}|S j	
jd}|< tjj g |S )Nc                     s   j j dS )N)r   r  	_env_vars)rs  r  rC   )env_varsr  r?   r@   r   rC   rD   async_compileS  rJ  z.JITFunction._do_compile.<locals>.async_compilec              
     s*   | <  tjj g d S rF   )rz  r   r  jit_post_compile_hook)r  )	r  r  rh  r  r   r  r?   r	  r  rC   rD   finalize_compileV  s   z1JITFunction._do_compile.<locals>.finalize_compile)r   r  )r  rz  r   r  jit_cache_hookr~  r   active_moderp   r   r   submitrs  r  r  )r?   r   r	  rh  r  r  r  r  r_   r*  
async_moder[   r  r  r  rC   )r  r  rh  r  r  r   r  r?   r	  r@   r   r  rD   r  F  s$   
zJITFunction._do_compilec                 O  rV  )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rX   r?   r   r   rC   rC   rD   __call__c  rY  zJITFunction.__call__c                 C  s   d| j  d| jj dS )NzJITFunction(:r  )rR   r2  r   rH   rC   rC   rD   __repr__f  s   zJITFunction.__repr__)r!   rd  )NNNNNNN)rY   rK   r   rc  rz  r|  r  r  r   rr  r1   r  r  r  r  r  r   rC   rC   rA   rD   r^  e  s     
.4$#r^  r2  JITFunction[T]c                 C  r   rF   rC   r1  rC   rC   rD   jito     r  r  rr  r  r   r   r  rU   rr  Optional[Callable]r  r   Optional[Iterable[int | str]]r   r  Optional[bool]rU   Callable[[T], JITFunction[T]]c                 C  r   rF   rC   r  rC   rC   rD   r  t  s   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c          	        s.   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r2  r   r!   r  c              
     sP   t | sJ tjjrddlm} ||  dS t|  dS )Nr   )InterpretedFunction)r  r   r   r  rU   rr  r  )ri   r   r  	interpretinterpreterr  r^  )r2  r  r  r   r   r  rU   rr  r  rC   rD   	decorator  s"   zjit.<locals>.decoratorNr2  r   r!   r  rC   )	r2  r  rr  r  r   r   r  rU   r  rC   r  rD   r    s   c                   @  sF   e Zd ZdZedd ZdddZdd Zed	d
 Zedd Z	dS )r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   torch)rB   rY   rK   r  r   rC   rC   rD   r    s   zMockTensor.wrap_dtypeNc                 C  s   |d u rdg}|| _ || _d S )Nr   )r   shape)r?   r   r  rC   rC   rD   r1     s   
zMockTensor.__init__c                 C  s8   dg}| j dd  D ]}||d |  q
tt|S )Nr   r   )r  r   r   reversed)r?   stridessizerC   rC   rD   stride  s   zMockTensor.stridec                   C  rb  Nr   rC   rC   rC   rC   rD   r     r  zMockTensor.data_ptrc                   C  rb  r  rC   rC   rC   rC   rD   	ptr_range  r  zMockTensor.ptr_rangerF   )
rY   rK   r   r   staticmethodr  r1   r  r   r  rC   rC   rC   rD   r    s    


r  c                   @  s^   e Zd Zdd Zdd Zdd Zdd	d
Zdd Zdd Zdd Z	dd Z
dd Zdd ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S rF   )r   r   datarh  r  )r?   r   r   rC   rC   rD   r1     s
   zTensorWrapper.__init__c                 C  rE   rF   )r   r   rH   rC   rC   rD   r        
zTensorWrapper.data_ptrc                 G  s   | j j| S rF   )r   r  )r?   r   rC   rC   rD   r    s   zTensorWrapper.strider!   r\   c                 C  s   d| j  d| j dS )NzTensorWrapper[rg  r  )r   r   rH   rC   rC   rD   __str__  s   zTensorWrapper.__str__c                 C  rE   rF   )r   element_sizerH   rC   rC   rD   r    r  zTensorWrapper.element_sizec                 C     t | j | jS rF   )r  r   cpur   rH   rC   rC   rD   r    r4  zTensorWrapper.cpuc                 C  s   | j |j  d S rF   )r   copy_)r?   otherrC   rC   rD   r    r4  zTensorWrapper.copy_c                 C  r  rF   )r  r   cloner   rH   rC   rC   rD   r    r4  zTensorWrapper.clonec                 C     t | j|| jS rF   )r  r   tor   )r?   rh  rC   rC   rD   r    rJ  zTensorWrapper.toc                 C  r  rF   )r  r   	new_emptyr   )r?   sizesrC   rC   rD   r     rJ  zTensorWrapper.new_emptyNr   )rY   rK   r   r1   r   r  r  r  r  r  r  r  r  rC   rC   rC   rD   r    s    
r  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr   zCannot reinterpret a r0  )r-   r  r   r   r   r   rh   )r   r   rC   rC   rD   reinterpret  s   


r  c                 C  sj   | }t |ts|j}t |tr|jjj}|j}t|jD ]\}}| 	dr0||7 } ||fS q||fS )Nzdef )
r-   rV   r2  __code__co_filenamer7  r  r6  r   rP   )r2  base_fn	file_name
begin_lineidxlinerC   rC   rD   get_jit_fn_file_line  s   


r  c                   @  s   e Zd Zdd Zdd ZdS )BoundConstexprFunctionc                 C  s   || _ || _d S rF   )__self____func__)r?   instancer2  rC   rC   rD   r1   '  s   
zBoundConstexprFunction.__init__c                 O  s   | j | jg|R i |S rF   )r  r  r  rC   rC   rD   r  +  r  zBoundConstexprFunction.__call__N)rY   rK   r   r1   r  rC   rC   rC   rD   r  %  s    r  c                      s2   e Zd Z fddZdd ZddddZ  ZS )	ConstexprFunctionc                   s   t  | d S rF   )r0   r1   )r?   r2  rA   rC   rD   r1   1  s   zConstexprFunction.__init__c                 C  s   |d ur	t || S | S rF   )r  )r?   r  objclassrC   rC   rD   __get__4  s   
zConstexprFunction.__get__N)	_semanticc                  sh   ddl m m}  fdd|D } fdd| D }| j|i |}|d u r*|S tjjr0|S ||S )Nr   )_unwrap_if_constexprre   c                   r   rC   rC   r   r  rC   rD   rz   =  r   z.ConstexprFunction.__call__.<locals>.<listcomp>c                   s   i | ]	\}}| |qS rC   rC   r  r  rC   rD   r  >  s    z.ConstexprFunction.__call__.<locals>.<dictcomp>)r   r  re   r  r2  r   r  r  )r?   r  r   r   re   r   rC   r  rD   r  :  s   zConstexprFunction.__call__)rY   rK   r   r1   r   r  r   rC   rC   rA   rD   r  /  s    r  c                 C  s   t | S )z
    Wraps an arbitrary Python function so that it can be called at
    compile-time on constexpr arguments in a Triton function and
    returns a constexpr result.
    )r  r1  rC   rC   rD   constexpr_functionM  s   r  r   )Fr  )rr  r  r  r  r   r  r   r  r  r  rU   r  r!   r  rF   )r2  r  rr  r  r  r  r   r  r   r  r  r  rU   r  r!   r  )H
__future__r   r   rs   r.   r3   rM   r   r;  r@  r>  collectionsr   dataclassesr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rL   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r;   r:   r   NodeVisitorr    r   r   r   r   r   r   r   r  r/  r3  rV   r]  ra  r^  r  r  r  r  r  r  r  r  rC   rC   rC   rD   <module>   s    0 
k2
7:b  <!%
