o
    ^i{R                     @  s  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddlm
Z
mZ ddlmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZd dl Z dZ!de!iZ"dZ#de#iZ$dd Z%G dd dZ&G dd dZ'e( dd Z)dd Z*d1ddZ+G dd  d Z,d2d!d"Z-d3d'd(Z.G d)d* d*Z/G d+d, d,e0Z1d-d. Z2G d/d0 d0Z3dS )4    )annotationsN   )get_cache_invalidating_env_varsir)backends)Language)BaseBackend	GPUTarget)__version__knobs)OutOfResources)get_cache_managerget_dump_managerget_override_managerget_cache_key)driver)get_sass)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                 C  sP   t d| }t d| }|d urdS t dd| } |d ur&dt|d S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1	nvTmaDescz {[^}]+} *   )researchsubconvert_type_reprgroup)xmatchtma r!   S/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/compiler/compiler.pyr   '   s   r   c                   @  s2   e Zd ZddddZdd Zdd
dZdd ZdS )	ASTSourceNreturnNonec                 C  s   || _ tj| _d| _|j| _|| _t | _	|d ur<|
 D ]\}}t|tr-|j|fn|}t|ts6J || j	|< q|p@t | _| j D ]}t|tsRtdqGd S )NttirzSignature keys must be string)fnr   TRITONlanguageext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrskeys	TypeError)selfr'   r-   
constexprsr6   kvr!   r!   r"   __init__6   s"   
zASTSource.__init__c                   sz   dd t | j D }dd  d fddt | j D }| jj dt| j d| d| }t	
|d S )Nc                 S  s   g | ]\}}|qS r!   r!   .0r;   r<   r!   r!   r"   
<listcomp>H   s    z"ASTSource.hash.<locals>.<listcomp>c                 S  s   t | dr| jS t| S )N	cache_key)hasattrrA   r2   )r   r!   r!   r"   <lambda>I       z ASTSource.hash.<locals>.<lambda>-c                   s   g | ]\}} |qS r!   r!   r>   get_keyr!   r"   r@   J   rD   utf-8)sortedr-   r0   joinr/   r'   rA   r2   r6   hashlibsha256encode	hexdigest)r9   
sorted_sigconstants_keykeyr!   rF   r"   hashG   s
   "$zASTSource.hashtargetr	   c                 C  s"   ddl m} || j| ||||dS )Nr   )ast_to_ttir)contextoptionscodegen_fns
module_map)code_generatorrT   r'   )r9   rS   rV   rW   rX   rU   rT   r!   r!   r"   make_irN   s   zASTSource.make_irc                 C  s   t  S N)r.   r9   r!   r!   r"   parse_optionsS   s   zASTSource.parse_options)NNr$   r%   rS   r	   r+   
__module____qualname__r=   rR   rZ   r]   r!   r!   r!   r"   r#   4   s
    
r#   c                   @  s.   e Zd Zdd Zdd ZdddZd	d
 ZdS )IRSourcec           
      C  s   || _ t|}|jdd  | _tj| _| | _t	
| |
| | jdkrStt| j | jtj}|d| _|d}tt| j |}dd t|D | _d S t	| j || _| j }d| | _| j|}| j|}	dd t|	D | _d S )Nr   r   r   c                 S  s   i | ]	\}}|t |qS r!   )r   r?   r;   tyr!   r!   r"   
<dictcomp>i       z%IRSource.__init__.<locals>.<dictcomp>@c                 S     i | ]\}}||qS r!   r!   rd   r!   r!   r"   rf   p       )pathr   suffixr*   r   r(   r)   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r,   findallarg_type_pattern	enumerater-   parse_mlir_modulemoduleget_entry_func_nameget_functionget_function_signature)
r9   rk   rU   backendr   r-   typesfn_namefuncOpfunc_tyr!   r!   r"   r=   Y   s&   






zIRSource.__init__c                 C  s   t | jd S )NrH   )rK   rL   rn   rM   rN   r\   r!   r!   r"   rR   r   s   zIRSource.hashrS   r	   c                 C  s   || j _| j S r[   )rv   rU   )r9   rS   rV   rW   rX   rU   r!   r!   r"   rZ   u   s   zIRSource.make_irc                 C  s4   | j dkr| jd}|d usJ dd|iS t S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r*   rv   get_int_attrr.   )r9   r   r!   r!   r"   r]   y   s
   
zIRSource.parse_optionsNr_   r`   r!   r!   r!   r"   rc   W   s
    
rc   c                 C  s   t jj| d S )Nmax_shared_mem)r   activeutilsget_device_properties)devicer!   r!   r"   r      s   r   c                 C  sj   |dks|dkrt | |}||_|S |dks|dks|dkr%t|  S |dks-|dkr3t|  S d S )Nr&   r   llirr   amdgcncubinhsaco)r   ru   rU   r   rm   
read_bytes)	full_namer*   rU   rv   r!   r!   r"   parse   s   r   eBaseExceptionc                   s   t jjrdS | jdurt| j | jdurt| j ddg}dd |D }| j g } durEt fdd|D s>|   j	  dus.t
||dd D ]\}}||_	qN|s]d| _dS d|d	 _	|d
 | _dS )z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.pyc                 S  s   g | ]	}| d tjqS )/)replaceossep)r?   bad_filer!   r!   r"   r@      rg   z$filter_traceback.<locals>.<listcomp>c                 3  s$    | ]} j jj|r|V  qd S r[   )tb_framef_codeco_filenameendswith)r?   ftbr!   r"   	<genexpr>   s   " z#filter_traceback.<locals>.<genexpr>r   r   )r   compilationfront_end_debugging	__cause__filter_traceback__context____traceback__anyappendtb_nextzip)r   	BAD_FILESframes	cur_frame
next_framer!   r   r"   r      s.   






r   c                   @  s4   e Zd ZdddZdddZdd	d
ZdddZdS )CompileTimerr$   r%   c                 C  s    t   | _d | _g | _d | _d S r[   )timestartir_initialization_endlowering_stage_endsstore_results_endr\   r!   r!   r"   r=      s   

zCompileTimer.__init__c                 C  s   t   | _d S r[   )r   r   r\   r!   r!   r"   finished_ir_initialization   s   z'CompileTimer.finished_ir_initialization
stage_namer2   c                 C  s   | j |t f d S r[   )r   r   r   )r9   r   r!   r!   r"   stage_finished   s   zCompileTimer.stage_finishedknobs.CompileTimesc                 C  s~   t   }| jd u r|| _n|| _d
dd}g }| j}| jD ]\}}|||||f |}qtj|| j| j|||| jd	S )Nr   floatendfloat | Noner$   intc                 S  s   |d u rdS t ||  d S )Nr   i@B )r   )r   r   r!   r!   r"   delta   s   zCompileTimer.end.<locals>.delta)ir_initializationlowering_stagesstore_results)r   r   r   r   r$   r   )r   r   r   r   r   r   CompileTimesr   )r9   	timestampr   lowering_stage_durationsstage_startr   	stage_endr!   r!   r"   r      s   


zCompileTimer.endNr^   )r   r2   r$   r%   )r$   r   )r+   ra   rb   r=   r   r   r   r!   r!   r!   r"   r      s
    


r   c           )   
   C  sl  t jj}|r	t }|d u rtj }t|tsJ dt	|}t| t
 }|r:t| ts0J dt }t| ||} |  }	|t|pEt fi |	}|d u rSt n|}
t| |||
d}t|d }t|}t jj}t jj}t jj}|rt|  nd }|rt|  nd }| jd d }| d}||pi }| |}t jj!}|s|d urt"| ||}|r|| |j#$ ||% dd |S ||d	|j&|
}t'|d
< t }|(||| j) t*|+ ,| j-}|r|d7 }t| tst }t.| |.| |/|}|0 }z| 1|||||}W n t2y( } zt3|  d }~ww |r=| d| j- } |4|| || < n| d} |4|| || < t jj5}!|rb|!rb|6| j7 t8d| j7  |ri|9  t*|: |d  D ]\}"}#|#||}$| d|" } |d u r| dd  }%r|%;d|" rt<|%|"|}$n|=|  }&rt8d|&  t<|&|"|}$|r|"dv r|4|$| || < |d ur|4|$|  |"dkrt>|$}'|4|'|d  |!|"kr|=| }(|$6|( t8d|(  |$}|r|?|" qs|j4t@jA|tBd|dd||< |C|| t jjDs"|E  |r0|| |||% dd t"| ||S )Nz target must be of GPUTarget typez'source must be either AST or a filepath)env_varsrH      .jsonT)rn   metadatametadata_grouptimes	cache_hit)rR   rS   triton_versionr   .z.sourcezCreating new locations for ir_overridez
Overriding kernel with file )r   r   jsonr   z.sass)defaultF)binary)Fr   r   listenerr   r   r   get_current_targetr1   r	   make_backendr#   r2   r   rU   rc   r]   r.   r   r   rK   rL   rM   rN   r   overridedump_irstore_binary_onlyr   rR   r   r,   	get_groupgetalways_compileCompiledKernelr   _asdictr   __dict__r
   
add_stagesr)   listr7   r4   r*   ro   get_codegen_implementationget_module_maprZ   	Exceptionr   put
use_ir_loccreate_location_snapshotrk   printr   r0   r   r   get_filer   r   r   dumpsvars	put_groupenable_asandisable_multithreading))rn   rS   rV   	_env_varscompilation_listenertimerrz   	ir_sourcerU   extra_optionsr   rQ   rR   fn_cache_managerenable_overrideenable_ir_dumpstore_only_binaryfn_override_managerfn_dump_manager	file_namemetadata_filenamer   metadata_pathr   resr   stagesfirst_stagerW   rX   rv   r   ir_filenamer   r*   
compile_irnext_moduler   r   sassir_full_namer!   r!   r"   compile   s   








$








r  rS   r	   r$   r   c                   sN    fddt  D }t|dkr!tt| d j d| d|d  S )Nc                   s   g | ]}|j  r|j qS r!   )compilersupports_target)r?   r   rS   r!   r"   r@   s  s    z make_backend.<locals>.<listcomp>r   z! compatible backends for target (z) (z). There should only be one.r   )r   valueslenRuntimeErrorrz   )rS   activesr!   r  r"   r   r  s   r   c                   @  s$   e Zd Zdd Zdd Zdd ZdS )LazyDictc                 C  s   || _ g | _d S r[   )dataextras)r9   r
  r!   r!   r"   r=   |  s   
zLazyDict.__init__c                 C  s0   | j D ]\}}| j|| B | _q| j   | jS r[   )r  r
  clearr9   funcargsr!   r!   r"   r     s   
zLazyDict.getc                 C  s   | j ||f d S r[   )r  r   r  r!   r!   r"   add  s   zLazyDict.addN)r+   ra   rb   r=   r   r  r!   r!   r!   r"   r	  z  s    r	  c                   @  s   e Zd Zdd ZdS )AsmDictc                 C  s.   |dkrt | d }ntd| || |< |S )Nr   r   zUnknown key: '%s')r   KeyError)r9   rQ   valuer!   r!   r"   __missing__  s
   zAsmDict.__missing__N)r+   ra   rb   r  r!   r!   r!   r"   r    s    r  c                 O  s
   t | r[   )copydeepcopy)errr  kwargsr!   r!   r"   _raise_error  s   
r  c                   @  s8   e Zd Zdd Zdd Zedd Zdd Zd	d
 ZdS )r   c                   s  ddl m} tdd | D }t| }t|d |d< |d }t|d |d |d	 |d< |d
t	t
| }|di || _t| jj}	|	| j| _|| _|| _| jj| _dd | D }
|	j t fdd|
D | _|| _| j  | _d | _d | _d | _d S )Nr   )
namedtuplec                 s  s&    | ]\}}| d rt|V  qdS )r   Nr   r   r?   cpr!   r!   r"   r     s   $ z*CompiledKernel.__init__.<locals>.<genexpr>cluster_dimsrS   rz   arch	warp_sizeKernelMetadatac                 S  s"   g | ]\}}| d st|qS )r   r  r  r!   r!   r"   r@     s   " z+CompiledKernel.__init__.<locals>.<listcomp>c                   s:   i | ]}|j d d |j d d  kr| n| qS )r   N)rl   r   rm   )r?   file
binary_extr!   r"   rf     s    ,z+CompiledKernel.__init__.<locals>.<dictcomp>r!   )collectionsr  nextr0   r   loadsrm   r5   r	   rI   r   r7   r   r   rS   pack_metadatapacked_metadatarn   rR   r,   r%  r  asmr   kernelrv   function_run)r9   rn   r   rR   r  r   r   rS   r"  rz   	asm_filesr!   r$  r"   r=     s.   


zCompiledKernel.__init__c                   s`   j d urd S  fdd}tj }tj j j _t|} jj	|kr0|t
 jj	|d t jdrN jjd urNd} jj|krN|t
 jj|d tjjd urctj j  j j j j tjj j j jj	|\ _  _ _ _ _tj j} jj|  jkr|t
 jj|  jd tjjd urtj j  j j j j d S d S )Nc                   s   t | }tt| _| r[   )r  r  	functoolspartialr  r.  )r  
cloned_errr\   r!   r"   raise_  s   
z,CompiledKernel._init_handles.<locals>.raise_zshared memory	tmem_sizei   ztensor memorythreads)rv   r   r   get_current_devicelauncher_clsrn   r   r.  r   sharedr   rB   r4  r   runtimekernel_load_start_hookr-  r,   r   rR   r   load_binaryr,  n_regsn_spillsn_max_threadsr   r!  r   kernel_load_end_hook)r9   r3  r   
max_sharedmax_tmem_sizer!  r!   r\   r"   _init_handles  s.   


"zCompiledKernel._init_handlesc                 C  s   | j d u r	|   | j S r[   )r.  rB  r\   r!   r!   r"   run  s   
zCompiledKernel.runc                 G  s   t jjd u rd S |   t| j| j|d}t| jt	r#| jj
jd u r%|S dd t| jj
j|D }|| jj
j|| j|f |S )N)r,   r-  streamc                 S  ri   r!   r!   )r?   r,   argr!   r!   r"   rf     rj   z2CompiledKernel.launch_metadata.<locals>.<dictcomp>)r   r9  launch_enter_hookrB  r	  r,   r-  r1   rn   r#   r'   launch_metadatar   r3   r  r   )r9   gridrD  r  retarg_dictr!   r!   r"   rG    s   zCompiledKernel.launch_metadatac                   s       d d fdd
}|S )N)rD  c              
     sp   | d u rt j }t j|} j | g|R  }j d  d  d | jj|tj	j
tj	jg	|R   d S )Nr   r   r   )r   r   r6  get_current_streamrG  rC  r-  r*  r   r9  rF  launch_exit_hook)rD  r  r   rG  rH  r9   r!   r"   runner  s   
"z*CompiledKernel.__getitem__.<locals>.runner)rB  )r9   rH  rN  r!   rM  r"   __getitem__  s   zCompiledKernel.__getitem__N)	r+   ra   rb   r=   rB  propertyrC  rG  rO  r!   r!   r!   r"   r     s    %
r   )r   r   )NNN)rS   r	   r$   r   )4
__future__r   rK   r   _C.libtritonr   r   r   backends.compilerr   r   r	   r   r
   r   runtime.autotunerr   runtime.cacher   r   r   r   runtime.driverr   tools.disasmr   pathlibr   r   r0  r   r   r  ptx_prototype_patternrp   ptx_arg_type_patternrs   r   r#   rc   	lru_cacher   r   r   r   r  r   r	  r.   r  r  r   r!   r!   r!   r"   <module>   sN    
#*

&
' 
