o
    ^i"                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlZd dlZeG dd dZdZd	d
 ZdefddZedkrOe  dS dS )    N)ArgumentParser)	dataclass)Path)Listc                   @   s   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dZ
edB ed< d	Zeed
< dZeed< dZedB ed< dZedB ed< dS )CompileArgsz@
    A class to contain arguments from command-line parser.
     pathkernel_name	signaturegridNtarget   	num_warps   
num_stagesout_nameout_path)__name__
__module____qualname____doc__r   str__annotations__r	   r
   r   r   r   intr   r   r   r    r   r   O/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/tools/compile.pyr      s   
 r   a  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
c                  C   s   t td} | jddd | jddtddd	d
 | jddtd dd | jddtddd | jddtddd | jddtd dd | jddtd dd | jddtdd	d  | jd!d"td#d	d  |  }td$i t|}t	| d S )%N)descriptionr   zTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-nr   zName of the kernel to compileT)typedefaultr   requiredz--targetz-tzThe target to compile towards, in format of '<backend>:<arch>:<warp-size>'; e.g., 'cuda:80:32', 'hip:gfx942:64'. Default to None, which means using current machine's GPU target)r   r   r   z--num-warpsz-wr   z$Number of warps to launch the kernelz--num-stagesz-nsr   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r   r   r    z--gridz-gzLaunch grid of the kernelr   )
r   descadd_argumentr   r   r   
parse_argsr   varscompile_kernel)parsercli_argsargsr   r   r   main9   s,   
r)   r(   c           ,         s  | j r| j n| j}| jr| jnt|}t| j}tjdt|j t	j
|j|}t	j
|}|j| t|| j| jd}t|dksJJ ttdd | jd}dtt fdd}d	| j d
| j }	|||	g }
dd   fddt|D }dd | D } fddt|D }dd | D }| D ]\}}|dkr||j|d  < qfddt|D }|D ]}d||< qddd | D }dd | D }|d| j d| j g7 }| D ]}|dv sJ d| qdd | D }tj j!|||d}| j"rtj#j j$| j"d ntj%j&j'( }tj )|}| j| jd }|*|}tj+|||j,d!}t|j-d"ddkrOt.d#|j-j/dkrZt.d$g }g }g }g }tjD ]8\}}||vr|0| |0||  |0| |0||  qg|1|fd dkr|0| |0d% qgd&}t| D ]'\}} |t|7 }|1|fd dkr|d'7 }|1|fd d(kr|d)7 }qd*||
|g}!|j2|j3 }"tt45|"d+d, }#tj%j&j'j6i d-|!d.| jd/t|"d0d1d2d t7|#d d d+ |#dd d+ D dd1fd3dt7||D d4d1fd5dt7||D d6d1d7d |D d8g d9g d:t|d+ d;|d<|j-j8d=| jd>d*||	gd?|d d@|d dA|d+ dBd&}$g }%|j9}&tt:jdC |& }'|';dDD ];}(|(j<})|=dE|
 d*| |) }*|*>dF}+|+?|(@ jAdGi |$ W d    n	1 sw   Y  |%0|* q|!|%fS )HNr   ,r   c                 S   s
   |  dS )N )strip)sr   r   r   <lambda>_   s   
 z compile_kernel.<locals>.<lambda>r
   c                 S   s,   t  }|d|   | d d S )Nr+      )hashlibsha256updatejoinencode	hexdigest)r
   mr   r   r   hash_signaturea   s   z&compile_kernel.<locals>.hash_signaturewarpsxstagesc                 S   sF   zt | }|W S  ty   Y nw zt| }|W S  ty"   Y d S w N)r   
ValueErrorfloat)r-   retr   r   r   	constexpri   s   z!compile_kernel.<locals>.constexprc                    s.   i | ]\}}d |v r|f | d d qS ):r   )split.0ir-   )r>   r   r   
<dictcomp>v   s   . z"compile_kernel.<locals>.<dictcomp>c                 S      i | ]\}}|d ur||qS r:   r   rB   kvr   r   r   rD   w       c                    s    i | ]\}}j |  |qS r   )	arg_namesrA   )r>   kernelr   r   rD   x   s     c                 S   rE   r:   r   rF   r   r   r   rD   y   rI   r   c                    s&   i | ]\}} j | |d d qS )r?   r   )rJ   r@   rA   )rK   r   r   rD   }   s   & r>   xc                 S   s   g | ]}t |qS r   )r   )rB   rH   r   r   r   
<listcomp>   s    z"compile_kernel.<locals>.<listcomp>c                 S   s   g | ]\}}| d | qS )=r   rF   r   r   r   rM      rI   z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got c                 S   s$   i | ]\}}|d kr|dd ggqS )rO   ztt.divisibilityr   rF   r   r   r   rD      s   $ )fn
constexprsr
   attrsr?   )r   r   )r   optionsglobal_scratch_sizezMAOT compiling kernels with global scratch requirements is not yet implementedzNAOT compiling kernels with profile scratch requirements is not yet implementedi32r   crO   d_   r	   triton_kernel_namebin_sizebin_dataz, c                 S   s   g | ]\}}d | | qS )0xr   )rB   rL   yr   r   r   rM      rI   c                    "   g | ]\}} | d | qS r+   r   rB   namety	ty_to_cppr   r   rM         " full_signaturec                    r`   ra   r   rb   re   r   r   rM      rg   arg_pointersc                 S   s   g | ]}d | qS )&r   )rB   argr   r   r   rM      s    z&global_scratchz&profile_scratchnum_argskernel_docstringsharedr   	algo_infogridXgridYgridZ_placeholderextraz	compile.*.wr   )Br   r	   r   r   r   sysinsertr   parent	importlibutilspec_from_file_locationstemmodule_from_specloaderexec_modulegetattrr   r@   lenlistmapr
   r   r   r   	enumerateitemsrJ   r3   valuestritoncompiler	ASTSourcer   backends	GPUTargetruntimedriveractiveget_current_targetmake_backendparse_optionscompile__dict__metadataRuntimeErrorprofile_scratch_sizeappendgetasm
binary_extbinasciihexlifymap_python_to_cpp_typeziprn   backend__file__globsuffixwith_suffixopenwrite	read_textformat),r(   r   r   arg_pathspecmodr   r
   r7   meta_sigsig_hashhints	constantskeyvalue	const_sig
doc_stringhrR   srcr   r   kwargsrS   ccinforJ   	arg_typesarg_names_not_1arg_types_not_1rC   arg_namer   rd   	func_namer   hex_paramsoutput_filesbackend_nametemplate_dirtemplate_pathextoutput_filefpr   )r>   rK   rf   r   r%   P   s   







. 	
r%   __main__)r   r0   importlib.utilrz   rw   argparser   dataclassesr   pathlibr   typingr   r   triton.backendsr   r!   r)   r%   r   r   r   r   r   <module>   s&     
