o
    ^i]                     @  sH  d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
Z
d dlZd dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ ddlmZ d d	lmZ d
dlmZ d
dlmZ eG dd dZ G dd dZ!G dd dZ"eddG dd dZ#dd Z$dd Z%dd Z&dd Z'dd  Z(ej)e'ej*gd!Z+ej)e'ej,gd!Z-ej)e(ej.gd!Z/G d"d# d#Z0G d$d% d%Z1d&d' Z2d(d) Z3d*d+ Z4G d,d- d-Z5G d.d/ d/e5Z6G d0d1 d1e5Z7d2d3 Z8d4d5 Z9d6d7 Z:d8d9 Z;d:d; Z<e1 Z=ee=Z>d<d= Z?d>d? Z@G d@dA dAZAG dBdC dCejBZCG dDdE dEZDG dFdG dGZEdS )H    )annotationsN)TupleListDictCallable)	dataclass)TritonSemantic)TensorDescriptor   )InterpreterError)partial   )interpreter)irc                   @  sV   e Zd ZU dZded< ded< ejedZded< d	d
 Z	dd Z
dd Zdd ZdS )TensorHandlez
        data: numpy array
        dtype: triton type, either pointer_type or scalar_type.
        we don't store block_type here because the shape information is already available in the data field
        attr: a dictionary of attributes
    znp.arraydataztl.dtypedtype)default_factoryr   attrc                 C  s   t | j S N)boolr   allself r   U/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/runtime/interpreter.py__bool__#      zTensorHandle.__bool__c                 C  s$   | j }t|dr|j}t|ds|S )N
element_ty)r   hasattrr   )r   r   r   r   r   get_element_ty&   s
   

zTensorHandle.get_element_tyc                 C  s   t | j | jS r   )r   r   copyr   r   r   r   r   clone,      zTensorHandle.clonec                 C  s   || j |< d S r   )r   )r   keyvaluer   r   r   set_attr/   r   zTensorHandle.set_attrN)__name__
__module____qualname____doc____annotations__dataclassesfielddictr   r   r    r"   r&   r   r   r   r   r      s   
 r   c                   @  s   e Zd Zdd Zdd ZdS )BlockPointerHandlec                 C  s(   || _ || _|| _|| _|| _|| _d S r   )baseshapestridesoffsetsblock_shapeorder)r   r0   r1   r2   r3   r4   r5   r   r   r   __init__5   s   
zBlockPointerHandle.__init__c           	      C  s   | j  }|jd }t| j j| j}tj| jtd}t	t
| jD ]D}dgt
| j }| j| ||< | j| jt| j|  |}||| | j| j tj }||v rf||| j| jk @ |dk@ }q"t|| j jj}||fS )N   r   r
   r   )r0   r    primitive_bitwidthnpbroadcast_tor   r4   onesr   rangelenr3   arangereshaper2   astypeuint64r1   r   r   scalar)	r   boundary_checkdtype_ttn_bytesptrsmasksdim
bcast_dimsoffr   r   r   materialize_pointers=   s   

" z'BlockPointerHandle.materialize_pointersN)r'   r(   r)   r6   rL   r   r   r   r   r/   3   s    r/   c                   @  s(   e Zd Zddd	Zd
d ZdddZdS )TensorDescHandler0   r   r1   List[TensorHandle]r2   r4   	List[int]c                 C  s,   || _ t|| _|| _|| _|| _|| _d S r   )r0   r>   ndimr1   r2   r4   padding)r   r0   r1   r2   r4   rQ   r   r   r   r6   O   s   

zTensorDescHandle.__init__c                 C  s   | j j d dksJ dt| j| jksJ t| j| jks"J | jdks+J d| jd d D ]}|j d dksAJ dq2| jd j dksPJ dd S )	N   r   zbase must be 16-byte alignedr
   z"descriptor cannot be 0 dimensionalzstride must be 16-byte alignedzlast dim must be contiguous)r0   r   itemr>   r2   rP   r4   )r   strider   r   r   validateX   s    zTensorDescHandle.validater3   c           	      C  s  t || jks	J | jjj}|jd }|d j| d dks"J dt| jj| j	}tj
| j	td}tt | j	D ]?}dgt | j	 }| j	| ||< || jt| j	|  |}||| | j| j tj }|d|k@ || j| jk @ }q:|jtjksJ t|| jjj}||fS )Nr7   rS   rR   r   z*block offset start must be 16-byte alignedr8   r
   )r>   rP   r0   r   r   r9   r   r:   r;   r4   r<   r   r=   r?   r@   r2   rA   rB   r1   r   rC   )	r   r3   	scalar_tyitemsizerG   rH   rI   rJ   rK   r   r   r   rL   b   s   

  z%TensorDescHandle.materialize_pointersN)r0   r   r1   rN   r2   rN   r4   rO   )r3   rN   )r'   r(   r)   r6   rV   rL   r   r   r   r   rM   M   s    
	
rM   T)frozenc                   @  s   e Zd ZU dZded< dZded< dZded< dZd	ed
< dZded< dZ	ded< dZ
d	ed< dZded< dZded< dZd	ed< dS )InterpreterOptionsNr.   extern_libsFr   debugTsanitize_overflowstrarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15z
Tuple[str]supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)rg   tf32x3ieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r'   r(   r)   r[   r+   r\   r]   r_   re   rf   rh   rk   rm   rn   r   r   r   r   rZ   u   s   
 rZ   c                 C  sD   | t jkrt jS | t jkrt jS | t jkrt jS | t jkr t jS | S r   )	r:   uint8int8uint16int16uint32int32rB   int64r8   r   r   r   _get_signed_np_dtype   s   



rv   c                 C  st  t | tjrttjS i tjtttjttjtj	ttj	tj
ttj
tjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttji}t | tjrt | jtjrttjS || j S ||  S r   )
isinstancetlpointer_typer:   r   rB   int1r   float16float32float64rp   ro   rr   rq   rt   rs   ru   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer   )tt_dtypenp_typesr   r   r   _get_np_dtype   sX   	

r   c                 C  s  t td|j }t td|j }tj|  |d}||jd ? d@ }|j|j d }|j|j d }	|d|j> d @ }
|j}|j}||j? d|> d @ tj}|dk}t	|rtj
|tjd}t|jD ]}|
|? d@ }|j| ||dk< qh|
dk}d||  ||< || |||@ < |
| || > d|j> d @ |
|< tdt|| | d|	> d }||}||}|j|jkr|
|j|j ? d|j> d @ }|tjjkr|
d|j|j d > @ }||dk }||}n|
||j|j > d|j> d @ }|dk}t	|rH||j? d|> d @ tj}|dk}||@ }tj
|tjd}d| || |  ||< || || ? d|j||  > B ||< ||jd > ||j> B |B }|| jS )Nuintr8   r
   r   )getattrr:   r9   
frombuffertobytesfp_mantissa_widthexponent_biasrA   rt   any
zeros_liker=   maximumminimum_irROUNDING_MODERTNEr@   r1   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputr   r   r   _convert_float   sl   
$


r   c                 C  s
   t | S r   )matherfxr   r   r   _erf   s   
r   c                 C  s   t | t | d? S )N@   )rl   )abr   r   r   
_umulhi_64   s   r   )otypesc                   @  s   e Zd Zedd ZdS )ExtraFunctionsc                 C  s   t |j| j|||S r   )rx   tensorbuildercreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding	_semanticr   r   r   _convert_custom_types  s   z$ExtraFunctions._convert_custom_typesN)r'   r(   r)   staticmethodr   r   r   r   r   r      s    r   c                   @  s  e Zd Zejjejjejjejjejjejjejj	ejj	iZ
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejji
ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/d3d4 Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5d?d@ Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAdWdX ZBdYdZ ZCd[dZ ZDd\dZ ZEd]dZ ZFd^dZ ZGd_dZ ZHd`dZ ZIdadb ZJdcdd ZKdedf ZLdgdZ ZMdhdZ ZNdidZ ZOdjdZ ZPdkdZ ZQdldZ ZRdmdZ ZSdndZ ZTdodZ ZUdpdZ ZVdqdZ ZWdrdZ ZXdsdZ ZYdtdZ ZZdudZ Z[dvdZ Z\dwdZ Z]dxdZ Z^dydZ Z_dzdZ Z`d{dZ Zad|dZ Zbd}dZ Zcd~dZ ZdddZ ZeddZ ZfddZ ZgddZ ZhddZ ZiddZ ZjddZ ZkddZ ZlddZ ZmddZ ZnddZ ZoddZ ZpddZ ZqddZ ZrddZ ZsddZ ZtddZ ZuddZ ZvddZ ZwddZ ZxddZ ZyddZ ZzddZ Z{ddZ Z|eKZ}eKZ~dd Zdd Zdd Zdd ZddZ ZddZ Zdd Zdd Zdd ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ Zdd Zdd ZddZ Zdd Zdd Zdd Zdd Zdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdd Zdd Z	d	d
ddZdddZdddZdddZdddZdd ZdS (  InterpreterBuilderreturnNonec                 C  s2   d | _ t | _i | _tj| jd< dd | jd< d S )Nconvert_custom_typesc                 S  s   dS )N)r
   r
   r
   r   )lhsTyperhsTyper   r   r   <lambda>   s    z-InterpreterBuilder.__init__.<locals>.<lambda>min_dot_size)r_   rZ   optionscodegen_fnsr   r   r   r   r   r   r6     s
   zInterpreterBuilder.__init__c                 C  sR   || j d k std|| j d k std|| j d k s!td|||f| _d S )Nr   zx >= grid_dim[0]r
   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dim
ValueErrorgrid_idxr   r   yzr   r   r   set_grid_idx"  s   zInterpreterBuilder.set_grid_idxc                 C  s   |||f| _ d S r   )r   )r   nxnynzr   r   r   set_grid_dim+     zInterpreterBuilder.set_grid_dimc                 C     t jS r   )rx   r{   r   r   r   r   get_half_ty0     zInterpreterBuilder.get_half_tyc                 C  r   r   )rx   r~   r   r   r   r   get_bf16_ty3  r   zInterpreterBuilder.get_bf16_tyc                 C  r   r   )rx   r|   r   r   r   r   get_float_ty6  r   zInterpreterBuilder.get_float_tyc                 C  r   r   )rx   r}   r   r   r   r   get_double_ty9  r   z InterpreterBuilder.get_double_tyc                 C  r   r   )rx   rz   r   r   r   r   get_int1_ty<  r   zInterpreterBuilder.get_int1_tyc                 C  r   r   )rx   rp   r   r   r   r   get_int8_ty?  r   zInterpreterBuilder.get_int8_tyc                 C  r   r   )rx   ro   r   r   r   r   get_uint8_tyB  r   zInterpreterBuilder.get_uint8_tyc                 C  r   r   )rx   rr   r   r   r   r   get_int16_tyE  r   zInterpreterBuilder.get_int16_tyc                 C  r   r   )rx   rq   r   r   r   r   get_uint16_tyH  r   z InterpreterBuilder.get_uint16_tyc                 C  r   r   )rx   rt   r   r   r   r   get_int32_tyK  r   zInterpreterBuilder.get_int32_tyc                 C  r   r   )rx   rs   r   r   r   r   get_uint32_tyN  r   z InterpreterBuilder.get_uint32_tyc                 C  r   r   )rx   ru   r   r   r   r   get_int64_tyQ  r   zInterpreterBuilder.get_int64_tyc                 C  r   r   )rx   rB   r   r   r   r   get_uint64_tyT  r   z InterpreterBuilder.get_uint64_tyc                 C  r   r   )rx   r   r   r   r   r   get_fp8e4nv_tyW  r   z!InterpreterBuilder.get_fp8e4nv_tyc                 C  r   r   )rx   r   r   r   r   r   get_fp8e4b15_tyZ  r   z"InterpreterBuilder.get_fp8e4b15_tyc                 C  r   r   )rx   r   r   r   r   r   get_fp8e4b8_ty]  r   z!InterpreterBuilder.get_fp8e4b8_tyc                 C  r   r   )rx   r   r   r   r   r   get_fp8e5_ty`  r   zInterpreterBuilder.get_fp8e5_tyc                 C  r   r   )rx   r   r   r   r   r   get_fp8e5b16_tyc  r   z"InterpreterBuilder.get_fp8e5b16_tyc                 C     t ||S r   )rx   ry   )r   elt_ty
addr_spacer   r   r   
get_ptr_tyf     zInterpreterBuilder.get_ptr_tyc                 C  r   r   )rx   r   )r   r   r1   r   r   r   get_block_tyi  r   zInterpreterBuilder.get_block_tyc                 C  s   t tj|gtjdtjS Nr8   )r   r:   arraybool_rx   rz   r   r%   r   r   r   get_int1l     zInterpreterBuilder.get_int1c                 C     t tj|gtjdtjS r   )r   r:   r   ro   rx   r   r   r   r   	get_uint8o  r   zInterpreterBuilder.get_uint8c                 C  r   r   )r   r:   r   rp   rx   r   r   r   r   get_int8r  r   zInterpreterBuilder.get_int8c                 C  r   r   )r   r:   r   rq   rx   r   r   r   r   
get_uint16u  r   zInterpreterBuilder.get_uint16c                 C  r   r   )r   r:   r   rr   rx   r   r   r   r   	get_int16x  r   zInterpreterBuilder.get_int16c                 C  r   r   )r   r:   r   rs   rx   r   r   r   r   
get_uint32{  r   zInterpreterBuilder.get_uint32c                 C  r   r   )r   r:   r   rt   rx   r   r   r   r   	get_int32~  r   zInterpreterBuilder.get_int32c                 C  r   r   )r   r:   r   rB   rx   r   r   r   r   
get_uint64  r   zInterpreterBuilder.get_uint64c                 C  r   r   )r   r:   r   ru   rx   r   r   r   r   	get_int64  r   zInterpreterBuilder.get_int64c                 C  r   r   )r   r:   r   r{   rx   r   r   r   r   get_fp16  r   zInterpreterBuilder.get_fp16c                 C  r   r   )r   r:   r   r|   rx   r   r   r   r   get_fp32  r   zInterpreterBuilder.get_fp32c                 C  r   r   )r   r:   r   r}   rx   r   r   r   r   get_fp64  r   zInterpreterBuilder.get_fp64c                 C  s   t tjdgt|d|S Nr   r8   )r   r:   r   r   )r   typer   r   r   get_null_value  r   z!InterpreterBuilder.get_null_valuec                 C  s2   | j d u r	tdttj| j | gtjdtjS )Nzgrid_idx is Noner8   )r   r   r   r:   r   rt   rx   r   axisr   r   r   create_get_program_id  s   
 z(InterpreterBuilder.create_get_program_idc                 C  s    t tj| j| gtjdtjS r   )r   r:   r   r   rt   rx   r	  r   r   r   create_get_num_programs  s    z*InterpreterBuilder.create_get_num_programsc                 C  s0   t tj|jtdtj}d }| ||||||S r   )r   r:   	ones_liker   r   rx   rz   create_masked_load)r   ptr_0_1is_volatilemaskotherr   r   r   create_load  s   zInterpreterBuilder.create_loadc                 C  s*   t tj|jtdtj}| |||d d S r   )r   r:   r  r   r   rx   rz   create_masked_store)r   r  valr  r  r  r   r   r   create_store  s   zInterpreterBuilder.create_storec           
      C  sN   |  }t|}|d u rttj|j|d|}t|j|j|j|}	t|	|S r   )r    r   r   r:   r   r   _interpreterload)
r   rG   r  r  cache_modifiereviction_policyr  rE   dtype_npretr   r   r   r    s   
z%InterpreterBuilder.create_masked_loadc                 C  s   t |j|j|jS r   )r  storer   )r   rG   r%   r  r  r  r   r   r   r       z&InterpreterBuilder.create_masked_storec                 C  st   |j j}|j}|tjkr|tjks|tjkr.|tjkr.t|j||d t|}t	||jS t	|j
t||jS r   )r   rC   rx   r~   r|   r   r   viewr   r   rA   )r   srcdst_typesrc_element_typedst_element_typer   r   r   r   	cast_impl  s   zInterpreterBuilder.cast_implc                 C     |  ||S r   r&  r   r"  r#  r   r   r   r         zInterpreterBuilder.<lambda>c                 C  r'  r   r(  r)  r   r   r   r     r*  c                 C  r'  r   r(  r)  r   r   r   r     r*  c                 C  r'  r   r(  r)  r   r   r   r     r*  c                 C  r'  r   r(  r)  r   r   r   r     r*  c                 C  r'  r   r(  r)  r   r   r   r     r*  c                 C  r'  r   r(  )r   r"  r#  	is_signedr   r   r   r     r*  c                 C  s4   |j j}|j}t|j|||t|}t||jS r   )r   rC   r   r   r!  r   r   )r   r"  r#  r   r$  r%  r   r   r   r   r     s   z"InterpreterBuilder.create_fp_to_fpc                 C  s   t |jt||jS r   )r   r   r!  r   rC   r)  r   r   r   create_bitcast     z!InterpreterBuilder.create_bitcastc                 C  s   t ||j|j|jjS r   r   r   r   rC   )r   lhsrhsopr   r   r   	binary_op  r-  zInterpreterBuilder.binary_opc                 C     |  ||tjS r   r2  r:   addr   r/  r0  r   r   r   r         c                 C  r3  r   r2  r:   multiplyr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   divider6  r   r   r   r     r7  c                 C  r3  r   r2  r:   fmodr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   subtractr6  r   r   r   r     r7  c                 C  r3  r   r8  r6  r   r   r   r     r7  c                 C  r3  r   r:  r6  r   r   r   r     r7  c                 C  r'  r   create_idivr6  r   r   r   r     r*  c                 C  r'  r   r@  r6  r   r   r   r     r*  c                 C  r3  r   r<  r6  r   r   r   r     r7  c                 C  r3  r   r<  r6  r   r   r   r     r7  c                 C  r3  r   r4  r6  r   r   r   r     r7  c                 C  r3  r   r>  r6  r   r   r   r     r7  c                 C  r3  r   )r2  r:   
left_shiftr6  r   r   r   r     r7  c                 C  r3  r   )r2  r:   right_shiftr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   r   r6  r   r   r   r     r7  c                 C  r3  r   rD  r6  r   r   r   r     r7  c                 C  r3  r   rD  r6  r   r   r   r     r7  c                 C  r3  r   rD  r6  r   r   r   r     r7  c                 C  r3  r   r2  r:   r   r6  r   r   r   r     r7  c                 C  r3  r   rE  r6  r   r   r   r     r7  c                 C  r3  r   rE  r6  r   r   r   r     r7  c                 C  r3  r   rE  r6  r   r   r   r     r7  c                 C  r3  r   r2  r:   
less_equalr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   lessr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   greater_equalr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   greaterr6  r   r   r   r     r7  c                 C  r3  r   rF  r6  r   r   r   r     r7  c                 C  r3  r   rH  r6  r   r   r   r     r7  c                 C  r3  r   rJ  r6  r   r   r   r     r7  c                 C  r3  r   rL  r6  r   r   r   r     r7  c                 C  r3  r   r2  r:   equalr6  r   r   r   r     r7  c                 C  r3  r   r2  r:   	not_equalr6  r   r   r   r     r7  c                 C  r3  r   rH  r6  r   r   r   r     r7  c                 C  r3  r   rL  r6  r   r   r   r     r7  c                 C  r3  r   rF  r6  r   r   r   r     r7  c                 C  r3  r   rJ  r6  r   r   r   r     r7  c                 C  r3  r   rN  r6  r   r   r   r     r7  c                 C  r3  r   rP  r6  r   r   r   r     r7  c                 C  r3  r   rH  r6  r   r   r   r     r7  c                 C  r3  r   rL  r6  r   r   r   r     r7  c                 C  r3  r   rF  r6  r   r   r   r     r7  c                 C  r3  r   rJ  r6  r   r   r   r     r7  c                 C  r3  r   rN  r6  r   r   r   r     r7  c                 C  r3  r   rP  r6  r   r   r   r     r7  c                 C  r3  r   )r2  r:   bitwise_andr6  r   r   r   r     r7  c                 C  r3  r   )r2  r:   bitwise_xorr6  r   r   r   r      r7  c                 C  r3  r   )r2  r:   
bitwise_orr6  r   r   r   r     r7  c                 C  s&   t |jt|j|j |j |jjS r   )r   r   r:   r=  r   rC   r6  r   r   r   rA    s   &zInterpreterBuilder.create_idivc                 C  sD   t |jj}t |jj}|j||_|j||_| ||tjS r   )rv   r   r   rA   r2  r:   rC  )r   r/  r0  	lhs_dtype	rhs_dtyper   r   r   create_ashr  s
   zInterpreterBuilder.create_ashrc                 C  s   |j j}|tjks|tjkrtt|j |j |jjS ttd|j	d d  }|j 
|}|j 
|}t|||j	d ? }t|
||jjS )Nr   r7   r   )r   r   r:   ru   rB   r   np_umulhi_u64rC   r   rX   rA   r9  )r   r/  r0  r   compute_dtypelhs_datarhs_dataret_datar   r   r   create_umulhi  s   z InterpreterBuilder.create_umulhic                 C  s   t ||j|j|j|jjS r   r.  )r   r/  r0  r  r1  r   r   r   
ternary_op     zInterpreterBuilder.ternary_opc                 C     |  |||tjS r   )r^  r:   clip)r   arglohipropagate_nansr   r   r   r   "      c                 C  r`  r   )r^  r:   where)r   condr/  r0  r   r   r   r   #  rf  c                 C  s   t |j|j |j |jjS r   r.  r   r   r   r   
create_fma%  r_  zInterpreterBuilder.create_fmac                 C  s   t ||j|jjS r   r.  )r   rb  r1  r   r   r   unary_op)  r   zInterpreterBuilder.unary_opc                 C  sZ   |j }|jd }ttd|j }|j|}d|> d }||@ t|}t||j jS )Nr
   r   )	r   r9   r   r:   r   r!  r   r   rC   )r   rb  rE   mask_bitwidthnp_uint_dtyper   r  r  r   r   r   create_fabs,  s   
zInterpreterBuilder.create_fabsc                 C     |  |tjS r   )rj  r:   cosr   rb  r   r   r   r   6      c                 C  rn  r   )rj  r:   exprp  r   r   r   r   7  rq  c                 C  rn  r   )rj  r:   exp2rp  r   r   r   r   8  rq  c                 C  rn  r   )rj  r:   absrp  r   r   r   r   9  rq  c                 C  rn  r   )rj  r:   floorrp  r   r   r   r   :  rq  c                 C  rn  r   )rj  r:   ceilrp  r   r   r   r   ;  rq  c                 C  rn  r   )rj  r:   logrp  r   r   r   r   <  rq  c                 C  rn  r   )rj  r:   log2rp  r   r   r   r   =  rq  c                 C  rn  r   rj  r:   sqrtrp  r   r   r   r   >  rq  c                 C  rn  r   ry  rp  r   r   r   r   ?  rq  c                 C  rn  r   )rj  r:   sinrp  r   r   r   r   @  rq  c                 C  s0   |j jtjkrt|j nt|j }t||jjS r   )r   r   r:   r|   np_erf_fp32np_erf_fp64r   rC   )r   rb  r  r   r   r   
create_erfB  s   "zInterpreterBuilder.create_erfc                 C  s   t dt|j |jjS Nr
   )r   r:   rz  r   r   rC   rp  r   r   r   create_rsqrtF  r   zInterpreterBuilder.create_rsqrtc                 C  s   t |j||jjS r   )r   r   r@   r   rC   )r   rb  r1   allow_reorderr   r   r   r   J  s    c                 C     t t|j||jjS r   )r   r:   	transposer   r   rC   )r   rb  permr   r   r   create_transL  r-  zInterpreterBuilder.create_transc                 C  s   |j }|j }|jjdkr|j s|jjdkr6|j r6t||jtjd tj}t||jtjd tj}t	tj
|||j jd|j  |jjS )Nr7   r8   )r   r   r9   is_floatingr   rx   r{   r!  r:   r   matmulrC   )r   r   r   dinput_precisionmax_num_imprecise_acca_datab_datar   r   r   
create_dotO  s   $zInterpreterBuilder.create_dotc                 C  s   t tj||tjdtjS r   )r   r:   r?   rt   rx   )r   ret_tystartstopr   r   r   create_make_rangeX  r   z$InterpreterBuilder.create_make_rangec                 C  sz   |d u rt tj|jtdtj}t|j|jt|j}tj	||d|fdd }|d  t
|j 8  < t |tjS )Nr8   r   )binsr=   )r   r:   r  r   r   rx   rz   rg  r   	histogramlogical_notsumrt   )r   r   r  r  r  r   r   r   create_histogram[  s   z#InterpreterBuilder.create_histogramc                 C  s   t tj|j|j|d|jjS )Nr
  )r   r:   take_along_axisr   r   rC   )r   r"  indicesr
  r   r   r   create_gathere  s   z InterpreterBuilder.create_gatherc                 C  s<   |  }|j}td|d }t|j||jtj  |jS )Nr
   r7   )	r    r9   maxr   r   rA   r:   rB   r   )r   r  offsetrE   element_bitwidthelement_bytewidthr   r   r   create_addptrj  s    z InterpreterBuilder.create_addptrc                 C  s   | |\}}| }	t|	}
|d u rd }n.|tjjkr(ttj|j	|
d|	}n|tjj
kr=ttj|j	td|
d|	}ntd| | ||||||S )Nr8   nanzunsupported padding option )rL   r    r   r   PADDING_OPTIONPAD_ZEROr   r:   r   r   PAD_NAN	full_likefloatr   r  )r   r  rD   padding_optionr  r  r  rG   rH   rE   r  r  r   r   r   create_tensor_pointer_loadq  s   z-InterpreterBuilder.create_tensor_pointer_loadc                 C  s    | |\}}| |||||S r   rL   r  )r   r  r%   rD   r  r  rG   rH   r   r   r   create_tensor_pointer_store     z.InterpreterBuilder.create_tensor_pointer_storec                 C  r  r   )r   r:   expand_dimsr   r   rC   )r   rb  r
  r   r   r   create_expand_dims  r-  z%InterpreterBuilder.create_expand_dimsc                 C  r  r   )r   r:   r;   r   r   rC   )r   rb  r1   r   r   r   create_broadcast  r-  z#InterpreterBuilder.create_broadcastc                 C  s   t t|j|jg|jjS r   )r   r:   concatenater   r   rC   r6  r   r   r   
create_cat  r_  zInterpreterBuilder.create_catc                 C  s    t tj|j|jgdd|jjS )NrS   r  )r   r:   stackr   r   rC   r6  r   r   r   create_join  s    zInterpreterBuilder.create_joinc                 C  s(   t |jd |jjt |jd |jjfS )N).r   ).r
   r.  )r   r  r   r   r   create_split  s   (zInterpreterBuilder.create_splitc                 C  s\   |j }t|jtjrttj||jd t	|jd|jj
S ttj||jt	|jd|jj
S r  )r1   rw   r   rx   r   r   r:   fullr   r   rC   )r   r  rb  r1   r   r   r   create_splat  s   &"zInterpreterBuilder.create_splatc                 C  s&   t tjd|jd t|jd|jjS )Nr
   r   r8   )r   r:   r  r   r   r   rC   rp  r   r   r   create_unsplat  s   &z!InterpreterBuilder.create_unsplatc                 C  sB   || j vrtd| | j | }tt|j|j|j||jjS )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r  
atomic_casr   r   rC   )r   r  cmpr  semscoper   r   r   create_atomic_cas  s   

 z$InterpreterBuilder.create_atomic_casc                 C  sf   || j vrtd| || jvrtd| | j | }| j| }tt||j|j|j||jjS )Nzunsupported rmwOp r  )	ir_rmw_op_to_interpreter_rmw_opr   r  r   r  
atomic_rmwr   r   rC   )r   rmwOpr  r  r  r  r  r   r   r   create_atomic_rmw  s   



"z$InterpreterBuilder.create_atomic_rmwc                 C     t d)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   libNamelibPathsymbolargListretTypeisPurer   r   r   create_extern_elementwise     z,InterpreterBuilder.create_extern_elementwisec                 C  r  )Nz,inline_asm not supported in interpreter moder  )r   	inlineAsmconstraintsvaluesr  r  packr   r   r   create_inline_asm  r  z$InterpreterBuilder.create_inline_asmc                 C  s   d| j d  d| j d  d| j d  d}|r|d| 7 }|r*tjdd	d
 id |D ]}t|d|j   q,|rCtjd d d S d S )N(r   z, r
   r   ) r   c                 S  s   d| dS )N0x02xr   r   r   r   r   r     r*  z1InterpreterBuilder.create_print.<locals>.<lambda>)	formatter)r   r:   set_printoptionsprintr   )r   prefixhexr  isSignedmsgr%   r   r   r   create_print  s   *zInterpreterBuilder.create_printc                 C  s   |sJ | d S r   r   )r   	conditionmessager   r   r   create_assert  s   z InterpreterBuilder.create_assertc                 C  s   |sJ dd S )NzAssume failedr   )r   r  r   r   r   create_assume  r   z InterpreterBuilder.create_assumec                 C  s   d S r   r   r   r   r   r   create_barrier  s   z!InterpreterBuilder.create_barrierc                 C  s    dd |D }t ||||||S )Nc                 S     g | ]}|  qS r   r"   .0r  r   r   r   
<listcomp>      z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>)r/   )r   r0   r1   r2   r3   r4   r5   new_offsetsr   r   r   create_make_block_ptr  s   z(InterpreterBuilder.create_make_block_ptrc                 C  sv   t |jt |krtddd |jD }t|j|j|j||j|j}t	t |D ]}|j|  j
|| j
7  _
q)|S )Nz len(ptr.offsets) != len(offsets)c                 S  r  r   r  r  r   r   r   r    r  z5InterpreterBuilder.create_advance.<locals>.<listcomp>)r>   r3   r   r/   r0   r1   r2   r4   r5   r=   r   )r   r  r3   r  r  r   r   r   r   create_advance  s   z!InterpreterBuilder.create_advancezeror0   r   r1   rN   r2   tensor_shaperO   r+  r   rQ   r^   c                 C  s   t |||||}|  |S r   )rM   rV   )r   r0   r1   r2   r  r+  rQ   descr   r   r   create_make_tensor_descriptor  s   z0InterpreterBuilder.create_make_tensor_descriptorr  rM   r  c                 C  s   t |tsJ ||\}}| }t|}|j}	|	tjjkr+t	t
j|j|d|}
n|	tjjkr@t	t
j|jtd|d|}
ntd|	 | j|||
||ddS )Nr8   r  zunsupported padding F)r  r  r  )rw   rM   rL   r    r   rQ   r   r  r  r   r:   r   r   r  r  r  r   r  )r   r  r  r  r  rG   r  rE   r  rQ   r  r   r   r   create_descriptor_load  s   z)InterpreterBuilder.create_descriptor_loadr%   c                 C  s    | |\}}| |||d d S r   r  )r   r  r%   r  rG   r  r   r   r   create_descriptor_store  r  z*InterpreterBuilder.create_descriptor_store	x_offsetsy_offsetc                 C  s   |j jj}t|}tj|jjd |jd g|d}d }d }	t	|jD ]\}
}t
|tj|g}| ||||	j||
d d f< q"t
||S )Nr   rS   r8   )r0   r   r   r   r:   zerosr   r1   r4   	enumerater   rx   rt   r  )r   r  r  r  r  r   np_dtyperesultr  r  r   x_offsetr  r   r   r   create_descriptor_gather  s   
  
z+InterpreterBuilder.create_descriptor_gatherc           	      C  sH   t |jD ]\}}t|j| |j}t|tj|g}| ||| qd S r   )r  r   r   r   rx   rt   r  )	r   r  r%   r  r  r   r  slicer  r   r   r   create_descriptor_scatter  s
   z,InterpreterBuilder.create_descriptor_scatterc                 C  sZ   t |}d|jv rttjdd|d|jS |tjkr&ttjdd|d|jS td| )Nrl   r
   rS   r8   Tzunsupported type )r   namer   r:   r  rC   r   	TypeError)r   r  np_typer   r   r   get_all_ones_value	  s   

z%InterpreterBuilder.get_all_ones_valueNr   r   )r  )r0   r   r1   rN   r2   rN   r  rO   r+  r   rQ   r^   )r  rM   r  rN   )r  rM   r%   r   r  rN   )r  rM   r  r   r  r   )r  rM   r%   r   r  r   r  r   )r'   r(   r)   r   MEM_SEMANTICACQUIREr  RELEASERELAXEDACQUIRE_RELEASEr  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr  r6   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r  r  r  r  r&  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r,  r2  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intrA  rW  r]  r^  create_clampfcreate_selectri  rj  rm  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr~  r  create_reshaper  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r   r     s^   	

	
	r   c                   s*   t | |d fdd
}t| || d S )N)memberc                   s$   | |i dd |  D d iS )Nc                 S  s   i | ]\}}|d kr||qS )r   r   r  kvr   r   r   
<dictcomp>  s
    z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>r   items)rZ  argskwargssemanticr   r   r     s    z_patch_attr.<locals>.<lambda>)r   setattr)objr  rZ  r   
new_memberr   rc  r   _patch_attr  s   rh  c                 C  s2   t | D ]\}}tj|rt| ||| qd S r   )inspect
getmembersrx   core
is_builtinrh  )pkgr   r  rZ  r   r   r   _patch_builtin  s
   rn  c                   sJ   dd  dd }dd | _  fdd| _dd | _d	d | _t|| _d S )
Nc                 S  s   | j j}|jdkrt|S dS )Nr
   T)r   r   sizer   )r   r   r   r   r   	_get_bool$  s   z%_patch_lang_tensor.<locals>._get_boolc                 S  sj   t t| jj| jj}| j sJ t| jj	}|d |d |d< |d< t
j| j|}t
j||S )NrS   )r   r:   r  r   r   r   r  is_blocklistr1   rx   rk  r   r   )r   r   r4   res_tyr   r   r   _get_transpose*  s   z*_patch_lang_tensor.<locals>._get_transposec                 S     t | jjS r   )rl   r   r   r   r   r   r   r   2  r*  z$_patch_lang_tensor.<locals>.<lambda>c                   s    | S r   r   r   rp  r   r   r   3  s    c                 S  rv  r   )reprr   r   r   r   r   r   r   4  r*  c                 S  rv  r   )r^   r   r   r   r   r   r   r   5  r*  )	__index__r   __repr____str__propertyT)r   ru  r   rw  r   _patch_lang_tensor"  s   


r~  c                   @  s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )ReduceScanOpInterfacec                 C  s   || _ || _d S r   )r
  
combine_fn)r   r
  r  r   r   r   r6   ;  s   
zReduceScanOpInterface.__init__c                 C  s0   |d ur|t |krtd| d| d S d S )Nzaxis z out of bounds for shape )r>   r   )r   r1   r
  r   r   r   
check_axis?  s   z ReduceScanOpInterface.check_axisc                 C  s>   |D ]}t |tjjstdt| | |j| j qd S )Nzinput must be a tensor, got )	rw   rx   rk  r   r   r  r  r1   r
  )r   r   rb  r   r   r   check_tensorC  s
   z"ReduceScanOpInterface.check_tensorc                 C  s`   t |}t|dr|jr||}t|t|j}n
tj|g|d}|}tj	
t||j|S )Nr1   r8   )r   r   r1   rA   rx   r   rs  r:   r   rk  r   r   rC   )r   r  r   r  ret_typer   r   r   	to_tensorI  s   
zReduceScanOpInterface.to_tensorc                 C  sJ   t |ts| |fd S | | | |}t |ttfr"t|S |fS Nr   )rw   tupleapplyr  
apply_implrs  )r   r   r  r   r   r   r  S  s
   


zReduceScanOpInterface.applyN)r'   r(   r)   r6   r  r  r  r  r   r   r   r   r  9  s    
r  c                      sF   e Zd Z fddZdd Zdd Zddd	Zd
d Zdd Z  Z	S )	ReduceOpsc                      t  || || _d S r   )superr6   	keep_dims)r   r
  r  r  	__class__r   r   r6   ]     
zReduceOps.__init__c                 C  sN   g }|D ]}|d ur| | qd}| | |jj |j qt||fS r  )appendr  r   r   flattenr   r  )r   r   r
  r  r   r   r   r   unravela  s   zReduceOps.unravelc                   s  j } j \ }g }g } d jjj}|d| ||d d   } D ]}||jj |tj||jjjd q't	|d j
D ]}	t|	|d| |d d   t fddt|D }
| dkrt	t|D ]}|
| jj || < qvqEt fddt|D }jjg ||
R  }t|ts|fn|}t	t|D ]}t|| tjjr|| jj n|| || < qqEg }t|D ]6\}	}jr|d urt||}nt	t|D ]}t|d}qn|d u r| }|| |	 j q|S )Nr   r
   r8   c                 3  *    | ]\}} |  | jV  qd S r   r  r   r  iir  )r   input_indexr   r   r   	<genexpr>z     ( z+ReduceOps.generic_reduce.<locals>.<genexpr>c                 3  r  r   r  r  oio)r   output_indexr   r   r   r    r  )r
  r  r   r   r1   r  r:   r  r   r=   ro  unravel_indexr  r  r>   rT   r  fnrw   rx   rk  r   r  r  r  )r   r   original_axisr
  
input_dataoutput_datainput_shapeoutput_shaperb  r   input_tuplej	acc_tuplecombine_fn_retr  r   _r   )r   r  r  r   r   generic_reducek  sN   zReduceOps.generic_reduceNc                 C  s   t |tr	|d n|}d }d }|r!| ||jj| j| jd|j}|r3| ||jj| j| jdtj	}|d ur?|d ur?||fS |d urE|S |d urK|S t
d)Nr   r
  keepdimsz-val_reduce_op and idx_reduce_op are both None)rw   r  r  r   r   r
  r  r   rx   rt   r   )r   r   val_reduce_opidx_reduce_opr  idxr   r   r   min_max  s     zReduceOps.min_maxc                 C  s"   |  tj|jj| j| jd|jS )Nr  )r  r:   r  r   r   r
  r  r   r   r   r   r   r   r       "zReduceOps.sumc                 C  s   | j tjjkr| j|d tjtjdS | j tjjkr&| j|d tj	tj
dS | j tjjkr8| j|d tjd dS | j tjjkrJ| j|d tjd dS | j tjjkrX| |d S | |S )Nr   )r  r  )r  rx   standard_argmin_combine_tie_break_leftr  r:   minargmin_argmax_combine_tie_break_leftr  argmax_elementwise_maxnanmax_elementwise_minnanmin_sum_combiner  r  r  r   r   r   r    s   
zReduceOps.apply_implr   )
r'   r(   r)   r6   r  r  r  r  r  __classcell__r   r   r  r   r  [  s    

+r  c                      s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )ScanOpsc                   r  r   )r  r6   reverse)r   r
  r  r  r  r   r   r6     r  zScanOps.__init__c                 C  "   | j tj|jj| jd|jdgS Nr  r8   )r  r:   cumsumr   r   r
  r   r  r   r   r   r    r  zScanOps.cumsumc                 C  r  r  )r  r:   cumprodr   r   r
  r   r  r   r   r   r    r  zScanOps.cumprodc                   s  g }g }d j jj}D ]}||j j |tj||j jjd qt|d jD ]}t	|| t
 fddt|D } j dkr_tt|D ]}|| j j ||  < qOq+t
 fddtt D t
fddt|D }	jjg |	|R  }
t|
t
s|
fn|
}	tt|D ]}t|	| tjjr|	| j j n|	| ||  < qq+g }t|D ]\}}||| j q|S )Nr   r8   c                 3  s*    | ]\}} |  | jV  qd S r   r  r  )indexr   r   r   r   r    r  z'ScanOps.generic_scan.<locals>.<genexpr>c                 3  s.    | ]}|j kr | d  n | V  qdS )r
   Nr  )r  r   )r  r   r   r   r    s   , c                 3  r  r   r  r  )r   
prev_indexr   r   r   r    r  )r   r   r1   r  r:   r  r   r=   ro  r  r  r  r
  r>   rT   r  r  rw   rx   rk  r   r  )r   r   r  r  r1   rb  r   r   r  r  r  r  r   )r  r   r  r   r   generic_scan  s8    zScanOps.generic_scanc              	   C  s   g }| j r|D ]}|| tj|jj| jd|j qn|}| j	t
jjkr.| |d }n| j	t
jjkr=| |d }n| |}| j rV|D ]}tj|jj| jd|j_qG|S )Nr  r   )r  r  r  r:   flipr   r   r
  r   r  rx   r  r  r  _prod_combiner  r  )r   r   	new_inputrb  r  r   r   r   r    s   &
zScanOps.apply_impl)	r'   r(   r)   r6   r  r  r  r  r  r   r   r  r   r    s    r  c                  C  s4   ddd} ddd}| t _|t _| t j_|t j_d S )NFc                 [     t |||| S r   )r  r  )r   r
  r  r  rb  r   r   r   _new_reduce  r#   z'_patch_reduce_scan.<locals>._new_reducec                 [  r  r   )r  r  )r   r
  r  r  rb  r   r   r   	_new_scan   r#   z%_patch_reduce_scan.<locals>._new_scan)F)rx   reduceassociative_scanrk  )r  r  r   r   r   _patch_reduce_scan  s   

r  c                 C  sx   dd }ddd}ddd}dd	 }|| _ || _|| _t| _|| j_t|d
d| _t|dd| _	t|dd| _
t  d S )Nc                 S  sB  | j dkr	| S | j dkr| S | j dkr| S | j dkr$| S | j dkr-| S | j dkr6| S | j dkr?| S | j dkrH| S | j d	krQ|	 S | j d
krZ|
 S | j dkrc| S | j dkrl| S | j dkru| S | j dkr~| S | j dkr| S | j dkr| S | j dkr| S td|  d)Nvoidrz   rp   ro   rr   rq   rt   rs   ru   rB   r`   rb   rd   fp16bf16fp32fp64zfail to convert z to ir type)r  get_void_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   
_new_to_ir  sF   
















z$_patch_lang_core.<locals>._new_to_irc                 [  s6   |d u rd}|d u rd| }}n| |}}t |||S )Nr
   r   )r=   )arg1arg2steprb  r  endr   r   r   
_new_range3  s   
z$_patch_lang_core.<locals>._new_range c                 S  s   | sJ |d S r   r   )rh  r  r   r   r   _new_static_assert<  r   z,_patch_lang_core.<locals>._new_static_assertc                 S  sn   t | tjs| S t |ttfs|gn|}dd |D }t|tdt| jkr.td| | j	
|| | S )Nc                 S  s"   g | ]}t |tjr|jn|qS r   )rw   rx   	constexprr%   r  r]  r   r   r   r  E  s   " z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>r
   z$len(values) != len(input.shape) for )rw   rx   r   rs  r  r>   r  r1   r   r   r&   )r   r  r  r   r   r   	_set_attr?  s   z#_patch_lang_core.<locals>._set_attrztt.divisibilityr  ztt.contiguityztt.constancy)NN)r  )r=   static_rangestatic_assertr  static_printr   to_irr   multiple_ofmax_contiguousmax_constancyr  )langr  r  r  r  r   r   r   _patch_lang_core	  s   
(
	
r  c                 C  s   dd | j  D }t|dksJ d|D ] }t|t t|jt |tkr-t|jt t|j t	| qttj
jt d S )Nc                 S  s,   g | ]\}}t |r|ttjfv r|qS r   )ri  ismodulerx   rk  )r  r  r%   r   r   r   r  X  s   , z_patch_lang.<locals>.<listcomp>r
   z:triton.language must be visible from within jit'd function)__globals__r`  r>   rn  interpreter_builderr   rx   r   r~  r  rk  tensor_descriptor_base)r  langsr  r   r   r   _patch_langW  s   


r  c                 C  s"   t | drt| | S t| |S )N_fields)r   r  )rb  contentsr   r   r   _tuple_created  s   "r  c                 C  s  t | trkttjj| d }tj	}d|   krdk r#n ntj	}n7d|   kr-dk r3n ntj
}n'd|   kr=dk rCn ntj}nd|   krMdk rSn ntj}ntd|  ttj| g|d|}t||S t| d	rttjj| d }ttj|  gtjd|}t||S t | trt| tt| S t | trd
d | jD }| jd dksJ td|d< tt }|jt| jdd | jD |dd | jD | j dS | S )Ni   l        l        l         l            l            zUnsupported integer value r8   data_ptrc                 S     g | ]}t |qS r   _implicit_cvtr  sr   r   r   r    r  z!_implicit_cvt.<locals>.<listcomp>rS   r
   c                 S  r  r   r  r  r   r   r   r    r  c                 S  s   g | ]}t |qS r   )rx   r  )r  r   r   r   r   r    s    )r0   r1   r2   r4   r  )!rw   rl   rx   	str_to_tytritonruntimejitmangle_typer:   rt   rs   ru   rB   r   r   r   r   r   r  r  r  mapr  r	   r2   r  r   r   make_tensor_descriptorr0   r1   r4   rQ   )rb  tyr   r   r2   rd  r   r   r   r  n  sB   




r  c                 C  s   t | tjjjr| jS | S r   )rw   r  r  r  TensorWrapperr0   )tr   r   r   _unwrap_tensor  s   r  c                 C  s&   t |tjjjrtjj| |jS | S r   )rw   r  r  r  r  r   )r  original_tensorr   r   r   _rewrap_tensor  s   r  c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
GridExecutorc                   sN   ddl m || _|| _|| _fdd|j D   fdd|D | _d S )Nr
   _normalize_tyc                   s   i | ]	\}}| |qS r   r   )r  r  r  r	  r   r   r^    s    z)GridExecutor.__init__.<locals>.<dictcomp>c                   s   g | ]}  |d kr|qS )r  )get)r  r  )r+   r   r   r    s    z)GridExecutor.__init__.<locals>.<listcomp>)r  r
  r  	arg_namesgridr+   r`  
constexprs)r   r  r  r  r   )r+   r
  r   r6     s   zGridExecutor.__init__c                   sN   i  fdd  fdd|D }i }|  D ]
\}} |||< q||fS )Nc                   s   t | trt| t | S t | tr!t | j| j| j| j| j	S t
| ds(| S t| }|  vr@| }| | < |   }|jddd}||| | |  t|| d}|S )Nr  r   cpu)device)r  )rw   r  r  r   r	   r0   r1   r2   r4   rQ   r   r  untyped_storager  r  	new_emptyset_storage_offsetro  rU   r  )rb  unwrapped_argstoragecpu_arg_to_cpustoragesr   r   r    s*   


z,GridExecutor._init_args_hst.<locals>._to_cpuc                   s   g | ]} |qS r   r   )r  rb  )r  r   r   r    r  z/GridExecutor._init_args_hst.<locals>.<listcomp>r_  )r   args_devrb  args_hst
kwargs_hstr$   r%   r   r  r   _init_args_hst  s   zGridExecutor._init_args_hstc           
        st   i  fdd t ||D ]	\}} || q| D ]\}}|| }	 ||	 q D ]	\}}|| q.d S )Nc                   s   t | drt| t|} }|  | f|   < d S t| tr4t| |D ]	\} } | | q(d S t| trB | j|j d S d S )Nr  )	r   r  r  r  rw   r  zipr	   r0   )arg_devarg_hst	_from_cpur  r   r   r#    s   
 

z1GridExecutor._restore_args_dev.<locals>._from_cpu)r  r`  r  copy_)
r   r  r  rb  r  r   r!  r$   	kwarg_dev	kwarg_hstr   r"  r   _restore_args_dev  s   zGridExecutor._restore_args_devc              
     sh  | ddrd S tj  fdd| D }||\}}tj tjjg|R i |}fdd| D }tj	rH	|nj	}t
|dksUJ d|ddt
|   }tj|  z,t|d	 D ]#}t|d
 D ]}t|d D ]}	t|||	 jdi | q{qsqkW n ty }
 ztjjjr tt|
|
d }
~
ww |||| d S )NwarmupFc                   s    i | ]\}}| j v r||qS r   )ra  r[  )argspecr   r   r^    s     z)GridExecutor.__call__.<locals>.<dictcomp>c                   s(   i | ]\}}|| j v r|nt|qS r   )r  r  )r  r  rb  r   r   r   r^    s   (    z#grid must have at most 3 dimensionsr  r   r
   r   r   )popri  getfullargspecr  r`  r  r  getcallargscallabler  r>   r  r   r=   r   	Exceptionr  knobscompilationfront_end_debuggingr   rx  r'  )r   r  rb  r  r  ra  r  r   r   r   er   )r)  r   r   __call__  s8   


zGridExecutor.__call__N)r'   r(   r)   r6   r  r'  r4  r   r   r   r   r    s
    	$r  c                   @  s   e Zd Zdd ZdS )ASTTransformerc                 C  sv   g }|j D ]
}|| |g7 }qt|dkrtdtjtjtjdt ddt d|j	tj
ddgg d	|_	|S )
Nr
   z&Multiple assignments are not supportedinterpreter_semantic)idctxr  )r%   r   r8  F)r%   )funcra  keywords)targetsvisitr>   r   astCall	AttributeNameLoadr%   Constant)r   nodenamestargetr   r   r   visit_Assign  s   
zASTTransformer.visit_AssignN)r'   r(   r)   rF  r   r   r   r   r5    s    r5  c                   @  sJ   e Zd Ze Zdd Zdd Zdd Zdd Zd	d
 Z	dd Z
dd ZdS )FunctionRewriterc                 K  s   || _ || _d| _d| _d S )Nr  r   )r  rb  filenamedef_file_lineno)r   r  rb  r   r   r   r6     s   
zFunctionRewriter.__init__c                 C  sh   z
t | j\}}W n ty   | j Y S w |  \| _| _| || _| 	|}| 
|}| |S r   )ri  getsourcelinesr  r/  _get_jit_fn_file_linerH  rI  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r   linesr  r"  transformed_astr   r   r   rewrite_ast%  s   
	


zFunctionRewriter.rewrite_astc                 C  s   ddl m}m} ||| jS )Nr
   )get_jit_fn_file_lineJITFunction)r  rT  rU  r  )r   rT  rU  r   r   r   rK  9  s   z&FunctionRewriter._get_jit_fn_file_linec                 C  s0   d}t |D ]\}}| dr|d }q|S )Nr   zdef r
   )r  strip
startswith)r   rQ  rM  r   liner   r   r   rL  =  s   zFunctionRewriter._find_defc                 C  s&   || j d d  }d|}t|S )Nr
   r  )rM  jointextwrapdedent)r   rQ  r"  r   r   r   rN  E  s   

z FunctionRewriter._prepare_sourcec                 C  s:   t |}| j|}t | | jd }t || |S r  )r=  parseast_transformerr<  fix_missing_locationsrI  increment_lineno)r   r"  
parsed_astrR  
inc_linenor   r   r   rO  J  s   


zFunctionRewriter._transform_astc                 C  s^   t || jdd}i | j}| jj}t  D ]\}}||vr"|||< qt||| || jj S )Nexec)rH  mode)	compilerH  rb  r  r  globalsr`  rb  r'   )r   rR  compiled_codelocal_namespace
fn_globalsr$   r%   r   r   r   rP  U  s   
z"FunctionRewriter._compile_and_execN)r'   r(   r)   r5  r]  r6   rS  rK  rL  rN  rO  rP  r   r   r   r   rG    s    rG  c                   @  sH   e Zd ZU i Zded< dddZdd Zed	d
 Z dd Zdd Z	dS )InterpretedFunctionzDict[Callable, Callable]rewritten_fnr   r   c                   sT   | _ t|fi | _| _ fdd}| _t|}dd |j D  _	d S )Nc                    s(   |d }   }t| j|| i |S )Nr  rewriter  r  )ra  rb  r  r  r   r   r   runi  s   z)InterpretedFunction.__init__.<locals>.runc                 S  s   g | ]}|j qS r   r  r  r   r   r   r  p  rf  z0InterpretedFunction.__init__.<locals>.<listcomp>)
r  rG  rewriterrb  rm  ri  	signature
parametersr  r  )r   r  rb  rm  ro  r   r   r   r6   d  s   
zInterpretedFunction.__init__c                 C  s*   | j | jvr| j | j| j < | j| j  S r   )r  rj  rn  rS  r   r   r   r   rl  r  s   zInterpretedFunction.rewritec                 C  s   | j jS r   )r  r'   r   r   r   r   r'   w  s   zInterpretedFunction.__name__c                 C  s   |   }t|| j|S r   rk  )r   r  r  r   r   r   __getitem__{  s   zInterpretedFunction.__getitem__c              
   O  sJ   t | j |  }z||i |W S  ty$ } ztt||d }~ww r   )r  r  rl  r/  r   rx  )r   ra  rb  r  r3  r   r   r   r4    s   
zInterpretedFunction.__call__Nr  )
r'   r(   r)   rj  r+   r6   rl  r|  rq  r4  r   r   r   r   ri  `  s   
 

ri  )F
__future__r   r=  rZ  ri  typingr   r   r   r   r   numpyr:   r  triton.languagelanguagerx   r,   r   triton.language.semanticr   triton.tools.tensor_descriptorr	   errorsr   	functoolsr   _C.libtritonr   r  r   r   r   r/   rM   rZ   rv   r   r   r   r   	vectorizer|   r|  r}   r}  rB   rX  r   r   rh  rn  r~  r  r  r  r  r  r  r  r  r  r6  r  r  r  NodeTransformerr5  rG  ri  r   r   r   r   <module>   sp    ( @    	"`>N
"kE