o
    ^i                      @   sP  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd  Z)d!d" Z*d#d$ Z+d%d& Z,d'd( Z-d)d* Z.d+d, Z/d-d. Z0d/d0 Z1dNd1ee fd2d3Z2dOd4ej3d5eeej4f fd6d7Z5d4e6d5ej7fd8d9Z8d5e6fd:d;Z9d<d= Z:dPd?d@Z;dPdAdBZ<ej=j>e;  e< dCZ?dDe@dEe@fdFdGZAdHeej4ejBjCjf d5ej4fdIdJZDdOdKeee6  fdLdMZEdS )Q    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                   C   s   t jdddkS )NTRITON_INTERPRET01)osenvironget r    r    S/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r"   c                   C   s   t  rd S tjjj S N)r"   tritonruntimedriveractiveget_current_targetr    r    r    r!   r)      s   r)   c                  C      t  } | d u r	dS | jdkS )NFcudar)   backendtargetr    r    r!   is_cuda$      r0   c                   C      t  otj d dkS )Nr      r0   torchr+   get_device_capabilityr    r    r    r!   is_ampere_or_newer)      r7   c                   C      t  otj d dkS )Nr   
   r4   r    r    r    r!   is_blackwell-   r8   r;   c                   C   r2   Nr   	   r4   r    r    r    r!   is_hopper_or_newer1   r8   r>   c                   C   r9   r<   r4   r    r    r    r!   	is_hopper5   r8   r?   c                  C   r*   )NFhipr,   r.   r    r    r!   is_hip9   r1   rA   c                  C   "   t  } | d uo| jdko| jdkS )Nr@   gfx90ar)   r-   archr.   r    r    r!   is_hip_cdna2>      rF   c                  C   rB   )Nr@   gfx942rD   r.   r    r    r!   is_hip_cdna3C   rG   rI   c                  C   rB   )Nr@   gfx950rD   r.   r    r    r!   is_hip_cdna4H   rG   rK   c                  C   "   t  } | d uo| jdkod| jv S )Nr@   gfx11rD   r.   r    r    r!   is_hip_gfx11M   rG   rN   c                  C   rL   )Nr@   gfx12rD   r.   r    r    r!   is_hip_gfx12R   rG   rP   c                   C   s   t  pt pt S r$   )rF   rI   rK   r    r    r    r!   is_hip_cdnaW   r#   rQ   c                   C   s   t  rdS dS )Ni  i   )rK   r    r    r    r!   get_hip_lds_size[   s   rR   c                  C   r*   )NFxpur,   r.   r    r    r!   is_xpu_   r1   rT   c                  C   s   t  } | d u r	dS t| jS )N )r)   strrE   r.   r    r    r!   get_archd   r1   rW   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrX   lowhighrg   r[   xr    r    r!   numpy_randomi   s,   


*rv   ru   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicer]   r   r   )r[   namere   lstriprn   rh   rf   r	   r5   tensortlr   )ru   ry   dst_typetsigned_type_namex_signedr    r    r!   	to_triton   s   
r   c                 C   s   t t|  d S r$   )r}   	str_to_tyr
   ru   r    r    r!   str_to_triton_dtype   s   r   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$r\   znot a triton or torch dtype: )rb   r%   languager[   rz   r5   rematchrV   group	TypeErrortype)r[   mr    r    r!   torch_dtype_name   s   
r   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rb   r   basecpunumpyrn   rh   rf   r   r[   r5   Tensorr   float
ValueErrorr   r    r    r!   to_numpy   s   
 r   Fc                 C   sl   t  rdS t s
dS tjjj}| rdnd}ttt|	d}t
|dks)J |tj d dko5||kS )	NTF)   r   )r      .   r   r=   )r"   r0   r   nvidiaptxasversiontuplemaprc   splitlenr5   r+   r6   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tupler    r    r!   supports_tma   s   
r   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r    )r   r    r    r!   tma_skip_msg   s   r   )reasonsizealignc                 C   s   t j| t jddS )Nr+   )r[   ry   )r5   emptyr   )r   r   _r    r    r!   default_alloc_fn   r#   r   r   c                 C   s   t | tjjjr| jS | S r$   )rb   r%   r&   jitr   r   )r   r    r    r!   unwrap_tensor   s   r   skipped_attrc                    st   ddl m d u rt t fddj D g  j fdd} fdd}||fS )	Nr   r   c                    s4   i | ]\}}t | jr| jkr|vr||qS r    )rb   
base_knobs).0rz   knobset)r   r   r    r!   
<dictcomp>   s    
z%_fresh_knobs_impl.<locals>.<dictcomp>c                     sj     D ]+\} }t| |   |j D ]}|jtjv r(j	|jdd q 
|j qqd_S )NF)raisingT)itemssetattrcopyresetknob_descriptorsvalueskeyr   r   delenvappendpropagate_env)rz   r   knob)env_to_unsetr   	knobs_mapmonkeypatchr    r!   fresh_function   s   z)_fresh_knobs_impl.<locals>.fresh_functionc                     sL     D ]
\} }t| | q   D ]}|tjv r tj|= q_d S r$   )r   r   undor   r   r   )rz   r   k)r   r   r   r   prev_propagate_envr    r!   reset_function   s   

z)_fresh_knobs_impl.<locals>.reset_function)r%   r   setpytestMonkeyPatch__dict__r   r   )r   r   r   r    )r   r   r   r   r   r   r!   _fresh_knobs_impl   s   	r   )NNNr$   )F)Fr   r   r   rf   r5   r%   triton.languager   r}   r   typingr   r   r   r   numpy.randomr   triton.runtime.jitr   r	   r
   rd   re   integral_dtypesrl   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedr   
tma_dtypesr"   r)   r0   r7   r;   r>   r?   rA   rF   rI   rK   rN   rP   rQ   rR   rT   rW   rv   ndarrayr   r   rV   r[   r   r   r   r   r   markskipifrequires_tmarc   r   r&   r   r   r   r    r    r    r!   <module>   sb    

 

$