o
    ^ic>                     @  sV  d dl mZ ddlmZmZ ddlmZ ddlmZ edd Zed	d
 Z	ej
edd Zej
eeddd Zej
eeddsddZej
edtddZedd Zedd Zedd Zedd Zedd Zedd  Zed!d" Zej
eejd#d$d%d&dud(d)Zej
eejd*d+d,dvd-d.Zed/d0 Zed1d2 Zed3d4 Zed5d6 Zej
eejd7d$d%d&dud8d9Zej
eejd:d+d,dvd;d<Zed=d> Z ed?d@ Z!ej
eejdAdBdCdwdxdEdAZ"edFdG Z#ej
eedHdydIdJZ$edKdL Z%ej
eedMdtdNdMZ&ej
eej'dOdBdCdzdxdPdOZ(edQdR Z)ej
ee'dSd{dTdSZ*ed|dWdXZ+ed}dZd[Z,ed~d^d_Z-edd`daZ.eddej/fddedfZ0edej/fddgdhZ1edddidjZ2edej/fddkdlZ3edmdn Z4ej
eddodpZ5edqdr Z6dS )    )annotations   )jitconstexpr_function   )core)mathc                 C  s,   d}| }|dkr|dL }|d7 }|dks|S )Nr   r    )ilog2nr	   r	   S/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/language/standard.py_log2
   s   r   c                 C  s   | | d @ dko| dkS Nr   r   r	   )r
   r	   r	   r   _is_power_of_two   s   r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r	   )xdivr	   r	   r   cdiv   s   r   sigmoidc                 C  s   ddt |    S )Nr   )r   exp)r   r	   r	   r   r   ,   s   softmaxNFc                 C  sJ   |d u rd}n|}| t | ||d }t|}t|||d}t|||S )Nr   	keep_dims)maxr   r   sumfdiv)r   dimr   ieee_rounding_dimznumdenr	   r	   r   r   3   s   
c                 C  s   t j| | jg|dS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r"   r	   r	   r   ravelA   s   	r%   c                 C  sX   | | | }|| }|| }|| }t || |}|| }|||  }	|| }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r
   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_jr	   r	   r   	swizzle2dM   s   r2   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper	   r	   r   zerosu   s   
r6   c                 C  s   t | j| jS )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r6   r4   r5   )inputr	   r	   r   
zeros_like   s   r8   c           	      C  sJ   |r| |ko	||k }nd}| |kp|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_retr	   r	   r   _argmax_combine      rE   c                 C     t | |||dS NTrE   r<   r=   r>   r?   r	   r	   r   _argmax_combine_tie_break_left      rK   c                 C  rG   r9   rI   rJ   r	   r	   r   _argmax_combine_tie_break_fast   rL   rM   c                 C     t | |S N)r   maximumabr	   r	   r   _elementwise_max      rT   rP   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argTc                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjt dk rEt | j r6| 	t j
} n| j s?J d| 	t j} t j| |t|dS Nr       z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrK   rM   	constexprr5   primitive_bitwidthis_floatingtofloat32is_intint32reducerT   r7   axisrV   rW   r   r	   r	   r   r      s   
r   zmaximum indexr@   )rY   c                 C     t | |d||d\}}|S NT)rV   rW   r   )r   r7   rg   r@   r   _retr	   r	   r   argmax      rm   c           	      C  sJ   |r| |ko	||k }nd}| |k p|}t || |}t |||}||fS r9   r:   )	r<   r=   r>   r?   r@   rA   lt	value_ret	index_retr	   r	   r   _argmin_combine   rF   rr   c                 C  rG   rH   rr   rJ   r	   r	   r   _argmin_combine_tie_break_left   rL   rt   c                 C  rG   r9   rs   rJ   r	   r	   r   _argmin_combine_tie_break_fast   rL   ru   c                 C  rN   rO   r&   rQ   r	   r	   r   _elementwise_min   rU   rv   r'   c                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjdk rBt | j r3| 	t j
} n| j s<J d| 	t j} t j| |t|dS rZ   )r   r\   r]   rt   ru   r^   r5   r_   r`   ra   rb   rc   rd   re   rv   rf   r	   r	   r   min   s   
rw   zminimum indexc                 C  rh   ri   )rw   rj   r	   r	   r   argmin   rn   rx   c                 C  s   | | S rO   r	   rQ   r	   r	   r   _sum_combine     ry   c                 C  sT   |d ur|S d }|   r| jdk rtj}|S d }|S |  r(| jdk r&tjnd }|S )Nr[   )is_int_signedint_bitwidthr   rd   is_int_unsigneduint32)in_dtyper5   	out_dtyper	   r	   r   _pick_sum_dtype  s   r   r   r5   )	dtype_argcore.constexprc                 C  s0   t | j|}|d ur| |} tj| |t|dS )Nr   )r   r5   ra   r   re   ry   )r7   rg   r   r5   r   r	   r	   r   r     s   
c                 C  s   | |A S rO   r	   rQ   r	   r	   r   _xor_combine&  rz   r   zxor sumc                 C  &   t | jj d t j| |t|dS )Nz#xor_sum only supported for integersr   )r   static_asserttypescalarrc   re   r   r7   rg   r   r	   r	   r   xor_sum.     r   c                 C  s   | |B S rO   r	   )r   yr	   r	   r   _or_combine9  rz   r   	reduce_orc                 C  r   )Nz%reduce_or only supported for integersr   )r   r   r   r   rc   re   r   r   r	   r	   r   r   >  r   cumsumc                 C  s8   t | } t| j|}|d ur| |} t | |t|S rO   )r   r\   r   r5   ra   associative_scanry   )r7   rg   reverser5   r   r	   r	   r   r   I  s
   

c                 C  s   | | S rO   r	   rQ   r	   r	   r   _prod_combine[  rz   r   cumprodc                 C  s   t | } t | |t|S rO   )r   r\   r   r   )r7   rg   r   r	   r	   r   r   `  s   
n_dimsr(   c                 C  s:   t dd}t |dg| | d  dg dg|  }|S )Nr   r   r   )r   aranger#   )r   r(   arr	   r	   r   
_indicatorl  s   *r   r
   c           
      C  sz   t | j}tj| jjdd}| j|dd}|t||d | dA }|j| jdd}t||}t	| |k||A k|| }	|	S )NTbitwidthsignedbitcastr   )
r   r$   r   get_int_dtyper5   r_   ra   r   r   r;   )
r   flipr
   r   idtypeixiyr   is_rightrl   r	   r	   r   _compare_and_swaps  s   

r   stageorderc                 C  sF   |dkrt t| j|}n|}t|D ]}t| ||d | } q| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   )r   r   r$   r   static_ranger   )r   r   r   r   r
   r	   r	   r   _bitonic_merge_hypercube  s   r   c                 C  s6   t | dgt| j }t|||}t || j} | S )Nr   )r   r#   r   r$   r   r4   )r   r   r   r   hr	   r	   r   _bitonic_merge  s   r   kr   
descendingc           
      C  s.  |du rt | jd n|}t|t | jd kd t| j| }|du r'|nt|}t| j}t| dg| }td|d D ]}	t||	|	|k rLdn|}qAt|d |d D ]*}	|rkt	|t|jd | dnt
|t|jd | d}t|||	|k rdn|}qZt|| jdd d| g } | S )ai  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param k: the number of top elements to select. If none, assume k = x.shape[dim]
    :type k: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   +only minor dimension is currently supportedr   )rg   )lenr4   r   r   r   r$   r#   r   r   r   rw   )
r   r   r   r   r   log_nlog_kr   r   r
   r	   r	   r   	sort_impl  s   
8 r   c                 C  s   t | ||dS )N)r   r   r   )r   r   r   r	   r	   r   sort  s   r   c                 C  s   t | ||ddS )NT)r   r   r   r   )r   r   r   r	   r	   r   topk  rL   r   c                 C  sP   |d u rt | jd n|}t|t | jd kd t| jd }t| |||S )Nr   r   r   )r   r4   r   r   r   r   )r   r   r   r   r   r	   r	   r   bitonic_merge  s   r   c                 C  s,   | d u r
t |d } | dk r| t |7 } | S r   )r   )r   r4   r	   r	   r   _get_flip_dim  s
   r   c                 C  s   t t| j |ko|t| jk  t|| j}t t| j|  t| j| }t j| jj	dd}t 
| j|dd| jd| dg|  | j|d d  }t |D ]}|t||| dA }qUt 
|| jj| jdd} | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along
    :type dim: int
    Tr   r   Nr   r   )r   r   r   r4   r   r   r   r   r5   r_   r#   ra   r   r   )r   r   r   stepsr   r   r
   r	   r	   r   r     s   $<r   c                 C  sD   t | |}t|jdkr|S t ||jdd d|jd  g S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r4   r#   )rR   rS   cr	   r	   r   
interleave  s   &r   )NFF)F)NFTF)TF)NFN)r5   r   r9   )r   FN)r   F)r   r   r(   r   )r
   r   )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   rO   )r   r   r   r   )7
__future__r   runtime.jitr   r    r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r%   r2   r6   r8   rE   rK   rM   rT   _add_reduction_docstrr   rm   rr   rt   ru   rv   rw   rx   ry   r   r   r   r   r   r   _add_scan_docstrr   r   r   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r	   r	   r	   r   <module>   s    
	
	

'











	


	(
