o
    ^i;                     @   s,  d dl Z d dlmZmZmZmZ d dlZd dlmZ	 d dlm
Z d dlmZ d dlmZ d dlmZ d dlm
Z d dlmZ d d	lmZ d d
lmZmZ d dlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# dZ$G dd dZ%G dd dZ&dee'e#j(f de#j(fddZ)de'fddZ*G dd deZ+dS )    N)DictOptionalUnionAny)ir)proton)amd)nvidia)passes)LazyDict)JITFunction)set_profile_allocatorNullAllocator)backends   )Hook   )set_instrumentation_onset_instrumentation_off)modec                   @   s.   e Zd Zdd Zdededee fddZdS )	CudaAllocatorc                 C   
   || _ d S N)instrumentation_hook)selfr    r   `/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/triton/profiler/hooks/instrumentation.py__init__      
zCudaAllocator.__init__size	alignmentstreamc                 C   sn   || j jkrtd| d| j j || d | | }t|| j j}dd l}|j|f|jdd}|| j _|S )NzAlignment mismatch: z != r   r   cudadtypedevice)	r   profile_buffer_alignmentRuntimeErrormaxprofile_buffer_sizetorchemptyuint8buffer)r   r   r    r!   aligned_sizer*   r-   r   r   r   __call__   s   zCudaAllocator.__call__N)__name__
__module____qualname__r   intr   r/   r   r   r   r   r      s    r   c                   @   sF   e Zd Zdeeef fddZdefddZdefddZd	d
 Z	dS )Instrumentationir_mapc                 C   r   r   )manager)r   r5   r   r   r   r   1   r   zInstrumentation.__init__r   c                 C   s&   || j v rtd| || j |< d S )NzIR already registered: )r6   r'   )r   r   funcr   r   r   register4   s   
zInstrumentation.registerc                 C   s*   |  | || jv r| j| | d S d S r   )load_dialectsr6   )r   r   pmcontextr   r   r   patch9   s   

zInstrumentation.patchc                 C   s   t | d S r   )triton_protonr9   )r   ctxr   r   r   r9   >   s   zInstrumentation.load_dialectsN)
r0   r1   r2   r   strr   r   r8   r<   r9   r   r   r   r   r4   /   s
    r4   mode_objreturnc           
         s  t | tjr| S | sd} | d}|d }i }|dd  D ]}d|v r0|dd\}}|||< qtd| d|dd	|d
d|ddt|dd|dd|dd|dd|ddd  fdd}|dtj d< |d
tj d
< |dtj	 d< |dtj
 d< |dtj d< t d dkrdd  d dD ng }|D ]}	|	tjvrtd|	 qdd |D  d< |dkrtjd!i  S |dkrtjd!i  S td |  )"Ndefault:r   r   =z#Malformed instrumentation option: ''metric_typecyclebuffer_typesharedbuffer_strategycircularbuffer_size0granularitywarpsampling_strategynonesampling_options optimizations)rF   rH   rJ   rL   rN   rP   rR   rT   c                    s8    |  }|r||vrt d|  d| |r|| S |S )NzUnknown z: )
ValueError)opt_namemappingvalueoptionsr   r   get_option_value[   s   z)_interpret_mode.<locals>.get_option_valuec                 S   s   g | ]}|  qS r   )strip.0rX   r   r   r   
<listcomp>h   s    z#_interpret_mode.<locals>.<listcomp>,zUnknown optimization: c                 S   s   g | ]}t j| qS r   )r   rT   r]   r   r   r   r_   m   s    mmazUnknown mode: r   )
isinstancer   InstrumentationModesplitrU   getr3   metric_typesbuffer_typesbuffer_strategiesgranularitiessampling_strategieslenrT   DefaultMMA)
r@   parts	mode_nameoptsoptkeyvalr[   valuesrX   r   rY   r   _interpret_modeB   sN   


ru   c                  C   s6   t jjj j} | dkrdS | dkrdS td|  )Nr"   r	   hipr   zUnsupported backend: )tritonruntimedriveractiveget_current_targetbackendr'   )r|   r   r   r   _get_backend_namex   s   r}   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZ	e
e ed< dZeed	< d
Zeed< dedeejf fddZdd Zdd Zdedededeeef deddfddZdefddZdeddfddZdeddfd d!Zdeddfd"d#ZdS )$InstrumentationHookr   priorityactive_countFenable_host_bufferNhost_bufferr   r)      r&   r@   c                 C   s$   t || _t| | _d | _i | _d S r   )ru   r   r   	allocatorr-   metadata_path)r   r@   r   r   r   r      s   


zInstrumentationHook.__init__c                    s   t jdkr	tdt  jd7  _t  tjjj tjjjj	
d t   fdd fddtfd	d
fdd
dt  j_tj tjjtfdd}|t_d S )Nr   zFOnly one instance of the instrumentation hook can be active at a time.r   max_shared_memc                    s   t jjj jv r
dnd}t| j jj jj jj j	j j
j jj jjj| tj|  t jjj jv r?t|  t|  t jjj jv rW dkrYt|  d S d S d S )NFTr   )r   OptimizeCLOCK32rT   r=   add_convert_proton_to_protongpurF   rP   rR   rN   rJ   rH   rL   r)   r&   triton_passescommonadd_cseSCHED_STORESadd_schedule_buffer_store!add_allocate_proton_shared_memorySCHED_BARRIERSadd_sched_barriers)r:   is_long_clk)backend_namer   r   r   r   to_llvmir_passes   s   

z6InstrumentationHook.activate.<locals>.to_llvmir_passesc                    s\   t |   dkrt |  d S  dkr,tjjjjd 	dd }t 
| | d S d S )Nr	   r   archrC   r   )r=   )add_allocate_proton_global_scratch_buffer%add_convert_proton_nvidia_gpu_to_llvmrw   rx   ry   rz   utilsget_device_propertiesrd   "add_convert_proton_amd_gpu_to_llvm)r:   r   )r   r%   r   r   to_llvm_passes   s   
 z4InstrumentationHook.activate.<locals>.to_llvm_passesc                        | S r   r   r:   )r   r   r   <lambda>       z.InstrumentationHook.activate.<locals>.<lambda>c                    r   r   r   r   )r   r   r   r      r   )ttgpuir_to_llvmirllvmir_to_llvmc                    s"   t  |d< | g|R i |S )Ninstrumentation_mode)r?   )r   argskwargs)original_modeoriginal_runr   r   instrumented_run   s   z6InstrumentationHook.activate.<locals>.instrumented_run)r~   r   r'   r   rw   rx   ry   rz   get_current_devicer   r   r}   r4   r   compilerinstrumentationr   r   r   runr   	functoolswraps)r   r   r   )r   r%   r   r   r   r   r   r   r   activate   s&   




zInstrumentationHook.activatec                 C   sj   t jdkrd S t  jd8  _t }i t| j_t  ttj	dr%tj	j
t_	tt  t jr0d t _d | _d S )Nr   r   __wrapped__)r~   r   r}   r   r   r   r   hasattrr   r   r   r   r   r   r   r-   )r   r   r   r   r   
deactivate   s   



zInstrumentationHook.deactivatemodulefunctionnamemetadata_grouphashrA   c                 C   s   |sd S t dd | D d }t dd | D d }|| j|< |ret }t| t }	|	dkr9t| n	|	dkrBt| t	| t
||}||_t	|}
t	|}t|||
|| d S td| )Nc                 s   "    | ]\}}| d r|V  qdS )ttgirNendswithr^   rr   pathr   r   r   	<genexpr>        z2InstrumentationHook.init_handle.<locals>.<genexpr>c                 s   r   )jsonNr   r   r   r   r   r      r   r	   r   z+IR path not found in metadata for function )nextitemsr   	triton_irr;   r9   r}   triton_nvidia
triton_amdr=   parse_mlir_moduleget_scope_id_namesget_scope_id_parents	libprotoninit_function_metadatar'   )r   r   r   r   r   r   ir_pathr   r;   r   scope_id_namesscope_id_parentsr   r   r   init_handle   s(   





zInstrumentationHook.init_handlec                 C   s   | j d u rdS | j  S )Nr   )r-   data_ptr)r   r   r   r   	_data_ptr  s   zInstrumentationHook._data_ptrmetadatac                 C   sb   |j d}|j d}| jd u rdn	| j | j  }t|||  | tj	r/d t_
d S d S Nr   r!   r   )datare   r-   element_sizenumelr   enter_instrumented_opr   r~   r   r   r   r   r7   r!   
alloc_sizer   r   r   enter	  s   "
zInstrumentationHook.enterc                 C   sf   |j d}|j d}| jd u rdn	| j | j  }t|||  | tj	r1| 
| d S d S r   )r   re   r-   r   r   r   exit_instrumented_opr   r~   r   _populate_host_bufferr   r   r   r   exit  s   "zInstrumentationHook.exitc              
   C   s  |r| j | rdd l}dd l}dd l}dtttf dtfdd}| jd u r'dn	| j	 | j
  }| jj d}i }t| j | d}	||	}W d    n1 sTw   Y  ||d }
|d }|d	 }| jjtjjkrq|nt|}t|| }| jjd
ko| jjtjjk}|rdd t|D }ndd |D }d|d  }d}|}|}t|||||
||||g
|}|jdt| g|R  }|j|| |jddt_tjd | }| |j!t"||jd tj|d  #| j}| | j$  d S d S d S )Nr   targetrA   c                 S   s$   | d dkrdS | d dkrdS dS )Nr|   r"   r   rv   r   r   r   )r   r   r   r   encode_target   s
   z@InstrumentationHook._populate_host_buffer.<locals>.encode_targetr`   rprofile_scratch_size	num_warpsrS   c                 S   s   g | ]}|qS r   r   r^   ir   r   r   r_   V  s    z=InstrumentationHook._populate_host_buffer.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r3   r   r   r   r   r_   X  s    (      Icpur#   )r$   )%r   r*   structr   r   r?   r   r3   r-   r   r   r   rR   r\   rd   openloadrP   r=   SAMPLING_STRATEGYNONErk   rN   GRANULARITYWARPrangeVERSIONpackr+   r,   r~   r   copy_tensorlistview_asr   )r   r   r*   r   r   r   r   sampled_warpsr   filedevice_typescratch_mem_size
total_unituid_num	block_numis_all_warpsuid_vecheader_sizeheader_offsetpayload_offsetpayload_sizeheader_valuesheader_bytesconfig_portiondata_portionr   r   r   r     sL   "!z)InstrumentationHook._populate_host_buffer)r0   r1   r2   r   r3   __annotations__r   r   boolr   r   r   r)   r&   r   r?   r   rc   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~      s   
 ;*	r~   ),r   typingr   r   r   r   rw   triton._C.libtritonr   r   r   r=   r   r   r	   r   r
   r   triton._C.libprotonr   triton.compilerr   triton.runtime.jitr   triton.runtime._allocationr   r   triton.backendsr   hookr   flagsr   r   rS   r   r   r   r4   r?   rc   ru   r}   r~   r   r   r   r   <module>   s,    6
