o
    ^iqF                  	   @   s  d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZ d dlZd d	lmZ g d
ZeeZG dd deeZeddG dd dZededee ded fddZdededdfddZde e ddfddZ!de e ddfddZ"dedede e fddZ#dede e fdd Z$dede e fd!d"Z%dede e fd#d$Z&dede e fd%d&Z'ed'Z(ed(Z)d)ee) d*ee)ge(f de*e(e e) f fd+d,Z+d-ede e fd.d/Z,d-ede e fd0d1Z-d2ede e fd3d4Z.d2ede e fd5d6Z/defd7d8Z0dedefd9d:Z1d2ede e fd;d<Z2d2edefd=d>Z3d?edefd@dAZ4d2edefdBdCZ5dDede e fdEdFZ6dGee defdHdIZ7de e fdJdKZ8dLede e fdMdNZ9de e fdOdPZ:dS )Q    N)defaultdict)IterableIterator)contextmanager)asdict	dataclass)Enum)	getLogger)CallableOptionalTypeVar)signpost_event)AffinityMode6maybe_temporarily_apply_numa_binding_to_current_threadNumaOptionsc                   @   s    e Zd ZdZdZdZdZdZdS )r   zW
    See behavior description for each affinity mode
    in torch.distributed.run.
    nodesocket	exclusivezcore-complexN)__name__
__module____qualname____doc__NODESOCKET	EXCLUSIVECORE_COMPLEX r   r   M/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/torch/numa/binding.pyr      s    r   T)frozenc                   @   s$   e Zd ZU eed< 	 dZeed< dS )r   affinity_modeF!should_fall_back_if_binding_failsN)r   r   r   r   __annotations__r    boolr   r   r   r   r   $   s   
 r   	gpu_indexnuma_optionsreturnc                 c   s:    |du r
dV  dS t  }t| |d dV  t|d dS )z
    1. Applies NUMA binding to the current thread, suitable for the thread
    which will be interacting with GPU gpu_index.
    2. Resets to the original CPU affinity before exiting the context manager.
    Nr#   r$   logical_cpu_indices)+_get_allowed_cpu_indices_for_current_thread%_apply_numa_binding_to_current_thread$_bind_current_thread_to_logical_cpus)r#   r$   original_logical_cpu_indicesr   r   r   r   2   s   	
r   c              
   C   s   | t |d}td| z:t| |d}tdt| t|d tdt| t|d tdt| tddi |d	t|id
 W d S  tyr   tddi |dt	
 id
 td| |jrqtdt	
  Y d S  w )Nr&   z0Attempting to apply NUMA binding, given input %rz0Computed logical_cpu_indices=%s for NUMA bindingr'   z1Validated logical_cpu_indices=%s for NUMA bindingz=Successfully bound to logical_cpu_indices=%s for NUMA bindingnuma_bindingapply_successr(   )categoryname
parametersapply_exception	tracebackz)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)r   loggerinfo_get_logical_cpus_to_bind_to_get_ranges_str_from_ints%_raise_if_logical_cpu_indices_invalidr+   r   	Exceptionr3   
format_exc	exceptionr    warning)r#   r$   kwargsr(   r   r   r   r*   I   sd   

r*   r(   c                 C   s   | st dd S )Nz+Must bind to a non-empty set of CPU indices)RuntimeErrorr'   r   r   r   r8      s   r8   c                 C   s   t d|  d S Nr   )ossched_setaffinityr'   r   r   r   r+      s   r+   c                 C   sz   |j tjkrt| d}|S |j tjkrt| d}|S |j tjkr't| d}|S |j tjkr4t	| d}|S t
d|j  d)z
    Args:
        gpu_index: The index of the GPU that will be used by the subprocess.
            Example: 0
        numa_options: See NumaOptions for details.

    Returns:
        Set of logical CPU indices to bind to.
    r#   zAffinity mode z not supported.)r   r   r   !_node_get_logical_cpus_to_bind_tor   #_socket_get_logical_cpus_to_bind_tor   &_exclusive_get_logical_cpus_to_bind_tor   )_core_complex_get_logical_cpus_to_bind_to
ValueError)r#   r$   logical_cpusr   r   r   r6      s   




r6   c                 C      t | d}t|dS )z-
    Core logic of 'node' numa strategy.
    rB   numa_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r#   rK   r   r   r   rC      s   
rC   c                 C   sB   t | d}t|d}t|d}t }|D ]
}|t|d q|S )z/
    Core logic of 'socket' numa strategy.
    rB   rJ   )socket_index)rL   _get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterM   )r#   numa_node_index_of_gpurN   numa_node_indicesrH   rK   r   r   r   rD      s   
rD   c                 C   s   t | d}t|d}t|}|| }t|d}t|dd }tt| }t|t| }t|t| }|dk rPt	dt| d|dd	t| d
 || t
|| }|| ||k rbdnd }	dd t| ||	 D }
|
S )z2
    Core logic of 'exclusive' numa strategy.
    rB   rJ   c                 S      t t| dS Nlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asrW   r   r   r   <lambda>   
    z8_exclusive_get_logical_cpus_to_bind_to.<locals>.<lambda>   zThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r   c                 S   s   h | ]	}|D ]}|qqS r   r   ).0r(   rX   r   r   r   	<setcomp>  s    z9_exclusive_get_logical_cpus_to_bind_to.<locals>.<setcomp>)rL   _get_gpu_indices_for_numa_nodesortedindexrM   	_group_bydictitemslenr>   rY   listvalues)r#   rK   gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartend$logical_cpu_indices_for_original_gpur   r   r   rE      sb   


	
rE   c                 C   sv   t | d}t|d}t|}|| }t|d}t|dd }tt| dd d}|t| }t	|
 | }|S )z
    Core logic of 'core-complex' numa strategy.

    Each GPU is assigned a full core complex (group of cores sharing L3 cache)
    within its affined NUMA node.
    rB   rJ   c                 S   rU   rV   )rY   1_get_logical_cpus_sharing_same_max_level_cache_asrW   r   r   r   r[   %  r\   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>c                 S   s   t | d  | d fS )Nr]   r   )rg   )itemr   r   r   r[   1  s    )key)rL   ra   rb   rc   rM   rd   re   rf   rg   rh   ri   )r#   rK   rj   rk   rl   .max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpurr   r   r   r   rF     s4   


		rF   KVri   get_keyc                 C   s,   t t}| D ]}||}|| | q|S )z2
    Groups elements with same key into sets.
    )r   rQ   add)ri   rz   key_to_valuesvalueru   r   r   r   rd   C  s
   rd   rX   c                 C   sD   d|  d}t |}t| W  d    S 1 sw   Y  d S )N/sys/devices/system/cpu/cpuz/topology/thread_siblings_list)open_get_set_of_int_from_ranges_strread)rX   "thread_siblings_list_absolute_pathfr   r   r   rZ   N  s
   


$rZ   c              	   C   s<  d|  d}d}t  }t|D ]}|dr|dd   s qtj||}tj|d}t|}| 	 dvrC	 W d    qW d    n1 sMw   Y  tj|d}t|}	t
|	 }
W d    n1 snw   Y  |
|krxq|
}tj|d	}t|}t| }W d    n1 sw   Y  q|S )
Nr~   z/cacherc      type>   DataUnifiedlevelshared_cpu_list)rQ   r@   listdir
startswith	isdecimalpathjoinr   r   stripintr   )rX   cpu_cache_dir_absolute_path	max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path	type_filelevel_absolute_path
level_filer   shared_cpu_list_absolute_pathshare_cpu_list_filer   r   r   rs   X  s@   



rs   rK   c                 C   s   t | d}t }||@ S NrJ   )0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWEDr)   )rK   all_cpu_indicesallowed_cpu_indicesr   r   r   rM   ~  s
   rM   c              
   C   s|   d|  d}z"t |}| }W d   n1 sw   Y  W t|S W t|S  ty= } z	td| d|d}~ww )z
    Returns:
        Indices of all CPUs associated with numa_node_index. However, the list
        is not filtered based on whether the thread is allowed to use them.
    z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)r   r   FileNotFoundErrorr>   r   )rK   cpulist_absolute_pathr   cpu_range_strer   r   r   r     s    


r   c                   C   s
   t j S )N)torchcudadevice_countr   r   r   r   _get_gpu_count  s   
r   c                 C   s   t j| }|j}|j}|j}|dd|dd|dd}d| d}t|}tt|	 
 dW  d    S 1 s=w   Y  d S )N04x:02xz.0z/sys/bus/pci/devices/z
/numa_noder   )r   r   get_device_propertiespci_domain_id
pci_bus_idpci_device_idr   maxr   r   r   )r#   device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathr   r   r   r   rL     s   
$rL   c                    s    fddt t D S )Nc                    s   h | ]}t |d  kr|qS )rB   )rL   )r_   r#   rJ   r   r   r`     s
    z1_get_gpu_indices_for_numa_node.<locals>.<setcomp>)ranger   rJ   r   rJ   r   ra     s   
ra   c                 C   rI   NrJ   )	cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)rK   arbitrary_cpu_indexr   r   r   rO     s   
rO   r   c              
   C   sv   d|  d}z t |}t|  W  d    W S 1 sw   Y  W d S  ty: } ztd| |d }~ww )Nr~   z/topology/physical_package_idz)Could not determine socket for cpu_index=)r   r   r   r   r   r>   )r   package_id_absolute_pathr   r   r   r   r   r     s   

(r   c                 C   rU   r   )rY   rM   rJ   r   r   r   r     s   r   
ranges_strc                 C   st   t  }| dD ]/}| }|sqd|v r0|d\}}t|t|}}|t||d  q|t| q|S )z
    Util for parsing a string of int ranges, as in a sysfs file.

    Args:
        ranges_str: E.g., "0-2,4,6-7"

    Returns:
        E.g., {0, 1, 2, 4, 6, 7}
    r^   -r]   )rQ   splitr   r   rR   r   r{   )r   ints	range_str	start_strend_strrp   rq   r   r   r   r     s   
r   r   c                 C   s   | sdS t | }g }|d  }}|dd D ]$}||d kr!|}q||kr,||  n
|| d|  | }}q||krF||  n
|| d|  d|S )z
    Convert a set of integers to a compact string with ranges.

    Args:
        ints: E.g., {0, 1, 2, 4, 6, 7}

    Returns:
        E.g., "0-2,4,6-7"
     r   r]   Nr   r^   )rb   appendr   )r   sorted_intsrangesrp   prevnumr   r   r   r7     s    


r7   c                  C   s>   t d} |  }W d    t|S 1 sw   Y  t|S )Nz!/sys/devices/system/node/possible)r   r   r   )r   possible_nodes_strr   r   r   !_get_systemwide_numa_node_indices  s   


r   rN   c                 C   s<   t  }t }|D ]}t|d}| t|dkr|| q|S r   )r   rQ   r   r   r{   )rN   systemwide_numa_node_indicesmatching_numa_node_indicesrK   r   r   r   r   rP     s   
rP   c                   C   s
   t dS r?   )r@   sched_getaffinityr   r   r   r   r)   !  s   
r)   );r@   r3   collectionsr   collections.abcr   r   
contextlibr   dataclassesr   r   enumr   loggingr	   typingr
   r   r   r   torch._utils_internalr   __all__r   r4   strr   r   r   r   r*   rQ   r8   r+   r6   rC   rD   rE   rF   rx   ry   re   rd   rZ   rs   rM   r   r   rL   ra   rO   r   r   r   r7   r   rP   r)   r   r   r   r   <module>   s    
9
F/0


&
$