o
    UTi<Y                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlm	Z
 ddlmZ ddlmZ ddlmZ ddlmZ ejZeeZG dd de
jZd	d
 Z														dddZG dd dZdS ))PdfPagePdfColorScheme    N)PdfiumError)	PdfBitmap)PdfTextPage)	PdfObjectc                       s  e Zd ZdZ fddZedd Zedd Zdd	 Z	d
d Z
dd Zdd Zdd Zdd Zd?ddZdd Zd?ddZdd Zd?ddZdd  Zd?d!d"Zd#d$ Zd?d%d&Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd@d6d7Zej fd8d9Z!d:d5d;de"j#d3d<fd=d>Z$  Z%S )Ar   ah  
    Page helper class.
    
    Attributes:
        raw (FPDF_PAGE):
            The underlying PDFium page handle.
        pdf (PdfDocument):
            Reference to the document this page belongs to.
        formenv (PdfFormEnv | None):
            Formenv handle, if the parent pdf had an active formenv at the time of page retrieval. None otherwise.
    c                    s(   || _ || _|| _t tj| j d S N)rawpdfformenvsuper__init__r   _close_impl)selfr	   r
   r   	__class__ R/var/www/html/RAG/RAG_venv/lib/python3.10/site-packages/pypdfium2/_helpers/page.pyr   "   s   zPdfPage.__init__c                 C   s   |rt | | t |  d S r   )pdfium_cFORM_OnBeforeClosePageFPDF_ClosePage)r	   r   r   r   r   r   )   s   zPdfPage._close_implc                 C   s   | j S r   )r
   r   r   r   r   parent0   s   zPdfPage.parentc                 C   
   t | S )z`
        Returns:
            float: Page width (horizontal size), in PDF canvas units.
        )r   FPDF_GetPageWidthFr   r   r   r   	get_width6      
zPdfPage.get_widthc                 C   r   )z_
        Returns:
            float: Page height (vertical size), in PDF canvas units.
        )r   FPDF_GetPageHeightFr   r   r   r   
get_height=   r   zPdfPage.get_heightc                 C   s   |   |  fS )zb
        Returns:
            (float, float): Page width and height, in PDF canvas units.
        )r   r   r   r   r   r   get_sizeD      zPdfPage.get_sizec                 C   s   t jt|  S )zO
        Returns:
            int: Clockwise page rotation in degrees.
        )pdfium_iRotationToDegreesr   FPDFPage_GetRotationr   r   r   r   get_rotationN   r    zPdfPage.get_rotationc                 C   s   t | tj|  dS )z\
        Define the absolute, clockwise page rotation (0, 90, 180, or 270 degrees).
        N)r   FPDFPage_SetRotationr!   RotationToConst)r   rotationr   r   r   set_rotationU      zPdfPage.set_rotationc           	      C   sR   t  t  t  t  f\}}}}|| ||||}|s|r| S d S |j|j|j|jfS r   )c_floatvalue)	r   box_funcfallback_funcfallback_okleftbottomrighttopokr   r   r   _get_box\   s
   zPdfPage._get_boxTc                 C   s   |  tjdd |S )a  
        Returns:
            (float, float, float, float) | None:
            The page MediaBox in PDF canvas units, consisting of four coordinates (usually x0, y0, x1, y1).
            If MediaBox is not defined, returns ANSI A (0, 0, 612, 792) if ``fallback_ok=True``, None otherwise.
        
        .. admonition:: Known issue

            Due to quirks in PDFium, all ``get_*box()`` functions except :meth:`.get_bbox` do not inherit from parent nodes in the page tree (as of PDFium 5418).
        c                   S      dS )N)r   r   id  i  r   r   r   r   r   <lambda>p       z&PdfPage.get_mediabox.<locals>.<lambda>)r4   r   FPDFPage_GetMediaBoxr   r.   r   r   r   get_mediaboxe   s   zPdfPage.get_mediaboxc                 C      t | |||| dS )zn
        Set the page's MediaBox by passing four :class:`float` coordinates (usually x0, y0, x1, y1).
        N)r   FPDFPage_SetMediaBoxr   lbrtr   r   r   set_mediaboxr   r)   zPdfPage.set_mediaboxc                 C      |  tj| j|S )zc
        Returns:
            The page's CropBox (If not defined, falls back to MediaBox).
        )r4   r   FPDFPage_GetCropBoxr:   r9   r   r   r   get_cropboxx      zPdfPage.get_cropboxc                 C   r;   )z)
        Set the page's CropBox.
        N)r   FPDFPage_SetCropBoxr=   r   r   r   set_cropbox   r)   zPdfPage.set_cropboxc                 C   rC   )zc
        Returns:
            The page's BleedBox (If not defined, falls back to CropBox).
        )r4   r   FPDFPage_GetBleedBoxrE   r9   r   r   r   get_bleedbox   rF   zPdfPage.get_bleedboxc                 C   r;   )z*
        Set the page's BleedBox.
        N)r   FPDFPage_SetBleedBoxr=   r   r   r   set_bleedbox   r)   zPdfPage.set_bleedboxc                 C   rC   )zb
        Returns:
            The page's TrimBox (If not defined, falls back to CropBox).
        )r4   r   FPDFPage_GetTrimBoxrE   r9   r   r   r   get_trimbox   rF   zPdfPage.get_trimboxc                 C   r;   )z)
        Set the page's TrimBox.
        N)r   FPDFPage_SetTrimBoxr=   r   r   r   set_trimbox   r)   zPdfPage.set_trimboxc                 C   rC   )za
        Returns:
            The page's ArtBox (If not defined, falls back to CropBox).
        )r4   r   FPDFPage_GetArtBoxrE   r9   r   r   r   
get_artbox   rF   zPdfPage.get_artboxc                 C   r;   )z(
        Set the page's ArtBox.
        N)r   FPDFPage_SetArtBoxr=   r   r   r   
set_artbox   r)   zPdfPage.set_artboxc                 C   s4   t  }t | |}|std|j|j|j|jfS )zz
        Returns:
            The bounding box of the page (the intersection between its media box and crop box).
        z Failed to get page bounding box.)r   FS_RECTFFPDF_GetPageBoundingBoxr   r/   r0   r1   r2   )r   rectr3   r   r   r   get_bbox   s
   zPdfPage.get_bboxc                 C   s.   t | }|stdt|| }| | |S )zY
        Returns:
            PdfTextPage: A new text page handle for this page.
        zFailed to load text page.)r   FPDFText_LoadPager   r   _add_kid)r   raw_textpagetextpager   r   r   get_textpage   s   


zPdfPage.get_textpagec                 C   sN   |j rtd|jr|j| jurtdt| | |  | |_ | j|_dS )a  
        Insert a pageobject into the page.
        
        The pageobject must not belong to a page yet. If it belongs to a PDF, the target page must be part of that PDF.
        
        Position and form are defined by the object's matrix.
        If it is the identity matrix, the object will appear as-is on the bottom left corner of the page.
        
        Parameters:
            pageobj (PdfObject): The pageobject to insert.
        zAThe pageobject you attempted to insert already belongs to a page.zBThe pageobject you attempted to insert belongs to a different PDF.N)page
ValueErrorr
   r   FPDFPage_InsertObject_detach_finalizer)r   pageobjr   r   r   
insert_obj   s   zPdfPage.insert_objc                 C   s~   |j | ur	td|jdkr#|jdusJ t|j|}d\|_|_n|jdu s*J t| |}|s6tdd|_ |  dS )a#  
        Remove a pageobject from the page.
        As of PDFium 5692, detached pageobjects may be only re-inserted into existing pages of the same document.
        If the pageobject is not re-inserted into a page, its ``close()`` method may be called.
        
        Note:
            If the object's :attr:`~.PdfObject.type` is :data:`FPDF_PAGEOBJ_TEXT`, any :class:`.PdfTextPage` handles to the page should be closed before removing the object.
        
        Parameters:
            pageobj (PdfObject): The pageobject to remove.
        z@The pageobject you attempted to remove is not part of this page.r   N)r   NzFailed to remove pageobject.)	r^   r_   level	containerr   FPDFFormObj_RemoveObjectFPDFPage_RemoveObjectr   _attach_finalizer)r   rb   r3   r   r   r   
remove_obj   s   

zPdfPage.remove_objc                 C   s   t | }|stddS )z
        Generate page content to apply additions, removals or modifications of pageobjects.
        
        If page content was changed, this function should be called once before saving the document or re-loading the page.
        z Failed to generate page content.N)r   FPDFPage_GenerateContentr   )r   r3   r   r   r   gen_content  s   
zPdfPage.gen_contentN   r   c                 c   s    |rt j}t j}|}nt j}t j}| }||}|dk r tdt|D ];}	|||	}
|
s1tdt|
| | j||d}|rB|j	|v rE|V  |j	t j
kr_||d k r_| j||||d dE dH  q$dS )a<  
        Iterate through the pageobjects on this page.
        
        Parameters:
            filter (list[int] | None):
                An optional list of pageobject types to filter (:attr:`FPDF_PAGEOBJ_*`).
                Any objects whose type is not contained will be skipped.
                If None or empty, all objects will be provided, regardless of their type.
            max_depth (int):
                Maximum recursion depth to consider when descending into Form XObjects.
        
        Yields:
            :class:`.PdfObject`: A pageobject.
        r   z$Failed to get number of pageobjects.zFailed to get pageobject.)r^   r
   re   rd      )filter	max_depthformrd   N)r   FPDFFormObj_CountObjectsFPDFFormObj_GetObjectFPDFPage_CountObjectsFPDFPage_GetObjectr   ranger   r
   typeFPDF_PAGEOBJ_FORMget_objects)r   rn   ro   rp   rd   count_objects
get_objectr   	n_objectsiraw_obj
helper_objr   r   r   rx     s6   
zPdfPage.get_objectsc                 C   s0   | j stdt| |}|tjkrtd|S )a  
        Flatten form fields and annotations into page contents.
        
        Attention:
            * :meth:`~.PdfDocument.init_forms` must have been called on the parent pdf, before the page was retrieved, for this method to work. In other words, :attr:`.PdfPage.formenv` must be non-null.
            * Flattening may invalidate existing handles to the page, so you'll want to re-initialize these afterwards.
        
        Parameters:
            flag (int): PDFium flattening target (:attr:`FLAT_*`)
        Returns:
            int: PDFium flattening status (:attr:`FLATTEN_*`). :attr:`FLATTEN_FAIL` is handled internally.
        zHpage.flatten() requires previous pdf.init_forms() before page retrieval.z,Failed to flatten annotations / form fields.)r   RuntimeErrorr   FPDFPage_FlattenFLATTEN_FAILr   )r   flagrcr   r   r   flatten>  s   
zPdfPage.flattenrm   )r   r   r   r   Fc                    s  t |    }	t |    }
|dv r|
|	}	}
 fdd|D }|	|d  |d  }|
|d  |d  }tdd	 ||fD rGtd
t| fi |\}}}}|dur^|r^|tjO }|||||d}|	|dd|| |d  |d  |	|
t
j| f}|| g||R }|du rtj|  n,tjdd}t
|ddd  ||}tjg |||R  }|tjksJ t|  |r| jrtj| jg|R   t| g|R |_|S )a  
        Rasterize the page to a :class:`.PdfBitmap`.
        
        Parameters:
            
            scale (float):
                A factor scaling the number of pixels per PDF canvas unit. This defines the resolution of the image.
                To convert a DPI value to a scale factor, multiply it by the size of 1 canvas unit in inches (usually 1/72in). [#user_unit]_
            
            rotation (int):
                Additional rotation in degrees (0, 90, 180, or 270).
            
            crop (tuple[float, float, float, float]):
                Amount in PDF canvas units to cut off from page borders (left, bottom, right, top). Crop is applied after rotation.
            
            may_draw_forms (bool):
                If True, render form fields (provided the document has forms and :meth:`~.PdfDocument.init_forms` was called).
            
            bitmap_maker (typing.Callable):
                Callback function used to create the :class:`.PdfBitmap`.
            
            fill_color (tuple[int, int, int, int]):
                Color the bitmap will be filled with before rendering. This uses RGBA syntax regardless of the pixel format used, with values from 0 to 255.
                If the fill color is not opaque (i.e. has transparency), ``{BGR,RGB}A`` will be used.
            
            grayscale (bool):
                If True, render in grayscale mode.
            
            optimize_mode (None | str):
                Page rendering optimization mode (None, "lcd", "print").
            
            draw_annots (bool):
                If True, render page annotations.
            
            no_smoothtext (bool):
                If True, disable text anti-aliasing. Overrides ``optimize_mode="lcd"``.
            
            no_smoothimage (bool):
                If True, disable image anti-aliasing.
            
            no_smoothpath (bool):
                If True, disable path anti-aliasing.
            
            force_halftone (bool):
                If True, always use halftone for image stretching.
            
            limit_image_cache (bool):
                If True, limit image cache size.
            
            rev_byteorder (bool):
                If True, render with reverse byte order, leading to ``RGB{A/x}`` output rather than ``BGR{A/x}``.
                Other pixel formats are not affected.
            
            prefer_bgrx (bool):
                If True, use 4-byte ``{BGR/RGB}x`` rather than 3-byte ``{BGR/RGB}`` (i.e. add an unused byte).
                Other pixel formats are not affected.
            
            maybe_alpha (bool):
                If True, use a pixel format with alpha channel (i.e. ``{BGR/RGB}A``) if page content has transparency.
                This is recommended for performance in these cases, but as page-dependent format selection can be unexpected, it is not enabled by default.
            
            force_bitmap_format (int | None):
                If given, override automatic pixel format selection and enforce use of the given format (one of the :attr:`FPDFBitmap_*` constants). In this case, you should not pass any other format selection options, except potentially *rev_byteorder*.
            
            extra_flags (int):
                Additional PDFium rendering flags. May be combined with bitwise OR (``|`` operator).
            
            color_scheme (PdfColorScheme | None):
                A custom pdfium color scheme. Note that this may flatten different colors into one, so the usability of this is limited.
            
            fill_to_stroke (bool):
                If a *color_scheme* is given, whether to only draw borders around fill areas using the `path_stroke` color, instead of filling with the `path_fill` color.
        
        Returns:
            PdfBitmap: Bitmap of the rendered page.
        
        .. admonition:: Format selection
            
            This is the format selection hierarchy used by :meth:`.render`, from lowest to highest priority:
            
            * default: ``BGR``
            * ``prefer_bgrx=True``: ``BGRx``
            * ``grayscale=True``: ``L``
            * ``maybe_alpha=True``: ``BGRA`` if the page has transparency, else the format selected otherwise
            * ``fill_color[3] < 255``: ``BGRA`` (background color with transparency)
            * ``force_bitmap_format=...`` -> any supported by pdfium
            
            Additionally, *rev_byteorder* will swap ``BGR{A/x}`` to ``RGB{A/x}`` if applicable.
        
        .. [#user_unit] Since PDF 1.6, pages may define an additional user unit factor. In this case, 1 canvas unit is equivalent to ``user_unit * (1/72)`` inches. PDFium does not currently provide an API to get the user unit, so this is not taken into account.
        )Z   i  c                    s   g | ]	}t |  qS r   )mathceil).0cscaler   r   
<listcomp>  s    z"PdfPage.render.<locals>.<listcomp>r      rm      c                 s   s    | ]}|d k V  qdS )rm   Nr   )r   dr   r   r   	<genexpr>  s    z!PdfPage.render.<locals>.<genexpr>zCrop exceeds page dimensionsN)formatrev_byteorder)versionNeedToPauseNowc                 S   r5   )NFr   )_r   r   r   r6     r7   z PdfPage.render.<locals>.<lambda>)r   r   r   r   anyr_   _parse_renderoptsr   FPDF_CONVERT_FILL_TO_STROKE	fill_rectr!   r&   FPDF_RenderPageBitmapIFSDK_PAUSEset_callbackconvert*FPDF_RenderPageBitmapWithColorScheme_StartFPDF_RENDER_DONEFPDF_RenderPage_Closer   FPDF_FFLDrawweakrefref	_pos_args)r   r   r'   cropmay_draw_formsbitmap_makercolor_schemefill_to_strokekwargs	src_width
src_heightwidthheight	cl_formatr   
fill_colorflagsbitmappos_argsrender_argspausefpdf_csstatusr   r   r   renderX  s8   g

 


zPdfPage.render)T)Nrl   Nr   )&__name__
__module____qualname____doc__r   staticmethodr   propertyr   r   r   r   r$   r(   r4   r:   rB   rE   rH   rJ   rL   rN   rP   rR   rT   rX   r]   rc   ri   rk   rx   r   FLAT_NORMALDISPLAYr   r   
new_nativer   __classcell__r   r   r   r   r      sL    



	



!
1r   c                 C   s:   |d dk s|rt | rt jS |rt jS |rt jS t jS )Nr      )r   FPDFPage_HasTransparencyFPDFBitmap_BGRAFPDFBitmap_GrayFPDFBitmap_BGRxFPDFBitmap_BGR)r^   r   	grayscaleprefer_bgrxmaybe_alphar   r   r   _auto_bitmap_format  s   r   r   r   r   r   FTc                 C   s   |d u rt | ||||}n|}|tjkrd}
|}|r|tjO }|r&|tjO }|r-|tjO }|r4|tjO }|r;|tjO }|rB|tjO }|	rI|tj	O }|
rP|tj
O }|rq| }|dkr`|tjO }n|dkrj|tjO }ntd| ||
||fS )NFlcdprintzInvalid optimize_mode )r   r   r   FPDF_GRAYSCALE
FPDF_ANNOTFPDF_RENDER_NO_SMOOTHTEXTFPDF_RENDER_NO_SMOOTHIMAGEFPDF_RENDER_NO_SMOOTHPATHFPDF_RENDER_FORCEHALFTONEFPDF_RENDER_LIMITEDIMAGECACHEFPDF_REVERSE_BYTE_ORDERlowerFPDF_LCD_TEXTFPDF_PRINTINGr_   )r^   r   r   optimize_modedraw_annotsno_smoothtextno_smoothimageno_smoothpathforce_halftonelimit_image_cacher   r   r   force_bitmap_formatextra_flagsr   r   r   r   r   r     s<   








r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r   z
    Rendering color scheme.
    Each color shall be provided as a list of values for red, green, blue and alpha, ranging from 0 to 255.
    c                 C   s   t ||||d| _d S )N)path_fill_colorpath_stroke_colortext_fill_colortext_stroke_color)dictcolors)r   	path_fillpath_stroke	text_filltext_stroker   r   r   r   1  s   zPdfColorScheme.__init__c                 C   s   t | j d| j dS )Nz(**))rv   r   r   r   r   r   r   __repr__7  s   zPdfColorScheme.__repr__c                 C   s4   t  }| j D ]\}}t||t|| q	|S )z\
        Returns:
            The color scheme as :class:`FPDF_COLORSCHEME` object.
        )r   FPDF_COLORSCHEMEr   itemssetattrr!   color_tohex)r   r   r   keyr+   r   r   r   r   :  s   zPdfColorScheme.convertN)r   r   r   r   r   r   r   r   r   r   r   r   +  s
    r   )r   FNTFFFFFFFFNr   )__all__r   ctypesloggingr   pypdfium2.rawr	   r   pypdfium2.internalinternalr!   pypdfium2._helpers.miscr   pypdfium2._helpers.bitmapr   pypdfium2._helpers.textpager   pypdfium2._helpers.pageobjectsr   r*   	getLoggerr   loggerAutoCloseabler   r   r   r   r   r   r   r   <module>   sD   
   S
9