a
    b3j?                     @  s  d Z ddlmZ ddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6m7Z7 e	8e9Z:ee;e;f Z<dddddZ=G dd de>Z?ddd d!d"Z@G d#d$ d$e>ZAdd%dd&d'd(ZBdd%dd&d)d*ZCd+dd,d-d.ZDG d/d0 d0ZEG d1d2 d2eZFd2d2d3d4d5ZGeGd6d7d8d9d:ZHed;d<G d=d6 d6ZIG d>d? d?eZJdRd@dAdBddCdDdEZKdSddFd6dGdHdIZLd@d%dJdKdLdMZMG dNdO dOeZNG dPdQ dQZOdS )TzO
The main purpose of this module is to expose LinkCollector.collect_sources().
    )annotationsN)IterableMutableMappingSequence)	dataclass)
HTMLParser)Values)Callable
NamedTupleProtocol)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_fileredact_auth_from_url)url_to_path)vcs   )CandidatesFromPage
LinkSourcebuild_sourcestr
str | Noneurlreturnc                 C  s6   t jD ]*}|  |r| t| dv r|  S qdS )zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   schemeslower
startswithlen)r!   scheme r(   z/www/wwwroot/dpstar/app/297b3aabda72fedb274352021c2dd8b5_venv/lib/python3.9/site-packages/pip/_internal/index/collector.py_match_vcs_scheme.   s    

r*   c                      s&   e Zd Zdddd fddZ  ZS )_NotAPIContentr   None)content_typerequest_descr"   c                   s   t  || || _|| _d S N)super__init__r-   r.   )selfr-   r.   	__class__r(   r)   r1   :   s    z_NotAPIContent.__init__)__name__
__module____qualname__r1   __classcell__r(   r(   r3   r)   r+   9   s   r+   r   r,   )responser"   c                 C  s6   | j dd}| }|dr$dS t|| jjdS )z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    Content-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr$   r%   r+   requestmethod)r9   r-   content_type_lr(   r(   r)   _ensure_api_header@   s    rB   c                   @  s   e Zd ZdS )_NotHTTPN)r5   r6   r7   r(   r(   r(   r)   rC   V   s   rC   r   )r!   sessionr"   c                 C  sF   t j| \}}}}}|dvr$t |j| dd}t| t| dS )z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httpshttpT)allow_redirectsN)urllibparseurlsplitrC   headr   rB   )r!   rD   r'   netlocpathqueryfragmentrespr(   r(   r)   _ensure_api_responseZ   s    rQ   c                 C  sx   t t| jrt| |d tdt|  |j| dg dddd}t	| t
| tdt| |jd	d
 |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    rD   zGetting page %sz, )r<   z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r=   zFetched page %s as %sr:   r;   )r   r   filenamerQ   loggerdebugr   r>   joinr   rB   r=   )r!   rD   rP   r(   r(   r)   _get_simple_responsel   s&    rX   ResponseHeaders)r=   r"   c                 C  s<   | r8d| v r8t j }| d |d< |d}|r8t|S dS )z=Determine if we have any encoding information in our headers.r:   zcontent-typecharsetN)emailmessageMessage	get_paramr   )r=   mrZ   r(   r(   r)   _get_encoding_from_headers   s    

r`   c                   @  s:   e Zd ZdddddZdddd	d
ZddddZdS )CacheablePageContentIndexContentr,   pager"   c                 C  s   |j s
J || _d S r/   )cache_link_parsingrd   r2   rd   r(   r(   r)   r1      s    
zCacheablePageContent.__init__objectbool)otherr"   c                 C  s   t |t| o| jj|jjkS r/   )
isinstancetyperd   r!   )r2   ri   r(   r(   r)   __eq__   s    zCacheablePageContent.__eq__intr"   c                 C  s   t | jjS r/   )hashrd   r!   r2   r(   r(   r)   __hash__   s    zCacheablePageContent.__hash__N)r5   r6   r7   r1   rl   rq   r(   r(   r(   r)   ra      s   ra   c                   @  s   e Zd ZdddddZdS )
ParseLinksrb   Iterable[Link]rc   c                 C  s   d S r/   r(   rf   r(   r(   r)   __call__       zParseLinks.__call__N)r5   r6   r7   rt   r(   r(   r(   r)   rr      s   rr   )fnr"   c                   s>   t jddd fddt  ddd fdd	}|S )
z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    ra   z
list[Link])cacheable_pager"   c                   s   t  | jS r/   )listrd   )rw   )rv   r(   r)   wrapper   s    z*with_cached_index_content.<locals>.wrapperrb   rc   c                   s   | j rt| S t | S r/   )re   ra   rx   )rd   rv   ry   r(   r)   wrapper_wrapper   s    z2with_cached_index_content.<locals>.wrapper_wrapper)	functoolscachewraps)rv   r{   r(   rz   r)   with_cached_index_content   s
    r   rb   rs   rc   c           
      c  s   | j  }|drTt| j}|dg D ]"}t|| j	}|du rHq,|V  q,dS t
| j	}| jpfd}|| j| | j	}|jp|}|jD ]$}	tj|	||d}|du rq|V  qdS )z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    r<   filesNzutf-8)page_urlbase_url)r-   r$   r%   jsonloadscontentr>   r   	from_jsonr!   HTMLLinkParserencodingfeeddecoder   anchorsZfrom_element)
rd   rA   datafilelinkparserr   r!   r   anchorr(   r(   r)   parse_links   s&    





r   T)frozenc                   @  sL   e Zd ZU dZded< ded< ded< ded< d	Zd
ed< ddddZdS )rb   a  Represents one response (or page), along with its URL.

    :param encoding: the encoding to decode the given content.
    :param url: the URL from which the HTML was downloaded.
    :param cache_link_parsing: whether links parsed from this page's url
                               should be cached. PyPI index urls should
                               have this set to False, for example.
    bytesr   r   r-   r   r   r!   Trh   re   rn   c                 C  s
   t | jS r/   )r   r!   rp   r(   r(   r)   __str__  s    zIndexContent.__str__N)r5   r6   r7   __doc____annotations__re   r   r(   r(   r(   r)   rb      s   
	c                      sJ   e Zd ZdZddd fddZddddd	d
ZdddddZ  ZS )r   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    r   r,   r    c                   s$   t  jdd || _d | _g | _d S )NT)convert_charrefs)r0   r1   r!   r   r   )r2   r!   r3   r(   r)   r1     s    zHTMLLinkParser.__init__zlist[tuple[str, str | None]])tagattrsr"   c                 C  sH   |dkr,| j d u r,| |}|d urD|| _ n|dkrD| jt| d S )Nbasea)r   get_hrefr   appenddict)r2   r   r   hrefr(   r(   r)   handle_starttag  s    
zHTMLLinkParser.handle_starttagr   )r   r"   c                 C  s"   |D ]\}}|dkr|  S qd S )Nr   r(   )r2   r   namevaluer(   r(   r)   r   !  s    
zHTMLLinkParser.get_href)r5   r6   r7   r   r1   r   r   r8   r(   r(   r3   r)   r     s   r   r   zstr | ExceptionzCallable[..., None] | None)r   reasonmethr"   c                 C  s   |d u rt j}|d| | d S )Nz%Could not fetch URL %s: %s - skipping)rU   rV   )r   r   r   r(   r(   r)   _handle_get_simple_fail(  s    r   rh   )r9   re   r"   c                 C  s&   t | j}t| j| jd || j|dS )Nr:   )r   r!   re   )r`   r=   rb   r   r!   )r9   re   r   r(   r(   r)   _make_index_content2  s    
r   IndexContent | None)r   rD   r"   c             
   C  s  | j ddd }t|}|r0td||  d S |drvtjt	|rv|
ds\|d7 }tj|d}td| zt||d	}W nL ty   td
|  Y n< ty } z"td| |j|j W Y d }~nd }~0  ty } zt| | W Y d }~nd }~0  ty< } zt| | W Y d }~nd }~0  ty } z,d}|t|7 }t| |tjd W Y d }~nld }~0  tjy } zt| d|  W Y d }~n6d }~0  tjy   t| d Y n0 t|| jdS d S )N#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.zfile:/z
index.htmlz# file: URL is directory, getting %srR   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)re   )r!   splitr*   rU   warningr%   osrM   isdirr   endswithrH   rI   urljoinrV   rX   rC   r+   r.   r-   r   r   r   r   r   infor   ConnectionErrorTimeoutr   re   )r   rD   r!   
vcs_schemerP   excr   r(   r(   r)   _get_index_content?  sT    

$$r   c                   @  s   e Zd ZU ded< ded< dS )CollectedSourceszSequence[LinkSource | None]
find_links
index_urlsN)r5   r6   r7   r   r(   r(   r(   r)   r   {  s   
r   c                   @  sp   e Zd ZdZddddddZeddd	d
d dddZeddddZdddddZ	ddddddZ
dS )LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    r   r   r,   )rD   search_scoper"   c                 C  s   || _ || _d S r/   )r   rD   )r2   rD   r   r(   r(   r)   r1     s    zLinkCollector.__init__Fr   rh   )rD   optionssuppress_no_indexr"   c                 C  sd   |j g|j }|jr8|s8tdddd |D  g }|jp@g }tj|||jd}t	||d}|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c                 s  s   | ]}t |V  qd S r/   r   ).0r!   r(   r(   r)   	<genexpr>  ru   z'LinkCollector.create.<locals>.<genexpr>)r   r   no_index)rD   r   )
	index_urlextra_index_urlsr   rU   rV   rW   r   r   creater   )clsrD   r   r   r   r   r   link_collectorr(   r(   r)   r     s$    

zLinkCollector.createz	list[str]rn   c                 C  s   | j jS r/   )r   r   rp   r(   r(   r)   r     s    zLinkCollector.find_linksr   r   )locationr"   c                 C  s   t || jdS )z>
        Fetch an HTML page containing package links.
        rR   )r   rD   )r2   r   r(   r(   r)   fetch_response  s    zLinkCollector.fetch_responser   r   r   )project_namecandidates_from_pager"   c                   s   t  fddjD  }t  fddjD  }ttj	rdd t
||D }t| d dg| }td| tt|t|d	S )
Nc              	   3  s&   | ]}t | jjd d dV  qdS )Fr   page_validator
expand_dirre   r   Nr   rD   is_secure_originr   locr   r   r2   r(   r)   r     s   	z0LinkCollector.collect_sources.<locals>.<genexpr>c              	   3  s&   | ]}t | jjd d dV  qdS )Tr   Nr   r   r   r(   r)   r     s   	c                 S  s*   g | ]"}|d ur|j d urd|j  qS )Nz* )r   )r   sr(   r(   r)   
<listcomp>  s   z1LinkCollector.collect_sources.<locals>.<listcomp>z' location(s) to search for versions of :
)r   r   )collectionsOrderedDictr   get_index_urls_locationsvaluesr   rU   isEnabledForloggingDEBUG	itertoolschainr&   rV   rW   r   rx   )r2   r   r   index_url_sourcesfind_links_sourceslinesr(   r   r)   collect_sources  s*    	
	

zLinkCollector.collect_sourcesN)F)r5   r6   r7   r   r1   classmethodr   propertyr   r   r   r(   r(   r(   r)   r     s    !r   )N)T)Pr   
__future__r   r   email.messager[   r|   r   r   r   r   urllib.parserH   collections.abcr   r   r   Zdataclassesr   html.parserr   optparser   typingr	   r
   r   pip._vendorr   Zpip._vendor.requestsr   Zpip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   !pip._internal.models.search_scoper   pip._internal.network.sessionr   pip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   pip._internal.utils.urlsr   pip._internal.vcsr   sourcesr   r   r   	getLoggerr5   rU   r   rY   r*   	Exceptionr+   rB   rC   rQ   rX   r`   ra   rr   r   r   rb   r   r   r   r   r   r   r(   r(   r(   r)   <module>   sf   
?  <