
    [6g,                        d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlZd dlmZ d dlmZmZ d dlmZ erd dlmZ  G d de          ZdS )    )annotations)TYPE_CHECKINGN)using_pyarrow_string_dtype)lib)import_optional_dependency)ParserErrorParserWarning)find_stack_level)pandas_dtype)
is_integer)	DataFrame)_arrow_dtype_mappingarrow_string_types_mapper)
ParserBase)
ReadBufferc                  L     e Zd ZdZd fdZddZddZddZddZddZ	 xZ
S )ArrowParserWrapperz7
    Wrapper for the pyarrow engine for read_csv()
    srcReadBuffer[bytes]returnNonec                    t                                          |           || _        || _        |                                  d S N)super__init__kwdsr   _parse_kwds)selfr   r   	__class__s      e/var/www/surfInsights/venv3-11/lib/python3.11/site-packages/pandas/io/parsers/arrow_parser_wrapper.pyr   zArrowParserWrapper.__init__%   sA    	    c                    | j                             d          }|dn|| _        | j         d         }t          |t                    rt          d          t          | j         d                   | _        dS )z?
        Validates keywords before passing to pyarrow.
        encodingNzutf-8	na_valuesz?The pyarrow engine doesn't support passing a dict for na_values)r   getr#   
isinstancedict
ValueErrorlistr$   )r   r#   r$   s      r    r   zArrowParserWrapper._parse_kwds,   sv      $y}}Z88#+#3Ik*	i&& 	Q   di455r!   c                   ddddddd}|                                 D ]J\  }}|| j        v r<| j                            |          "| j                            |          | j        |<   K| j        }t          |t                    r|g}nd}|| j        d	<   d
 | j                                         D             | _        | j                            d          }|~t          |          r|| j        d<   nd|t          j
        j        k    rd| j        d<   nD|t          j
        j        k    rdd}|| j        d<   n |t          j
        j        k    rd | j        d<   d | j                                         D             | _        d| j        d         v | j        d<   | j        (d| j        v rd | j        d         D             | j        d<   | j        du | j        | j        n| j        d         | j        d| _        dS )z:
        Rename some arguments to pass to pyarrow
        include_columnsnull_valuesescape_charignore_empty_linesdecimal_point
quote_char)usecolsr$   
escapecharskip_blank_linesdecimal	quotecharNtimestamp_parsersc                &    i | ]\  }}||dv ||S )N)	delimiterr0   r-   r.    .0option_nameoption_values      r    
<dictcomp>z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>Y   sI     
 
 
)\'OP P P P Pr!   on_bad_linesinvalid_row_handlerr   strc                    t          j        d| j         d| j         d| j         t
          t                                 dS )Nz	Expected z columns, but found z: )
stacklevelskip)warningswarnexpected_columnsactual_columnstextr	   r
   )invalid_rows    r    handle_warningz?ArrowParserWrapper._get_pyarrow_options.<locals>.handle_warningk   si    MLK$@ L L&5L L9D9IL L%#3#5#5	    "6r!   c                    dS )NrD   r9   )_s    r    <lambda>z9ArrowParserWrapper._get_pyarrow_options.<locals>.<lambda>v   s    f r!   c                &    i | ]\  }}||dv ||S )N)r+   r,   true_valuesfalse_valuesr/   r6   r9   r:   s      r    r>   z;ArrowParserWrapper._get_pyarrow_options.<locals>.<dictcomp>x   sF      
  
  
)\'    r!    strings_can_be_nullc                    g | ]}d | S )fr9   )r;   ns     r    
<listcomp>z;ArrowParserWrapper._get_pyarrow_options.<locals>.<listcomp>   s+     7 7 7A7 7 7r!   skiprows)autogenerate_column_names	skip_rowsr#   )r   rA   )itemsr   r%   popdate_formatr&   rA   parse_optionscallabler   BadLineHandleMethodERRORWARNSKIPconvert_optionsheaderr#   read_options)r   mappingpandas_namepyarrow_namer]   r?   rK   s          r    _get_pyarrow_optionsz'ArrowParserWrapper._get_pyarrow_options:   sx   
 )&' 4&%
 
 *1 	E 	E%Kdi''DIMM+,F,F,R*.)--*D*D	,'
 &k3'' 	&-KK K)4	%&
 
-1Y__->->
 
 
 y}}^44#%% M<H"#899!?!EEE  ")  !?!DDD" " " " =K"#899!?!DDD<L<L"#89 
  
-1Y__->-> 
  
  
 79DIm<T6T23;#48L#L#L7 7!%!56G!H7 7 7D !23
 *.)<{& :&
 
r!   framer   c                   t          j                  }d}| j        | j        | j        t	          |          | _        t          | j                  |k    r@t          t	          |t          | j                  z
                      | j        z   | _        d}| j        _        |                     j                  \  }| j        Q| j                                        }t          | j                  D ]\  }}t          |          rj        |         ||<   n|j        vrt          d| d          | j        | j                            |          || j                            |          fn1j        |         | j                            j        |                   f\  }}	|	&|                             |	          |<   | j        |= ܉                    |dd           | j        )|s'dgt          j        j                  z  j        _        | j        t#          | j        t$                    r+fd| j                                        D             | _        nt)          | j                  | _        	                     | j                  n!# t*          $ r}
t          |
          d}
~
ww xY wS )z
        Processes data read in based on kwargs.

        Parameters
        ----------
        frame: DataFrame
            The DataFrame to process.

        Returns
        -------
        DataFrame
            The processed DataFrame.
        TNFzIndex z invalid)dropinplacec                H    i | ]\  }}|j         v |t          |          S r9   )columnsr   )r;   kvrk   s      r    r>   z>ArrowParserWrapper._finalize_pandas_output.<locals>.<dictcomp>   s<       1EM)) |A)))r!   )lenrp   re   namesranger)   _do_date_conversions	index_colcopy	enumerater   r(   dtyper%   astype	set_indexindexr&   r'   r[   r   	TypeError)r   rk   num_colsmulti_index_namedrM   index_to_setiitemkey	new_dtypees    `         r    _finalize_pandas_outputz*ArrowParserWrapper._finalize_pandas_output   s    u}%% ;z!;&!&xDJ4:(**
 "%3tz??(B"C"CDDtzQ
$)! JEM,,U]EBB5>%>..00L$T^44 , ,4d## >&+mD&9LOO..$%<d%<%<%<=== :)  :>>$//; tz~~d3344#mD14:>>%-PTBU3V3VW #C
 !,%*3Z%6%6y%A%Ac
 JsOOOLtTOBBB{"+<"%)FS1B-C-C$C!:! $*d++ 6    $
 0 0 2 2  

 *$*55
$TZ00 $ $ $ mm#$ s   J 
J;'J66J;c                    t          j        |          r(t          d |D                       st          d          t	          |          rt          d          d S )Nc              3  @   K   | ]}t          |t                    V  d S r   r&   rA   r;   xs     r    	<genexpr>z7ArrowParserWrapper._validate_usecols.<locals>.<genexpr>   s,      0U0UAs1C1C0U0U0U0U0U0Ur!   zwThe pyarrow engine does not allow 'usecols' to be integer column positions. Pass a list of string column names instead.z=The pyarrow engine does not allow 'usecols' to be a callable.)r   is_list_likeallr(   r_   )r   r1   s     r    _validate_usecolsz$ArrowParserWrapper._validate_usecols   s|    G$$ 	S0U0UW0U0U0U-U-U 	P   g 	O  	 	r!   c           	        t          d          }t          d          }|                                  	  |j        di | j        }n# t          $ r | j                            dd          }||                     |           | j                            dt                                }t          j	        |          rt          d |D                       st	          d           w xY w	 |                    | j         |j        di | j         |j        di | j        |          }n"# |j        $ r}t%          |          |d}~ww xY w| j        d	         }|t          j        u r|j        }	|                                }
t/          |j        j                  D ][\  }}|j                            |          r<|	                    ||	                    |                              |
                    }	\|                    |	          }|dk    r!|                    t<          j        
          }n|dk    rRtA                      }t=          j!                    ||"                                <   |                    |j        
          }nEtG                      r#|                    tI                      
          }n|                                }| %                    |          S )z
        Reads the contents of a CSV file into a DataFrame and
        processes it according to the kwargs passed in the
        constructor.

        Returns
        -------
        DataFrame
            The DataFrame created from the CSV file.
        pyarrowzpyarrow.csvr+   Nr,   c              3  @   K   | ]}t          |t                    V  d S r   r   r   s     r    r   z*ArrowParserWrapper.read.<locals>.<genexpr>   s=       6 6'(
1c""6 6 6 6 6 6r!   z9The 'pyarrow' engine requires all na_values to be strings)rf   r^   rd   dtype_backend)types_mappernumpy_nullabler9   )&r   rj   ConvertOptionsrd   r~   r%   r   setr   r   r   read_csvr   ReadOptionsrf   ParseOptionsr^   ArrowInvalidr   r   
no_defaultschemafloat64ry   typesis_nullfield	with_typecast	to_pandaspd
ArrowDtyper   
Int64Dtypenullr   r   r   )r   papyarrow_csvrd   includenullstabler   r   
new_schemanew_typer   
arrow_typerk   dtype_mappings                  r    readzArrowParserWrapper.read   s    (	220??!!###	8k8PP4;OPPOO 	 	 	*../@$GGG"&&w///(,,]CEEBBE#E** # 6 6,16 6 6 3 3   O   		(((4[4IIt7HII6k6LL9KLL /	 )  EE  	( 	( 	(a..a'	( 	/2 CN**Jzz||H!*5<+=!>!>  :8##J// !+:++A..88BB" "J JJz**EI%%OOO??EE... 122M')}M"''))$OO1BOCCEE')) 	&OO1J1L1LOMMEE OO%%E++E222s$   A B!C(,>D+ +
E
5EE
)r   r   r   r   )r   r   )rk   r   r   r   )r   r   )__name__
__module____qualname____doc__r   r   rj   r   r   r   __classcell__)r   s   @r    r   r       s              6 6 6 6Y
 Y
 Y
 Y
vG G G GR	 	 	 	F3 F3 F3 F3 F3 F3 F3 F3r!   r   )
__future__r   typingr   rE   pandas._configr   pandas._libsr   pandas.compat._optionalr   pandas.errorsr   r	   pandas.util._exceptionsr
   pandas.core.dtypes.commonr   pandas.core.dtypes.inferencer   pandasr   r   pandas.io._utilr   r   pandas.io.parsers.base_parserr   pandas._typingr   r   r9   r!   r    <module>r      sz   " " " " " "              5 5 5 5 5 5       > > > > > >        5 4 4 4 4 4 2 2 2 2 2 2 3 3 3 3 3 3                  5 4 4 4 4 4 *))))))O3 O3 O3 O3 O3 O3 O3 O3 O3 O3r!   