o
    Fh&                     @   s   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	 d dlm
Z d dlmZ d dlmZ G dd dZd	d
 Zh dZ		dddZ		dddZdddZdS )    )SequenceN)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   @   s6   e Zd ZdZdddZdddZdd	 Zdd
dZdS )FeatherDataseta  
    Encapsulates details of reading a list of Feather files.

    Parameters
    ----------
    path_or_paths : List[str]
        A list of file names
    validate_schema : bool, default True
        Check that individual file schemas are all the same / compatible
    Tc                 C   s   || _ || _d S N)pathsvalidate_schema)selfpath_or_pathsr    r   P/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/pyarrow/feather.py__init__*   s   
zFeatherDataset.__init__Nc                 C   sj   t | jd |d}|g| _|j| _| jdd D ]}t ||d}| jr)| || | j| qt| jS )a,  
        Read multiple feather files as a single pyarrow.Table

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file

        Returns
        -------
        pyarrow.Table
            Content of the file as a table (of columns)
        r   columns   N)
read_tabler   _tablesr   r   validate_schemasappendr   )r   r   _filpathtabler   r   r   r   .   s   
zFeatherDataset.read_tablec                 C   s0   | j |j std| d| j  d|j  d S )Nz
Schema in z was different. 
z

vs

)r   equals
ValueError)r   piecer   r   r   r   r   G   s   
zFeatherDataset.validate_schemasc                 C   s   | j |dj|dS )a  
        Read multiple Parquet files as a single pandas DataFrame

        Parameters
        ----------
        columns : List[str]
            Names of columns to read from the file
        use_threads : bool, default True
            Use multiple threads when converting to pandas

        Returns
        -------
        pandas.DataFrame
            Content of the file as a pandas DataFrame (of columns)
        r   )use_threadsr   	to_pandas)r   r   r    r   r   r   read_pandasL   s   zFeatherDataset.read_pandas)Tr   )NT)__name__
__module____qualname____doc__r   r   r   r#   r   r   r   r   r
      s    

r
   c                 C   sL   |j dkrd S |jt t fv rtd|  dtd|  d|j d)Nr   zColumn 'zg' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurez
' of type zU was chunked on conversion to Arrow and cannot be currently written to Feather format)
num_chunkstypeextbinarystringr   )namecolr   r   r   check_chunked_overflow`   s   
r/   >   lz4zstduncompressed   c                 C   sv  t jrt jrt| t jjr|  } t | rH|dkrd}n|dkr$d}ntdt	j
| |d}|dkrGt|jjD ]\}}	|| }
t|	|
 q9n| }|dkrot|jtt|jkr^td|durftd|durntd	n|du r{td
r{d}n|dur|tvrtd| dt ztj||||||d W dS  ty   t|trzt| W   tjy   Y  w  w )a  
    Write a pandas.DataFrame to Feather format.

    Parameters
    ----------
    df : pandas.DataFrame or pyarrow.Table
        Data to write out as Feather format.
    dest : str
        Local destination path.
    compression : string, default None
        Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
        LZ4 for V2 files if it is available, otherwise uncompressed.
    compression_level : int, default None
        Use a compression level particular to the chosen compressor. If None
        use the default compression level
    chunksize : int, default None
        For V2 files, the internal maximum size of Arrow RecordBatch chunks
        when writing the Arrow IPC file format. None means use the default,
        which is currently 64K
    version : int, default 2
        Feather file version. Version 2 is the current. Version 1 is the more
        limited legacy format
    r   Fr3   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize option	lz4_framer0   zcompression="z " not supported, must be one of )compressioncompression_level	chunksizeversion)r   have_pandas
has_sparse
isinstancepdSparseDataFrameto_denseis_data_framer   r   from_pandas	enumerater   namesr/   lencolumn_namessetr   is_available_FEATHER_SUPPORTED_CODECSr   write_feather	Exceptionstrosremoveerror)dfdestr6   r7   r8   r9   r4   r   ir-   r.   r   r   r   rI   s   sb   



rI   TFc                 K   s    t | |||djdd|i|S )a  
    Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
    feather.read_table.

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. If false the
        restriction is used in the conversion to Pandas as well as in the
        reading from Feather format.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str.
    **kwargs
        Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

    Returns
    -------
    df : pandas.DataFrame
        The contents of the Feather file as a pandas.DataFrame
    )r   
memory_mapr    r    Nr   r!   )sourcer   r    rR   kwargsr   r   r   read_feather   s   rU   c                 C   s   t j| ||d}|du r| S t|tstdt|jdd |D }t	t
dd |r5||}n t	t
dd |rD||}nd	d |D }td
| d| |jdk r\|S tt||krf|S ||S )a  
    Read a pyarrow.Table from Feather format

    Parameters
    ----------
    source : str file path, or file-like object
        You can use MemoryMappedFile as source, for explicitly use memory map.
    columns : sequence, optional
        Only read a specific set of columns. If not provided, all columns are
        read.
    memory_map : boolean, default False
        Use memory mapping when opening file on disk, when source is a str
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.

    Returns
    -------
    table : pyarrow.Table
        The contents of the Feather file as a pyarrow.Table
    )use_memory_mapr    Nz&Columns must be a sequence but, got {}c                 S   s   g | ]}t |qS r   )r)   ).0columnr   r   r   
<listcomp>  s    zread_table.<locals>.<listcomp>c                 S      | t kS r   )inttr   r   r   <lambda>      zread_table.<locals>.<lambda>c                 S   rZ   r   )rK   r\   r   r   r   r^     r_   c                 S   s   g | ]}|j qS r   )r$   )rW   r]   r   r   r   rY     s    z.Columns must be indices or names. Got columns z
 of types    )r   FeatherReaderreadr<   r   	TypeErrorformatr)   r$   allmapread_indices
read_namesr9   sortedrF   select)rS   r   rR   r    readercolumn_typesr   column_type_namesr   r   r   r      s2   


r   )NNNr3   )NTF)NFT)collections.abcr   rL   pyarrow.pandas_compatr   pyarrow.libr   r   r   r   libr*   pyarrowr   pyarrow._featherr	   r
   r/   rH   rI   rU   r   r   r   r   r   <module>   s"   B
S
 