o
    Gh                     @   sh  d dl Z d dlmZ d dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Zd dl
mZ d dlmZ d dlmZmZmZ zd dlmZ d dlmZmZ W n eyW   dZY nw zd dlZd dlmZ d d	lmZ d d
lm Z  W n ey}   d ZZY nw zd dl!Z"W n ey   dZ"Y nw e	j#jZ$dd Z%dd Z&e	j#jdd Z'e	j#jdd Z(e	j#jdd Z)e	j#jdd Z*e	j#jdd Z+dd Z,dd Z-dd Z.e	j#j/dd  Z0e	j#jd!d" Z1e	j#jd#d$ Z2d%d& Z3e	j#jd'd( Z4d)d* Z5e	j#6d+de7 ge	j#6d,d-d.d/ Z8d0d1 Z9d2d3 Z:d4d5 Z;d6d7 Z<d8d9 Z=d:d; Z>d<d= Z?d>d? Z@e	j#jd@dA ZAe	j#jdBdC ZBe	j#jdDdE ZCdFdG ZDdHdI ZEe	j#jdJdK ZFdLdM ZGe	j#je	j#jHe	j#IdNe	j#IdOdPdQ ZJe	j#6dRdSdT dUdT dVdT dWdT ge	j#6dXdYdZgd[d\ ZKd]d^ ZLd_d` ZMdadb ZNdcdd ZOdedf ZPdgdh ZQe	j#jRdidj ZSdS )k    N)OrderedDict)copytree)Decimal)fs)util)_check_roundtrip_roundtrip_table_test_dataframe)_read_table_write_table)dataframe_with_lists)alltypes_samplec                 C   s   t dg di}tjtdd t|| d dd W d    n1 s$w   Y  tjtdd t|| d dd	 W d    d S 1 sEw   Y  d S )
Na         z"Unsupported Parquet format versionmatchztest_version.parquetz2.2versionz%Unsupported Parquet data page version)data_page_version)patablepytestraises
ValueErrorr   )tempdirr    r   a/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/pyarrow/tests/parquet/test_basic.pytest_parquet_invalid_version;   s   
"r    c                  C   sH   t g dd } t jj| gdgd}ddg}|D ]}t||d qd S )Nr   i f0namesi   i   )data_page_size)r   arrayTablefrom_arraysr   )arrt
page_sizestarget_page_sizer   r   r   test_set_data_page_sizeE   s   r,   c                  C   s,   t d} tjj| dd}t|dddd d S )Nd   Fpreserve_index
   r   2.4)r$   write_batch_sizer   )r	   r   r&   from_pandasr   dfr   r   r   r   test_set_write_batch_sizeO   s
   
r6   c                  C   sh   t d} tjj| dd}t|dddd tt t|dddd W d    d S 1 s-w   Y  d S )	Nr-   Fr.   r   r0   r1   )dictionary_pagesize_limitr$   r   r   )r	   r   r&   r3   r   r   r   	TypeErrorr4   r   r   r   "test_set_dictionary_pagesize_limitY   s   "r9   c               	   C   s   g } t jtdd}| t j|gd  t \}}t j|}| t j|gd  dD ]}dD ]}| D ]
}t|d||d q8q4q0d S )Nr0   sizer   )z1.0z2.0)TF2.6)r   r   use_dictionary)	r   RecordBatchr3   r   appendr&   from_batchesr   r   )tablesbatchr5   _r   r=   r   r   r   r   test_chunked_table_writef   s"   
rD   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|dd
}|
|sDJ d S )Nr0   r:   
memory_mapTr<   read_table_kwargsr   tmp_filewbr   )rE   r   r   r&   r3   r   stropenr   pqread_pandasequalsr   r5   r   filenamef
table_readr   r   r   test_memory_mapy      

rT   c                 C   s   t dd}tj|}t|ddidd t| d }t|d}t||dd	 W d    n1 s1w   Y  tj	|d
d}|
|sDJ d S )Nr0   r:   buffer_sizei  r<   rF   rH   rI   r   i   )rV   rJ   rP   r   r   r   test_enable_buffered_stream   rU   rW   c                 C   sj   t jt dggdg}d}| | }| rJ t|t| | s&J tt|}||s3J d S )N*   intsz	foo # bar)	r   r&   r'   r%   existsr   rK   r
   rO   )r   r   rQ   pathrS   r   r   r   test_special_chars_filename   s   r\   c                   C   sv   t jtdd td  W d    n1 sw   Y  t jtdd td  W d    d S 1 s4w   Y  d S )NNoner   )r   r   r8   rM   
read_tableParquetFiler   r   r   r   test_invalid_source   s   "r`   c              	   C   sx  ddl m} G dd d}| d }tdg di}t|| |jd|d	 tjtd
d t	j
|dgd W d    n1 sAw   Y  tjtdd t	j
|ddgd W d    n1 saw   Y  tjtdd t	j
||jd W d    n1 sw   Y  tt t	
|  W d    n1 sw   Y  t	
|}||ksJ W d    d S 1 sw   Y  d S )Nr   )mockc                   @   s   e Zd Zdd ZdS )z;test_read_table_without_dataset.<locals>.MockParquetDatasetc                 _      t d)NMockParquetDataset)ImportError)selfargskwargsr   r   r   __init__      zDtest_read_table_without_dataset.<locals>.MockParquetDataset.__init__N)__name__
__module____qualname__rh   r   r   r   r   rc      s    rc   test.parquetr   r   z#pyarrow.parquet.core.ParquetDataset)newzthe 'filters' keywordr   )integer=r   )filterszthe 'partitioning' keywordweekcolor)partitioningzthe 'schema' argumentschema)unittestra   r   r   r   patchr   r   r   rM   r^   rv   OSError)r   ra   rc   r[   r   resultr   r   r   test_read_table_without_dataset   s*   

"r{   c                  C   s*   t jttdgdgd} t| dd d S )Ni@  r!   r"   r   )row_group_size)r   r   listranger   )r)   r   r   r   (test_file_with_over_int16_max_row_groups   s   r   c                  C   s   t dd} tj| }tjjdd | D |jjd}|jdj	t
 ks)J |jdj	tt
 ks9J t|dd	 d S )
Nr0   r:   c                 S   s   g | ]}| d dd  qS )r   N)chunk).0colr   r   r   
<listcomp>   s    z.test_empty_table_roundtrip.<locals>.<listcomp>r"   null	null_listr<   r   )r   r   r&   r3   r'   itercolumnsrv   r#   fieldtyper   list_r   r4   r   r   r   test_empty_table_roundtrip   s   
 
r   c                  C   s$   t  } tjj| dd}t| d S )NFr.   )pd	DataFramer   r&   r3   r   )r5   emptyr   r   r   test_empty_table_no_columns   s   r   c                     sp   t t tt d g t dddgg}  fdd| D } fdd|D }tj|t }t| d S )N)int32list_stringr   )Gc                    s$   g | ]}t j|t  d  qS )r   )r   r%   structflattenr   rB   colsr   r   r      s    zEtest_write_nested_zero_length_array_chunk_failure.<locals>.<listcomp>c                    s"   g | ]}t jj|t  d qS )ru   )r   r>   r'   rv   r   r   r   r   r      s    )	r   r   r   r   stringr&   r@   rv   r   )data	my_arrays
my_batchestblr   r   r   1test_write_nested_zero_length_array_chunk_failure   s   

r   c                 C   s   | d }t dtjdtjdi}t|| t|}| }t	|| t
| d }t dtjdtjdi}t|| t|}| }t	|| d S )Nzzzz.parquetxr0   dtype)r   r   nparangeint64r   r
   	to_pandastmassert_frame_equalrK   )r   r[   r5   rS   df_readr   r   r   test_multiple_path_types  s   

r   c                 C   s   | d }t dg di}t|| t|}t|}||s"J tt	 t|t
 d W d    d S 1 s;w   Y  d S )Nrm   r   r   
filesystem)r   r   r   r   FSProtocolClassr
   rO   r   r   r8   r   
FileSystem)r   r[   r   fs_protocol_objrz   r   r   r   test_fspath  s   

"r   r   name)data.parquetu   例.parquetc                 C   s   t dg di}| | }t|t| t|  tj||d}W d    n1 s,w   Y  ||s8J |	  |
 rBJ t|  tj|||d W d    n1 sZw   Y  t|}||skJ d S )Nr   r   r   )r   r   rM   write_tablerK   r   
change_cwdr^   rO   unlinkrZ   )r   r   r   r   r[   rz   r   r   r   test_relative_paths&  s   
r   c                   C   s:   t t td W d    d S 1 sw   Y  d S )Nzi-am-not-existing.parquet)r   r   FileNotFoundErrorrM   r^   r   r   r   r   test_read_non_existing_file?  s   "r   c                  C   sT   G dd dt j} tjtdd t| d W d    d S 1 s#w   Y  d S )Nc                   @   s   e Zd Zdd Zdd ZdS )z3test_file_error_python_exception.<locals>.BogusFilec                 W   rb   NzorglubZeroDivisionErrorre   rf   r   r   r   readG  ri   z8test_file_error_python_exception.<locals>.BogusFile.readc                 W   rb   r   r   r   r   r   r   seekJ  ri   z8test_file_error_python_exception.<locals>.BogusFile.seekN)rj   rk   rl   r   r   r   r   r   r   	BogusFileF  s    r   r   r       )ioBytesIOr   r   r   rM   r^   )r   r   r   r    test_file_error_python_exceptionE  s   "r   c                 C   s   t dg di}t|t| d  tt| d d}t|}W d    n1 s,w   Y  ||s8J tt| d d}tt |}W d    n1 sTw   Y  ||s`J d S )Nr   r   r   rb)	r   r   rM   r   rK   rL   r^   rO   
PythonFile)r   r   rR   rz   r   r   r   test_parquet_read_from_bufferR  s   r   c                  C   s,  t ttttd} t ttttd}t ddgd }| | g}t jj|ddgd}t	||dddd	 t	||ddgdgd	 t	||dddgddgd	 t jj| | ||gg d
d}t	||ddgddgd t jj|gdgd}t
jtdd t	||ddd W d    d S 1 sw   Y  d S )Nr-   TF2   r   br"   gzip)expectedcompressionr=   use_byte_stream_splitr   r   cdr   r   )r   r=   r   tmpBYTE_STREAM_SPLIT only supportsr   )r   r   r=   )r   r%   r}   mapfloatr~   intr&   r'   r   r   r   IOError)	arr_floatarr_intarr_bool
data_floatr   mixed_tabler   r   r   test_byte_stream_split`  s:   "r   c              	   C   sX  t jttttdt ddd}t jttttdt ddd}t jttttdt ddd}t dd	gd
 }|||g}t jj|g dd}t	||dd	dd t
j| d}tj||dd	dd t|}|jd}	|jd}
|	jdksJ |
jdksJ t	||dd	ddddd t jj||||gg dd}t	||d	dd d S )Nr-      r   r      	      TFr   r   r   r   r"   r   )r   r   r=   store_decimal_as_integerrm   )r   r=   r   r   r   INT32INT64DELTA_BINARY_PACKEDr   r   )r   r   r=   r   column_encodingr   )r   r=   r   )r   r%   r}   r   r   r~   
decimal128r&   r'   r   osr[   joinrM   r   r_   rv   columnphysical_type)r   arr_decimal_1_9arr_decimal_10_18arr_decimal_gt18r   data_decimalr   pqtestfile_path
pqtestfilepqcol_decimal_1_9pqcol_decimal_10_18r   r   r   r   test_store_decimal_as_integer  s^   






r   c               
   C   s  t ttttd} t ttttd}t jdd tdD t  d}t jdd tdD t dd}t g dd }t jj	| ||||gg d	d
}t
||ddddddd t
||ddd t
||dddddd t
||dddddd t
||ddddddd t
||dddid tjtdd t
||dddddd W d    n1 sw   Y  tjtdd t
||dddddd W d    n1 sw   Y  tjtdd t
||ddd W d    n1 sw   Y  tjtdd t
||dddid W d    n	1 sw   Y  tt t
||dgddid W d    n	1 s8w   Y  tt t
||ddid  W d    n	1 sWw   Y  tt t
||ddgddddd! W d    n	1 s{w   Y  tt t
||dd"ddddd! W d    n	1 sw   Y  tt t
||dd"d W d    d S 1 sw   Y  d S )#Nr-   c                 S   s   g | ]}t |qS r   )rK   r   r   r   r   r   r     s    z(test_column_encoding.<locals>.<listcomp>r   c                 S   s   g | ]	}t |d qS )r0   )rK   zfillr   r   r   r   r     s    r0   )FTFF   )r   r   r   r   er"   FBYTE_STREAM_SPLITPLAINr   )r   r=   r   r   r   DELTA_LENGTH_BYTE_ARRAYDELTA_BYTE_ARRAYr   RLEr   r   )r   r   r   z)DELTA_BINARY_PACKED encoder only supportsz+'RLE_DICTIONARY' is already used by defaultRLE_DICTIONARYz/Unsupported column encoding: 'MADE_UP_ENCODING'r   MADE_UP_ENCODINGr   )r   r   )r   r=   r   r   T)r   r%   r}   r   r   r~   r   binaryr&   r'   r   r   r   r   ry   r   r8   )r   r   arr_binarr_flbar   r   r   r   r   test_column_encoding  s    


$r  c               	   C   s   t ttttd} | | g}t jj|ddgd}t||ddd t||ddd t||dd	d
d t||dddd
d t||ddd t||ddd g d}t	
 }|D ]#\}}tttf t||||d W d    n1 sww   Y  qYd S )N  r   r   r"   r   r   )r   r   compression_levelr   snappyr   )r   r   r   r   lz4r   ))r     )r   i)r]   i  )lzo   )r   r  )r   r%   r}   r   r   r~   r&   r'   r   r   r   r   r   r   ry   r   )r(   r   r   invalid_combinationsbufcodeclevelr   r   r   test_compression_level<  s>   	r  c                  C   sP   t g d} d}t j| g|g}t|ddid}d}|jd j|ks&J d S )N)r   r   r   r   r  zprohib; ,	{}flavorspark)write_table_kwargsprohib______r   )r   r%   r&   r'   r   rv   r   )a0r   r   rz   expected_namer   r   r    test_sanitized_spark_field_namesi  s   r  c                  C   sl   t dd} tj| }t }t||ddd |d t|dd}|d t|d	d}|	|s4J d S )
Ni'  r:   SNAPPYr<   )r   r   r   T)use_threadsF)
r   r   r&   r3   r   r   r   r   r
   rO   )r5   r   r  table1table2r   r   r   test_multithreaded_readt  s   


r  c                  C   s   t jtdgg dd} tj|  }t	 }t
||dd |d t|}||s0J tt t
||dd W d    d S 1 sHw   Y  d S )Nr  )ABCD)columns)
chunk_sizer   )r   r   r   r   r   r&   r3   reset_indexr   r   r   r   r
   rO   r   r   r   )r   r   r  rz   r   r   r   test_min_chunksize  s   
"r#  c                 C   s   t tdttddtdddtjddd	d
g dt tdt jdddt jddddt jddddd	}t	j
|}| d }z	t||dd W n
 t	jyX   Y nw | r_J d S )Nabcr   r  r      u1      @      @float64r   TFT20130101periodsz
US/Eastern)r-  tzns)r-  freq)	r   r   r   r   r   rR   ghirH   r1   r   )r   r   r}   r~   r   r   astypeCategorical
date_ranger   r&   r3   r   ArrowExceptionrZ   )r   r5   pdfrQ   r   r   r   (test_write_error_deletes_incomplete_file  s(   
r9  c              
   C   sN   d}zt | W d S  ty& } z||jd v sJ W Y d }~d S d }~ww )Nznonexistent-file.parquetr   )rM   r^   	Exceptionrf   )r   r[   r   r   r   r   test_read_non_existent_file  s    r;  c                 C   sH   t   t jdd t| d  W d    d S 1 sw   Y  d S )Nerror)actionzv0.7.1.parquet)warningscatch_warningssimplefilterrM   r^   )datadirr   r   r   test_read_table_doesnt_warn  s   
"rB  c                  C   s`   t jt ddggdg} t }tj| |dd |d t	|}t
| |   d S )Nr$  defsome_colr   r   r   )r   r&   r'   r%   r   r   rM   r   r   r^   r   r   r   )r   rR   	roundtripr   r   r   test_zlib_compression_bug  s   

rG  c              	   C   s   t | d }tjtjtfdd" t|d}W d    n1 s!w   Y  t| W d    n1 s5w   Y  tjtjtfdd( t|d}|	d W d    n1 sZw   Y  t| W d    d S 1 sow   Y  d S )Nrm   zsize is 0 bytesr   rI   zsize is 4 bytess   ffff)
rK   r   r   r   ArrowInvalidry   rL   rM   r^   write)r   r[   rR   r   r   r   test_parquet_file_too_small  s"   "rJ  zignore:RangeIndex:FutureWarningz.ignore:tostring:DeprecationWarning:fastparquetc           	      C   s   t d}ttdttddtjddddg d	tjd
ddt	g dd}t
|}t| d }tj||d d ||}| }t|| t| d }||| t|}|d t|d< t| | d S )Nfastparquetr$  r   r  r'  r(  r)  r   r*  r+  r   r,  )r   r   r   )r   r   r   r   r   rR   zcross_compat_arrow.parquetrE  z cross_compat_fastparquet.parquetrR   )r   importorskipr   r   r}   r~   r   r   r6  r5  r   r   rK   rM   r   r_   r   r   r   rI  rN   r4  object)	r   fpr5   r   
file_arrowfp_filedf_fpfile_fastparquettable_fpr   r   r   $test_fastparquet_cross_compatibility  s*   



rT  array_factoryc                   C      t dd gd S Nr   r0   r   r%   r   r   r   r   <lambda>      rY  c                   C      t dd gd  S rW  r   r%   dictionary_encoder   r   r   r   rY        c                   C   rV  N r0   rX  r   r   r   r   rY    rZ  c                   C   r[  r_  r\  r   r   r   r   rY    r^  read_dictionaryFTc                 C   s   t jd|  i}t }tj||dd |d |rdgnd }tj|d|d}|j	D ]}|j
\}| d }| |jd ksCJ q,d S )	Nr   T)r=   r   F)r  ra  r       )r   r&   from_pydictr   r   rM   r   r   r^   r  chunksbuffers
to_pybytesr;   )rU  ra  
orig_tablebior   r   r   r  r   r   r   test_buffer_contents  s   

ri  c                 C   sP   t jt tdgdgd}| d }tj||dd t|}||s&J d S )Nr  rY   r"   zarrow-10480.pyarrow.gzGZIPrE  )r   r   r%   r~   rM   r   r^   rO   )r   r   r[   rz   r   r   r   "test_parquet_compression_roundtrip  s
   
rk  c                 C   s   t jt jg ddgdg}| d }d}t||j}t|D ]}|| q W d    n1 s2w   Y  t	|}|j
j|ksDJ t|D ]}|||sTJ qHd S )Nr   r   r!   zempty_row_groups.parquetr   )r   r&   r'   r%   rM   ParquetWriterrv   r~   r   r_   metadatanum_row_groupsread_row_grouprO   )r   r   r[   
num_groupswriterr3  readerr   r   r   test_empty_row_groups+  s   
rs  c                 C   sV   d gd }| dg tj|gdg}| d }t|| t|}||ks)J d S )Ni   r   r   zarrow-11607.parquet)r?   r   r&   r'   rM   r   r^   )r   r   r   r[   r  r   r   r   test_reads_over_batch=  s   

rt  c                 C   s   | d }|j dd tjg dg dgddgd}t||d	  tjg d
g dgddgd}t||d  tt|}tjg dg dgddgd}||ksTJ d S )N dataset_column_order_permutationT)exist_okr   )皙?皙?333333?r   r   r"   zdata1.parquet)皙?      ?333333?)r  r   r%  zdata2.parquet)r   r   r   r  r   r%  )rw  rx  ry  rz  r{  r|  )mkdirr   r   rM   r   r^   rK   )r   casedata1data2r   r  r   r   r    test_permutation_of_column_orderK  s   
r  c                 C   s  | d }t ttd}d}t j|g| dd t|D d}t|| tjt	dd tj
|d	| d
 W d    n1 sAw   Y  tjt	dd tj
||d W d    n1 s_w   Y  tj
|d| d
}||kssJ tj
|d| d}||ksJ t
|}||ksJ d S )Nzlargethrift.parquetr0   r  c                 S   s   g | ]}d | qS )some_long_column_name_r   )r   r3  r   r   r   r   e  r^  z+test_thrift_size_limits.<locals>.<listcomp>r"   z1Couldn't deserialize thrift:.*Exceeded size limitr   r   )thrift_string_size_limit)thrift_container_size_limitr-   r   )r   r%   r}   r~   r   rM   r   r   r   ry   r^   )r   r[   r%   num_colsr   gotr   r   r   test_thrift_size_limits^  s4   
r  c           
      C   s  | d }t dg di}tj||dd tj|dd}||ks"J t| }|d |d ks2J |d |d |d< |d< | d	 }|| tj|d
d}||ksUJ |t dg diksbJ tj	t
dd tj|dd}W d   n1 s{w   Y  tj|d
d}| }	|	|ksJ |	t dg diksJ tj|dd}tj	t
dd | }W d   dS 1 sw   Y  dS )zUCheck that checksum verification works for datasets created with
    pq.write_table()zcorrect.parquetr   r   r   r   r  Twrite_page_checksumpage_checksum_verification   $   zcorrupted.parquetFr   r   r   r  CRC checksum verificationr   N)r   r   rM   r   r^   	bytearray
read_byteswrite_bytesr   r   ry   r_   r   )
r   original_path
table_origtable_checkbin_datacorrupted_pathtable_corruptrC   corrupted_pq_filetable_corrupt2r   r   r   +test_page_checksum_verification_write_tabley  s<   

"r  c                 C   s>  t dg di}| d }tj||dd t| }t|dks#J |d }tj|dd}||ks4J t|	 }|d	 |d
 ksDJ |d
 |d	 |d	< |d
< | d }t
|| ||j }|| tj|dd}	|	|ksqJ |	t dg diks~J tjtdd tj|dd}
W d   dS 1 sw   Y  dS )zXCheck that checksum verification works for datasets created with
    pq.write_to_datasetr   r  correct_dirTr  r   r   r  r  r  corrupted_dirFr  r  r   N)r   r   rM   write_to_datasetr}   iterdirlenr^   r  r  r   r   r  r   r   ry   )r   r  original_dir_pathoriginal_file_path_listr  r  r  corrupted_dir_pathcorrupted_file_pathr  rC   r   r   r   test_checksum_write_to_dataset  s4   


"r  )Tr   collectionsr   r   r>  shutilr   decimalr   r   pyarrowr   r   pyarrow.testsr   pyarrow.tests.parquet.commonr   r   r	   pyarrow.parquetparquetrM   r
   r   rd   pandasr   pandas.testingtestingr   pyarrow.tests.pandas_examplesr   r   numpyr   mark
pytestmarkr    r,   r6   r9   rD   rT   rW   r\   r`   r{   slowr   r   r   r   r   r   parametrizeLocalFileSystemr   r   r   r   r   r   r  r  r  r  r#  r9  r;  rB  rG  rJ  rK  filterwarningsrT  ri  rk  rs  rt  r  r  r  datasetr  r   r   r   r   <module>   s   


	









&6 -





$: