o
    GhE                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ zd dlZW n eyI   dZY nw d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ zd dl%Z&W n ey   dZ&Y nw zd dl'm(Z) W n ey   dZ)Y nw zd dl*m+Z, W n ey   dZ,Y nw ej-j(Z.G dd dZ/dd	 Z0d
d Z1dd Z2ej3dd Z4ej3dd Z5ej3dddd Z6ej3dd Z(ej3ddgddgddd Z7ej-j+dd  Z8d!d" Z9ej-j+d#d$ Z:ej-j+d%d& Z;ej-j+d'd( Z<ej-j+d)d* Z=ej-j+d+d, Z>ej-j+d-d. Z?ej-j+d/d0 Z@d1d2 ZAd3d4 ZBd5d6 ZCej-Dd7g d8d9eEd:eFd;eFfd<d=ZGd>d? ZHd@dA ZIej-j+dBdC ZJej-j+dDdE ZKej-j+dFdG ZLdHdI ZMdJdK ZNej-DdLejOdMddNdOdPggej-DdQddgej-j+dRdS ZPej-j+dTdU ZQej-j+ej-jRdVdW ZSdXdY ZTdZd[ ZUej-j+d\d] ZVej-j+dd^d_ZWej-j+d`da ZXej-j%ej-j+dbdc ZYej-j+ddde ZZej-j+dfdg Z[ej-j+dhdi Z\ej-j%ej-j+djdk Z]ej-j+dldm Z^ej-j+dndo Z_ddpdqZ`ej-j%ej-j+drds Zaej-j+dtdu Zbej-j%ej-j+dvdw Zcej-j+dxdy Zdej-j+dzd{ Zeej-j+d|d} Zfej-j+d~d Zgej-j+dd Zhej-j+dd Ziej-j%ej-j+dd Zjej-j+ej-Dddd dd gdd Zkej-j+ej-Ddddgej-Dddd dd gdd Zlej-Dddd dd gdd Zmej-Dddd dd gdd Zndd Zodd Zpej-j+ej-j%dd Zqdd Zrdd Zsdd Ztdd Zudd Zvdd Zwdd Zxej-j+dd Zydd ZzdddZ{dd Z|dd Z}dd Z~ej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+ddĄ Zej-j+ddƄ Zej-j+ddȄ Zej-j+ddʄ Zej-j+dd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zej-Ddddgddׄ Zddل Zej-j+ddۄ Zej-j+dd݄ Zej-j+dd߄ Zej-j+dd Zdd Zdd Zej-j+ej-Ddddgej-Ddddgej-Ddddgej-Ddg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfgdd Zej-j%dd Zej3dd Zej-j+ej-jRdd Zej-j+ej-jRdd Zej-j+ej-jRdd Zej-j+ej-jRdd  Zej-j+dd Zej-j+dd Zej-j%dd Zej-j+dd Zej-j+d	d
 Zej-j+dd Zdd Zdd Zdd Zdd Zej-j+dd Zej-j+dd Zdd Zej-jdd Zej-jdd Zdd  Zej-jd!d" Zej-j%d#d$ Zej-j%ej-Dd%g d&d'd( Zd)d* Zd+d, Zd-d. Zej-j%d/d0 Zej-j%d1d2 Zej-j%d3d4 Zd5d6 Zd7d8 Zd9d: Zej-j%ej-Dd%g d;d<d= Zd>d? Zej-j+ej-j%d@dA Zej-j+ej-j%ej-jejdBkdCdDdEdF Zej-j+ej-j%dGdH Zej-j+dIdJ Zej-j+ej-j%dKdL ZdMdN ZdOdP Zej-j+ej-j%dQdR Zej-j+ej-j%dSdT Zej-j+ej-j%dUdV Zej-j+ej-j%dWdX Zej-j+dYdZ Zej-j+ej-j%d[d\ Zej-j+ej-j%d]d^ Zʐd_d` Zej-j%ej-j+dadb Zej-j+ej-j%dcdd Z͐dedf Z	dܐdgdhZej-j+didj Zej-j+ej-j%dkdl Zѐdmdn ZҐdodp ZӐdqdr Zej-j+ej-j	dsdt ZՐdudv Zej-j%dwdx Zאdސd|d}Zؐd~d Zِdd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zej-j+ej-j%dd Zej-j+ej-j%dd Zdd Zdd Zdd Zdd Zdd Zej-jej-j+dd Zdd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zdd Zej-j+dd Zej-j+ej-jRdd ZdZej-j+ej-jRdd Zej-j+dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-Ddddgdd Zej-DdddgddÄ ZdĐdń ZdƐdǄ Zej-DdȐdɡdʐd˄ Zd̐d̈́ Zej-j+dΐdτ ZdАdф Z dҐdӄ Zej-DdddgdԐdՄ Zd֐dׄ Zdؐdل Zdڐdۄ ZdS (      N)copytreequote)is_threading_enabled)FSProtocolClassProxyHandler_configure_s3_limited_user_filesystem_uri
change_cwdc                   @   s   e Zd Zdd ZdddZdS )TableStreamWrapperc                 C   s
   || _ d S Ntable)selfr    r   [/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/pyarrow/tests/test_dataset.py__init__F      
zTableStreamWrapper.__init__Nc                 C   s   | j |S r   )r   __arrow_c_stream__)r   requested_schemar   r   r   r   I      z%TableStreamWrapper.__arrow_c_stream__r   )__name__
__module____qualname__r   r   r   r   r   r   r   E   s    r   c                 C   s~   dd l }dd l}| ddd}|jdd}|g d}g }t| D ]}|||t|t|f ||7 }q"tj	|g ddS )	Nr   i        )days)greenblueyellowredorange)dateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr(   r)   dayintervalcolorsdatair   r   r   _generate_dataM   s   
r8   c              
   C   s\   t t dt  t dt  t dt  t dt  g}t jj| |dd}|	 S )Nr"   r#   r$   r%   F)schemapreserve_index)
par9   fielddate32int64float64stringTablefrom_pandasreplace_schema_metadata)dfr9   r   r   r   r   _table_from_pandas]   s   rE   c              	   C   sx   |   D ]5}| '}t|tjsJ |jrJ | sJ | s$J | r*J W d    n1 s4w   Y  qd S r   )	get_fragmentsopen
isinstancer;   
NativeFileclosedseekablereadablewritable)datasetfragmentnfr   r   r   +assert_dataset_fragment_convenience_methodsh   s   

rQ   c            
      C   s$  t  } ddg}t|D ]\}}| d| d}| | | |e}ttdttttdttt	td|gd dd tdD g}t
dt
 fd	t
 fd
t
 fdt
 fdt
t
 t
 dfg}t
j||d}t
j|g}	t|	| W d    n1 sw   Y  q| S )Nzsubdir/1/xxxzsubdir/2/yyyz/file.parquetr   c                 S   "   g | ]}|d  t |d  dqS    abstr).0jr   r   r   
<listcomp>      " zmockfs.<locals>.<listcomp>i64f64rZ   conststructrV   r9   )fs_MockFileSystem	enumerate
create_diropen_output_streamlistr,   mapr.   rZ   r;   r9   r>   r?   r@   rb   record_batchrA   from_batchespqwrite_table)
mockfsdirectoriesr7   	directorypathoutr6   r9   batchr   r   r   r   ro   s   s6   





ro   c                    sx   ddl m}m} ddlm} |   fddt  fdd}| |d	| || }tjfd
d}||fS )Nr   )LocalFileSystemPyFileSystemr   )r   c                    s    fdd| D S )Nc                    s   h | ]	}  t|qS r   )normalize_pathrZ   r[   plocalfsr   r   	<setcomp>       z6open_logging_fs.<locals>.normalized.<locals>.<setcomp>r   )pathsrz   r   r   
normalized   s   z#open_logging_fs.<locals>.normalizedc                    s$     t|}| | j|S r   )rw   rZ   add_fsopen_input_file)r   rr   )r{   openedr   r   r      s   
z(open_logging_fs.<locals>.open_input_filer   c              	   3   sB       zd V  W   | ksJ d S   | ks J w r   )clear)expected_opened)r   r   r   r   assert_opens   s
   .z%open_logging_fs.<locals>.assert_opens)	
pyarrow.fsru   rv   test_fsr   setsetattr
contextlibcontextmanager)monkeypatchru   rv   r   r   rd   r   r   )r{   r   r   r   open_logging_fs   s   r   module)scopec              	      s  | j jd | j jd td t }t  fddtdd D \}}}|d tt	fd	dtdd
 D D ]'\}}d| d}|
|}tt|| W d    n1 skw   Y  qI|d ||jjj|jgD ]7\}	}d|	d  d|	d  }
|
 d}||
 |
|}tt|| W d    n1 sw   Y  q|d ||jjj|jjjgD ]7\}	}d|	d  d|	d  }
|
 d}||
 |
|}tt|| W d    n1 sw   Y  q|d |dD ]2\}	}d|	 }
|
 d}||
 |
|}tt|| W d    n	1 s9w   Y  q|S )Npandasparquet  c                    "   g | ]} j ||d    qS )   ilocr[   r7   )rD   r2   r   r   r]      r^   z!multisourcefs.<locals>.<listcomp>r   r   plainc                    r   
   r   r   )df_ar2   r   r   r]      r^   r   zplain/chunk-rR   r9   zschema//r   z/chunk.parquethivez
hive/year=z/month=
hive_colorr%   zhive_color/color=)configpyarrowrequiresr8   rd   re   lenr,   rg   rf   rh   rm   rn   rE   groupbyr"   dt	dayofweekr%   yearmonth)requestro   df_bdf_cdf_dr7   chunkrr   rs   partfolderr   )rD   r   r2   r   multisourcefs   sT   (
,



"





r   c              
   C   sf   t  }tjddd}t d}t ttdt	 tdt
 g|_t | |||}| S )NsubdirT	recursivegroupkey)dsParquetFileFormatrd   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr;   r9   r<   int32r@   partitioningFileSystemDatasetFactoryfinish)ro   formatselectoroptionsfactoryr   r   r   rN      s   
rN   TFthreadedserial)paramsidsc                    s   | j  G  fddd}| S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                       sT   e Zd Z fddZ fddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )zdataset_reader.<locals>.readerc                    s
    | _ d S r   use_threads)r   r   r   r   r     r   z'dataset_reader.<locals>.reader.__init__c                    s   d|v rt d |d< d S )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)r   kwargsr   r   r   _patch_kwargs
  s
   z,dataset_reader.<locals>.reader._patch_kwargsc                 [      |  | |jdi |S Nr   )r   to_tabler   rN   r   r   r   r   r        
z'dataset_reader.<locals>.reader.to_tablec                 [   r   r   )r   
to_batchesr   r   r   r   r     r   z)dataset_reader.<locals>.reader.to_batchesc                 [   r   r   )r   scannerr   r   r   r   r     r   z&dataset_reader.<locals>.reader.scannerc                 [      |  | |j|fi |S r   )r   head)r   rN   num_rowsr   r   r   r   r        
z#dataset_reader.<locals>.reader.headc                 [   r   r   )r   take)r   rN   indicesr   r   r   r   r   !  r   z#dataset_reader.<locals>.reader.takec                 [   r   r   )r   
count_rowsr   r   r   r   r   %  r   z)dataset_reader.<locals>.reader.count_rowsN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   reader  s    r   )param)r   r   r   r   r   dataset_reader   s   	$r   c                    sX  t t dt  g}t  ddg}dd tddD } fddt||D }td	td
k}tj	|| |d}tj	j
|| ||d}||fD ]}t|tj	sYJ t|jtjsbJ |j|sjJ t|jt|ksuJ t| }t|||D ]q\}	}
}|	j|
sJ |	j|ksJ t|	jtjsJ t|	tjsJ |	jdgksJ |	jdksJ t|	 }|	jt|  krdksJ  J t|d tjsJ |d j|ksJ |d jdgksJ |d jdksJ qt|jtddkd}t|dks	J qOtj	|| d}|jtdsJ tj	j
|| d}|jtds5J | D ]}	|	jtdsGJ q9tjtdd t	| | W d    n	1 scw   Y  tjtdd tj	|| dd W d    n	1 sw   Y  tjtdd tj	j
| d W d    d S 1 sw   Y  d S )Nra   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetc                 S   s   g | ]	}t d |kqS )r   r   r<   r[   xr   r   r   r]   3  r}   z+test_filesystem_dataset.<locals>.<listcomp>r   rU   c                    s   g | ]\}}  ||qS r   make_fragment)r[   rr   r   file_formatro   r   r   r]   4  s    leveli9  )r9   r   
filesystemroot_partition)r9   r   r   
partitionsr   r   filter   r9   r   r   Tzincorrect typematch)r9   r   r   r   )r;   r9   r<   r>   r   r   r,   zipscalarFileSystemDataset
from_pathsrH   r   partition_expressionequalsr   filesri   rF   rr   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_groupr   pytestraises	TypeError)ro   r9   r~   r   	fragmentsr   dataset_from_fragmentsdataset_from_pathsrN   rO   	partitionrr   row_group_fragmentsr   r   r   test_filesystem_dataset,  sx   "$r  c                 C   s   t t dt  g}t }dg}tjj|||t	 d}|
  tt | | W d    d S 1 s9w   Y  d S )Nf1znonexistingfile.arrowr   )r;   r9   r<   r>   r   IpcFileFormatr   r   rd   ru   rF   r   r   FileNotFoundErrorr   )r   r9   r   r~   rN   r   r   r   1test_filesystem_dataset_no_filesystem_interactiont  s   "r  c           	      C   s  t | tjsJ t | jtjsJ tjg dt d}tjg dt d}|	| D ]}t |tj
s6J |d|s@J |d|sJJ q,||  D ]}t |tjs\J t |jtjseJ qR|| }t |tjssJ t|dks{J tddk}| jd|d}|d	 }|d ddgksJ |d
 ddgksJ t|d	 ddgksJ t|d ddgksJ tddk}| jd|d}|d	 }|d g dksJ |d
 g dksJ |d	 g dksJ |d g dksJ tdtd
tddkd}| jd|d}|d }t|g dks#J |d g dks.J |d
 g dks9J |d g dksDJ t|  d S )Nr   r   r   rU   r   typer   r   r   r_   T)r   r   r   r`         ?r   r   xxxyyy)rb   rX   1)r   r   r   r   )r        @r  r  )r   r   r   r   )r  r  r  r  )r_   r`   new)r   r'   )
r   r   r   r   r   r   rU   rU   r   r   )
        r  r  r         @r        @r  r  r  r  )
FFTTFFFFTT)rH   r   Datasetr9   r;   Schemaarrayr>   r?   r   RecordBatchcolumnr   r   scan_batchesTaggedRecordBatchrO   Fragmentr   rA   r   r<   sort_by	to_pydictsortedri   rQ   )	rN   r   expected_i64expected_f64rt   r   	conditionresult
projectionr   r   r   test_dataset  sN   
r%  c                 C   s(   | j ddd}t|}|jdksJ d S )N      )fragment_readaheadbatch_readahead   )r   r/   num_columns)rN   r   rt   r   r   r   test_scanner_options  s   r,  c           	      C   sV  |j | t d}t|tjsJ ttj |j | dgd W d    n1 s*w   Y  |j | dgt d}|j	| j
ksBJ |jt
dt fgksQJ t|tjsYJ | }| D ]}|j
|jkskJ |jdksrJ qa||  ks}J |j
|jksJ t|jD ]}t|g}||||ksJ qttj |t|jg W d    n1 sw   Y  |j| ksJ |j | g dt d}| }g d}|j|ksJ |d}|d	  d
gd dgd  ksJ |d  dgd dgd  ksJ |d  dgd ksJ |d  dgd ks)J d S )N)memory_poolunknownr&   r_   )r'   r-  r   )
__filename__fragment_index__batch_index__last_in_fragmentr0  r/  r   r   r   r   r1  r   r2  T)r   r;   default_memory_poolrH   r   Scannerr   r   ArrowInvaliddataset_schemar9   projected_schemar>   r   r   r+  	to_readerread_allr,   r   r  r   ArrowIndexErrorr   column_namesr  	to_pylist)	rN   r   r   r   rt   r7   r   expected_namessorted_tabler   r   r   test_scanner  sR   


& r?  c              	   C   sd   t  }t  }t | z| }tj| }| }| |ks$J W t | d S t | w r   )	r;   r3  system_memory_poolset_memory_poolbytes_allocatedr   r4  from_datasetr   )rN   old_poolpoolallocated_beforer   _r   r   r   test_scanner_memory_pool  s   
rH  c                 C   s  | | d}|tjjg | jdksJ |j | ddgd }|ddgiks'J |j | ddgtddkd }|dddgiksBJ |j | d	dgd }|dtt	d
d iks[J t
|  }|j ddgd }|ddgikstJ |j d	dgd }|dtt	d
iksJ d S )Nr   rc   r   r_   r&   r   r'   r   rU      r   )r   r;   rA   rl   r9   r  r   r<   ri   r,   r/   rF   )rN   r   r#  rO   r   r   r   	test_head  s"   rK  c                 C   s
  t |  }ddgtddgfD ]}|||}||||ks%J qtt ||tdg W d    n1 s@w   Y  ddgtddgfD ]}|| ||| |ksbJ qPtt || tdg W d    d S 1 s~w   Y  d S )Nr   rU   r   r*  r   )	r/   rF   r;   r  r   r   r   r   
IndexError)rN   r   rO   r   expectedr   r   r   	test_take(  s    
"rN  c                 C   s   t |  }||dksJ |j|tddkddksJ || dks(J |j| tddkddks8J |j| tddkddksHJ |j| tdd	k dd	ksXJ d S )
Nr   r_   r   r   r   r   r   rU   r   )r/   rF   r   r   r<   )rN   r   rO   r   r   r   test_count_rows8  s    $rO  c               	   C   sN   t jt jt jg} | D ]}tt |  W d    n1 sw   Y  q
d S r   )r   
FileFormatr4  Partitioningr   r   r   )classesklassr   r   r   test_abstract_classesH  s   rT  c                  C   sD  t t dt  t dt  g} tjtjtjfD ]}|| }t	|tj
s)J ||| ks1J |dks7J qt t dt  t dt  g} t| }t|jdksYJ tdd |jD seJ |d	}t	|tjsrJ tdd
ktddk@ }||sJ tt j |d W d    n1 sw   Y  |d}tdd
k}||sJ |tj| ddksJ t t dt  t dt  g} tj| dd}t|jdksJ tdd |jD sJ |d}tdtdktdtd
k@ }||sJ |d}td tdtd
k@ }||s.J dD ]}tt j || W d    n	1 sIw   Y  q0|tj| ddks\J t t dt  t dt  g} t| }t|jdks~J tdd |jD sJ |d}t	|tjsJ tdd
ktddk@ }||sJ tt j |d W d    n	1 sw   Y  |tj| ddksJ t t dt  t dt t  t  g} tj| dt g did}|jd d u sJ |jd  g dksJ |tj| d dks"J tjt t dt  t dt t  t  gdt g did}|jd d u sQJ |jd  g dks_J t jt td t d!d td D t d"gd# d$gd#  gg d%d&}t d't  fg}tjtjtjfD ]9}t )}||}tj||d(|d) tj |d(|d)}	|	! }
|
|sJ W d    n	1 sw   Y  qt B}t|}tj||d(|d) d }	tjt"d*d+ tj |d(t#dd)}	W d    n	1 sw   Y  |	d u sJ W d    d S 1 sw   Y  d S ),Nr_   r`   zother objectr   r   r   c                 s       | ]}|d u V  qd S r   r   r   r   r   r   	<genexpr>e      z$test_partitioning.<locals>.<genexpr>z/3/3.14/rU   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc                 s   rU  r   r   r   r   r   r   rV  {  rW  z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc                 s   rU  r   r   r   r   r   r   rV    rW  z3_3.14_prefix_3_aaa_)firstsecondthirddictionariesr      c                 s       | ]}t   V  qd S r   randomr[   rG  r   r   r   rV    rW  rW   r   rX   r  f2r   namesr   ipcr   r   z,Expected Partitioning or PartitioningFactoryr   )$r;   r9   r<   r>   r?   r   r   HivePartitioningFilenamePartitioningrH   rQ  r   re  allparse
Expressionr   r   r   r5  r   is_null
dictionaryint8r@   r  r<  r   r,   tempfileTemporaryDirectorywrite_datasetrN   r   
ValueErrorint)r9   rS  r   exprrM  
shouldfailr   partitioning_schematempdir	load_backload_back_tabler   r   r   test_partitioningS  s   




 

"



$r  c              
   C   s   t t dt  t dt  g}t|t|t|tj|ddtj|ddtj|dddg}|D ]}| 	| 
||ksDJ q6d S )Nr_   r`   rX  rY  r]  )rZ  r^  )r;   r9   r<   r>   r?   r   r   rq  rr  loadsdumps)pickle_moduler9   partsr   r   r   r   test_partitioning_pickling  s   	r  z@flavor, expected_defined_partition, expected_undefined_partition))rq  )zfoo=A/bar=ant%20bee r  r  )r   )z	A/ant beer  r  )rr  )r  z
A_ant bee_)r  rG  flavorexpected_defined_partitionexpected_undefined_partitionc                 C   s  t dt  fdt  fg}tt| |d}|tddktddk@ |ks,J |d	|
tddktddk@ sEJ |tddktddk@ tddktddk@ @ |kshJ |tddktddk@ tddktddk@ B |ksJ | dkrtjt jdd	 |tddk W d    d S 1 sw   Y  d S |tddkd
ksJ d S )Nfoobarrc   zant beeAr   rq  zDNo partition key for foo but a key was provided subsequently for barr   )zbar=ant%20beer  )r;   r9   r@   getattrr   r   pcr<   rt  joinr   r   r   r5  )r  r  r  r  r   r   r   r    test_dataset_partitioning_format  s<    
" 
r  c                  C   s   t tg dg dd} t d}t d}| j|d || |d |d| d	d
}tg dg dg dg dd	}||sHJ d S )Nr   r   rU   )r   r   r   rV   rW   rX   r   r   r?   )za+1zb-aza*2za/br&   r   rU   r   )r   r   )r   r      )      ?r  g      ?)r   rN   r;   r   r<   r   castr   )rN   rW   rX   r#  rM  r   r   r   $test_expression_arithmetic_operators,  s   


r  c                  C   s   dd dD \} }}t | ddiksJ t | t | ks!J t | |@ |@ dd dD ks3J t ddk}t |i ksCJ t | |@ ddiksPJ t d }t |dd iksbJ d S )	Nc                 S   s   g | ]	}t ||kqS r   r   r[   fr   r   r   r]   >  r}   z'test_partition_keys.<locals>.<listcomp>abcrW   c                 S   s   i | ]}||qS r   r   r  r   r   r   
<dictcomp>A  s    z'test_partition_keys.<locals>.<dictcomp>drU   )r   get_partition_keys_get_partition_keysr<   rv  )rW   rX   cnopenullr   r   r   test_partition_keys=  s   $r  c                  C   s  t  } t jddgd}t jdd}t jt d}t jtjd}| jt ks)J |jddhks2J | jdks9J |jdks@J | jt	 ksIJ |jt ksRJ | j
tju sZJ |j
tju sbJ | | kshJ | |ksnJ | |kstJ | |kszJ | |ksJ d |_|jt	 ksJ | |ksJ t |_|jt ksJ | |ksJ tj|_
|j
tju sJ || ksJ tj|_
|j
tju sJ || ksJ d S )	NrW   rX   dictionary_columnsmscoerce_int96_timestamp_unitbinary_type	list_typens)r   ParquetReadOptionsr;   binary_viewLargeListTyper  r   r  r  binaryr  ListTypelarge_binary)opts1opts2opts3opts4opts5r   r   r   test_parquet_read_optionsK  s<   
r  c                  C   s   t  } t jdhd}t jdd}t jt d}t jtjd}| jt  ks)J |jt jdgdks5J |jt jddks@J |jt jt dksMJ |jt jtjdksYJ d S )NrW   r  sr  r  r  )r   r   r;   r  r  read_optionsr  )pff1pff2pff3pff4pff5r   r   r   %test_parquet_file_format_read_optionsu  s    r  c                  C   s  t  } t jdd}t jddd}t jddd}t jdd	d
}t jdd}tjdddd}t jd|d}| jdu s;J | jdksBJ t rL| jdu sLJ | jdksSJ | j	dksZJ | j
du saJ |jdu shJ |jdksoJ t ry|jdu syJ |jdu sJ |jdksJ t r|jdu sJ |jdu sJ |jdksJ t r|jdu sJ |jdksJ |j	d	ksJ |j
du sJ t r|jdu sJ |j|ksJ |j| jksJ | | ksJ | |ksJ ||ksJ ||ksJ || ksJ || ksJ || ksJ d S )N   buffer_sizei    T)r  use_buffered_streamF)r  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrJ  )hole_size_limitrange_size_limitlazy)r  cache_optionsi @B )r   ParquetFragmentScanOptionsr;   CacheOptionsr  r  r   r  r  r  r  r  )r  r  r  r  r  opts6
cache_optsopts7r   r   r   test_parquet_scan_options  sd   r  c                 C   s  t  t  t tjjdddt jtjjddgddt jtjjddd	dt  t jtjjdd
ddt jtjjddddg}z	|	t 
  W n	 tyT   Y nw td urt|t  t jdhdt jddt jdddddg |D ]}| | ||ksJ qvd S )N	T)	delimiterignore_empty_linesrU   r  )	skip_rowsr;  r  i   )r  
block_sizeignorenewlines_in_valuesunexpected_field_behavior)parse_optionsF   r   r  rW   r  )r  r  {   i  )r  r  r  r  )r   r  CsvFileFormatr;   csvParseOptionsReadOptionsJsonFileFormatjsonr-   OrcFileFormatImportErrorrm   extendr   r  r  )r  formatsr   r   r   r   test_file_format_pickling  sR   



r  c              
   C   s   t  t jtjjdddt jtjjdddt  t tjjddd	t jtjjdd
ddg}t	d urD|
t jddt jddg |D ]}| | ||ksTJ qFd S )NT)strings_can_be_nullconvert_options   r  r  Ferrorr  i   r  r  r  )r  )r   CsvFragmentScanOptionsr;   r  ConvertOptionsr  JsonFragmentScanOptionsr  r  rm   r  r  r  r  )r  r   optionr   r   r   #test_fragment_scan_options_pickling  s2   

r  paths_or_selectorr   r   r   r   r  c                 C   sj  t jt jdhd|d}t d}t ttdt tdt	 g|_
|jdks/J |jddgks8J |jd	u s?J t | |||}| }| jttd
t tdt tdtt t	 tdt tdtt t	 dtdt tdt	 gd	dsJ t| tsJ t||t jsJ |jt dsJ | }t|t jsJ | }tjg dt d}	tjg dt d}
tjtjg dt dtjd  t	 d}tdd t!dD }|" }t#|ddgddgD ]\\}}}}tj|gd t d}tj|gd t	 d}tj|d gd t d}|j$d usEJ |j%dksMJ |d |	sWJ |d |
saJ |d |skJ |d |suJ |d |sJ |d |sJ |d |sJ q|& }t|tj'sJ t(|d ksJ |j%dksJ d S )!NrZ   r  )r  r  r   r   r   .rG  Fr_   r`   ra   rb   rV   check_metadataTr	  r
  z	0 1 2 3 4c                 S   rS   rT   rY   r   r   r   r   r]   :  s    z+test_filesystem_factory.<locals>.<listcomp>r   r   r   r  r  r*  r   rU   r   r  r   ))r   r   r  r   r   r;   r9   r<   r   r@   r   partition_base_dirselector_ignore_prefixesexclude_invalid_filesr   inspectr   r>   r?   rw  rb   rH   inspect_schemasri   r   r   r   r   r   r  DictionaryArrayfrom_arrayssplitr,   r  r   r   r+  r   rA   r   )ro   r  r  r   r   r   inspected_schemarN   r   r   r!  expected_strexpected_structiteratorrt   rO   r   r   expected_groupexpected_keyexpected_constr   r   r   r   test_filesystem_factory  s   

	


"r  c                 C   s   t  }t jd| |d}|jD ]A}||| }|jdgksJ |j|| dgd}||fD ]}t|t js6J |j|ks=J t|j	t
| sGJ q,|jdgksPJ qd S )N/plainr   r   r   r   )r   r   rN   r   r   r   rH   r   rr   r   r  )r   parquet_formatrN   rr   rO   row_group_fragmentr  r   r   r   test_make_fragmentR  s    
r  c                    s  | \}}}}}}}t  |g}fdd|D }	t j|	|jd   }
|
|s0J  fdd jD }fddt||D }t j||jd}  }
|
|s\J dd |D }fddt||D }t j||jd}tj	t
jjdd	 | }W d
   n1 sw   Y  dd |D }fddt||D }t j||jd}tj	tdd	 | }W d
   d
S 1 sw   Y  d
S )z
    Test passing file_size to make_fragment. Not all FS implementations make use
    of the file size (by implementing an OpenInputFile that takes a FileInfo), but
    s3 does, which is why it's used here.
    c                    s   g | ]}  |qS r   r   r[   rr   r   rd   r   r   r]   r  s    z0test_make_fragment_with_size.<locals>.<listcomp>)r   r9   r   c                    s   g | ]	} j |jqS r   )r   get_file_infosizer   rN   r   r   r]   |  r}   c                        g | ]\}} j ||d qS )	file_sizer   r[   rr   r  r  r   r   r]   }      c                 S      g | ]}d qS )r   r   r  r   r   r   r]         c                    r  r  r   r   r  r   r   r]     r!  zParquet file size is 1 bytesr   Nc                 S   r"  )r  r   r  r   r   r   r]     r#  c                    r  r  r   r   r  r   r   r]     r!  zHTTP status 416)r   r   r   r9   r   r   r   r   r   r   r   libr5  OSError)s3_example_simpler   rr   urihostport
access_key
secret_keyr~   r   tbl
sizes_truefragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarger   )rN   r   rd   r   test_make_fragment_with_sizee  sP   





"r2  c                 C   s   t d}t|d}t }||}t|	 tj
s J tjg dg dg dgg dd}| ||s<J |||}| || sPJ d S )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rW   rX   r        r   dogcatrabbitr[  numanimalrm  )textwrapdedentr;   	py_bufferencoder   r  r   rH   rG   BufferReaderr   r   r   r  r  )r   r  contentbuffer
csv_formatrO   rM  pickledr   r   r   "test_make_csv_fragment_from_buffer  s   


rH  c                 C   s   d}t |d}t }||}t| t jsJ t j	g dg dg dgg dd}| 
||s9J |||}| 
||
 sMJ d S )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
r3  r4  r5  r8  r<  rm  )r;   rA  rB  r   r  r   rH   rG   rC  r   r   r   r  r  )r   r  rD  rE  json_formatrO   rM  rG  r   r   r   #test_make_json_fragment_from_buffer  s   

rJ  c                 C   s   t g dt g dt g dg}|d  |d |d  g}tjtjddgd	d
dd}|t f||fg}|D ]<\}}t j|g dd}t  }t	|| |
 }	||	}
| |
|sgJ |||
}| ||syJ q=d S )Nr4  r5  r8  r   r   r   r[  r>  r  Tr  )r  r  r  r<  rm  )r;   r  dictionary_encoder   r   r  r   BufferOutputStreamrm   rn   getvaluer   r   r   r  r  )r   r  arraysdictionary_arraysdictionary_formatcasesformat_r   rs   rE  rO   rG  r   r   r   &test_make_parquet_fragment_from_buffer  s8   


	
rS  c                 C   sl   t jtddgd dgd dgd  gg dd}t| d }tj||d	g|d
 tj|dd|d}||fS )Nr'  r   rW   r   rX   rk  rm  test_parquet_datasetr   )partition_cols
chunk_sizer   r   )r   r   r   )r;   r   r,   rZ   rm   write_to_datasetr   rN   )r  rV  r   r   rr   rN   r   r   r   _create_dataset_for_fragments  s   "rX  c                 C   s2  t | \}}t| }t|dksJ |d }ddg}|jj|ks$J |j|j|j	|jks2J |j
tddks?J ||}|j|ksKJ ||dddsYJ |j||jd}|jg d	ksjJ ||ddsuJ |j|jdksJ |j||jtddk d
}|jg d	ksJ d S )Nr   r   r  rl  r   rW   r   rc   rk  )r9   r   )rX  ri   rF   r   physical_schemarn  r   r  rr   r   r   r   r   r<   r   r;  remove_columnslicer9   remove)r  r   r   rN   r   r  physical_namesr#  r   r   r   test_fragments  s&   
r^  c                 C   s   t jtddgd dgd  gddgd}t| d }tj||dgd	 tjt d
gdd}tj	|d|d}|j
tddkd}tt|dksLJ d S )Nr'  r   r   r   colr   rm  rT  rU  )r   rx  r   r  r   rp  r   )r;   r   r,   rZ   rm   rW  r   r   r9   rN   rF   r<   r   ri   )r  r   rr   r   rN   r   r   r   r   test_fragments_implicit_cast'  s   *rb  c           
         s  t | \ }	 d fdd	}t| d }|j}|||}||||ks-J |j|j|j	|j
d}||||sEJ ||d |j|j|j	|j
d}||dtddk d	 |j|j|j	|j
d}||ddgtdd
k d |j|j|j	|j
d}||dtddkd	 d|jddd }	tjt|	d  |j|j|j	|j
d}|j|tddkd	 W d    d S 1 sw   Y  d S )Nc                    sP   | j  j||d}|r|n j}|j|ksJ  j| |}||s&J d S )Nr9   r'   r   )r   r9   r;  r[  selectr   )rO   	row_slicer'   r   actualr;  rM  r   r   r   assert_yields_projected9  s   z;test_fragments_reconstruct.<locals>.assert_yields_projectedr   )r   )r   r   )r   r   r  r   r   r  rI  r   rW   z&No match for FieldRef.Name\(part\) in Fr   NN)rX  ri   rF   r   r  r  r   r   rr   r   r   r   r   r<   rY  	to_stringr   r   r|  )
r  r   r  rN   rg  rO   r  pickled_fragmentnew_fragmentpatternr   r   r   test_fragments_reconstruct5  s`   


"rm  c                 C   s^  t | dd\}}t| d }t| }t||j  kr$dks'J  J |j|d |jd}|jg dks:J t|dksBJ |	|
ddsMJ |d jd usVJ |d jdks_J |d jd jdddddddkstJ t|jtd	dk d
d }t|td	dk }t|dksJ |j|d td	dk d
}t|dksJ d S )Nr   rV  r   rc   rk  r   minmaxr  rl  r  r   )rX  ri   rF   r   r   r   r   r9   r;  r   r[  r   
statisticsr   r<   )r  r   r   rN   rO   r  r#  r   r   r   !test_fragments_parquet_row_groupsr  s.   "
rt  c                 C   s   t dtdi}tj|| d dd tj| d dd}t| d }|j	j
|j|jd	d
gd}|jdks8J |  |jdksCJ t|jdksLJ d S )NrW   r'  test.parquetr   row_group_sizer   r   r   r   rU   r  )r;   r   r,   rm   rn   r   rN   ri   rF   r   r   rr   r   r   ensure_complete_metadatar   r   )r  r   rN   original_fragmentrO   r   r   r   %test_fragments_parquet_num_row_groups  s   rz  c                 C   s   t tddgddgd}|d d|d< tt|| d  d	d lm	} |	| d }|j
||ddkd
}|jd	 | k  sIJ d S )NrW   rX   r   r   )col1col2r{  categoryztest_filter_dictionary.parquetr   r   )r0   r1   dictastyperm   rn   r;   r   pyarrow.datasetrN   r   r<   r   	to_pandasrs  )r  r   rD   r   rN   r#  r   r   r   ,test_fragments_parquet_row_groups_dictionary  s   "r  c                 C   s  |\}}t | d|d\}}t| d }||jg |  W d    n1 s*w   Y  |jddgks8J |g  |  W d    n1 sKw   Y  t|jtj	sYJ |j
j|j|jddgd}|j|jksnJ |  |jd }	|	jdks~J |	jdksJ |	jd usJ |||}
||jg% |
jddgksJ |
jd }	|	jdksJ |	jd usJ W d    d S 1 sw   Y  d S )Nr   rV  r   r   r   r  )rX  ri   rF   rr   rx  r   rH   metadatarm   FileMetaDatar   r   r   idr   rs  r  r  )r  r   r  rd   r   rG  rN   rO   rk  	row_grouprj  r   r   r   &test_fragments_parquet_ensure_metadata  s:   





"r  c           
      C   s   |\}}t | |d\}}t| d }|g  |||}W d    n1 s+w   Y  ||jg |j}	W d    n1 sDw   Y  |	dgksPJ d S )Nr   r   r   )rX  ri   rF   r  r  rr   r   )
r  r   r  rd   r   rG  rN   rO   rj  r   r   r   r   )test_fragments_parquet_pickle_no_metadata  s   
r  c                 C   s  t jt g dt  t g dt  t g dt  t g dt  t g dt  t g dt  t g dt 	 t g dt 
 t g dt  t g dt  t g dt  t g dt  t g dt  t g dt dt g dt dt g dt dt g dt  t g dt  t g dt dt g dt dgg d	d
}t| d }tj|||d |tj|dddfS )N)TNF)r   r   *   )r  g      $@      E@)rW   Nzr  r  us)r   r   l    jt )booleanrx  uint8int16uint16r   uint32r>   uint64r.   doubleutf8r  ts[s]ts[ms]ts[us]r=   date64time32time64rm  test_parquet_dataset_all_typesrn  r   r   rp  )r;   r   r  bool_rx  r  r  r  r   r  r>   r  float32r?   r  r  	timestampr=   r  r  r  rZ   rm   rW  r   rN   )r  rV  r   rr   r   r   r   _create_dataset_all_types  s6   /r  c              
      s  t | \}}t| d }dd l  fdd} fdd} fdd} j} j}t| }	|	d jd us9J |	d jd }
|
jdksGJ |
j	d	ksNJ |
j
i d
ddddddddddddddddddddddddddddddddddddddddddddddddd d!dd"|d|ddd#|d|ddd$|d|ddd%|d&dd'|d&d'd(d|d&dd|d&d'd)d|ddd|dddd|dddd|dddddd*ksJ d S )+Nr   c                    s     ddddd| S N  r   r   r(   r   r  r   r   dt_s,      z.test_parquet_fragment_statistics.<locals>.dt_sc              
      s     dddddd| d S )Nr  r   r   r   r  r  r  r   r   dt_ms-  r}   z/test_parquet_fragment_statistics.<locals>.dt_msc              	      s     dddddd| S r  r  r  r  r   r   dt_us.      z/test_parquet_fragment_statistics.<locals>.dt_usrU   r   r  FTro  rx  r   r  r  r  r  r   r  r>   r  r.   r  r  r  r  rW   r  r     a   zr  r  r  r=   r  r   r6     )r  r  r  )r  ri   rF   r(   r"   timer   r   r   total_byte_sizers  )r  r   rN   rO   r  r  r  r"   r  r  r  r   r  r    test_parquet_fragment_statistics$  sh   








	




r  c                 C   sn   t g dg dd}tj|| d dd tj| d dd}t| d	  }|d
 j	d	 j
i ks5J d S )N)r   r   NN)rW   rX   NNrV   ru  r   rv  r   r   r   r   )r;   r   rm   rn   r   rN   ri   rF   r   r   rs  )r  r   rN   r   r   r   r   &test_parquet_fragment_statistics_nullsP  s
   r  c                 C   st   t g dg ddd d }|j| d dd tj| d dd	}t| d  }|d jd j	i ks8J d S )
N)rW   rX   rX   r   r   r  rV   r   ru  r   enginer   r   )
r0   r1   
to_parquetr   rN   ri   rF   r   r   rs  )r  rD   rN   r   r   r   r   'test_parquet_empty_row_group_statistics[  s
    r  c                 C   s   t | dd\}}t| d }|jtddksJ t|jtddk|jd}t	|dks4J t|jtddk|jd}t	|dksKJ d S )Nr   rn  r   r   rW   r   r9   rX   )
rX  ri   rF   r   r   r   r<   r   r9   r   )r  r   rN   rO   r  r   r   r   +test_fragments_parquet_row_groups_predicateg  s   r  c                 C   sL  t | dd\}}t| d }|j}t| }|||}||||ks-J |j|j	|j
|jdgd}	||	}
|
||d sKJ |j|j	|j
|jdhd}	|j|	|jddgtddk d	}
|
jddgksrJ t|
dkszJ |j|j	|j
|jdhd}	tjtd
d ||	 W d    d S 1 sw   Y  d S )Nr   rn  r   )r   r   r   r  r   rU   rc  zreferences row group 2r   )rX  ri   rF   r   r   r  r  r   r   rr   r   r   r   r9   r   r<   r;  r   r   r   rL  )r  r   r  r   rN   rO   r  r  rj  rk  r#  r   r   r   -test_fragments_parquet_row_groups_reconstruct~  sH   
"r  c           
      C   s  |\}}t | d|d\}}t| d }|jddgd}|g " |jdks)J |jddgks2J |jd jd us<J W d    n1 sFw   Y  ||}	|	 ddgddgdks_J |jg d}|jdkslJ |jg kssJ |j||j	d}	|	j
dksJ |	|d d sJ d S )	Nr   r  r   rU   row_group_idsr   rr  rc   )rX  ri   rF   subsetr   r   rs  r   r  r9   r   r   
r  r   r   rd   r   r   rN   rO   subfragr#  r   r   r   !test_fragments_parquet_subset_ids  s&   


r  c           
      C   sR  |\}}t | d|d\}}t| d }|tddk}|g " |jdks+J t|jdks4J |jd j	d us>J W d    n1 sHw   Y  |
|}	|	 g dg ddksaJ |tdd	k}|jdksrJ |jg ksyJ |j
||jd
}	|	jdksJ |	|d d sJ |jtddk|jd
}|jdksJ d S )Nr   r  r   r  rU   r  )r   r   r   rr  r   rc   r   rW   r   )rX  ri   rF   r  r   r<   r   r   r   rs  r   r  r9   r   r   r  r   r   r   $test_fragments_parquet_subset_filter  s*   


r  c                 C   s   t | dd\}}t| d }tt |jtddkddgd W d    n1 s.w   Y  tt |  W d    d S 1 sHw   Y  d S )Nr   rn  r   r  r   r  )	rX  ri   rF   r   r   r|  r  r   r<   )r  rG  rN   rO   r   r   r   %test_fragments_parquet_subset_invalid  s   
"r  c           
      C   s  t g d}t g d}t g d}t jj||gddgd}t jj||gddgd}t d	|i}tj|| d
 dd tj| d
 dd}t	|
 d }|jdksVJ |td	ddk}	|	jdkshJ |td	ddk}	|	jdkszJ |td	dddk}	|	jdksJ |td	dddk}	|	jdksJ tjt jdd |td	ddk W d    n1 sw   Y  tjtdd |td	ddk W d    d S 1 sw   Y  d S )N)r   r   r   rU   )皙?皙?333333?皙?r   r   rU   r   f21f22rm  r  rl  r_  zdata_struct.parquetr   rv  r   r   r   r   r   zNo match for FieldRef.Nestedr   f3z)Function 'greater' has no kernel matching)r;   r  StructArrayr  r   rm   rn   r   rN   ri   rF   r   r  r<   r   r   r5  NotImplementedError)
r  r  r  r  rl  
struct_colr   rN   rO   r  r   r   r   0test_fragments_parquet_subset_with_nested_fields  s4   "r  c                 C   s   t | d }t|dkst|dksJ t| \}}tj|dd}t | d }t|d|jt| dks=J | d }t	j
|| tj|d	d}t | d }t|d
|jt| dksiJ d S )Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[group=1, key=xxx]>r   r   z*<pyarrow.dataset.ParquetFileFragment path=>data.featherfeatherz,<pyarrow.dataset.FileFragment type=ipc path=)ri   rF   repr_create_single_filer   rN   r   rw   rZ   r;   r  write_feather)r  rN   rO   r   rr   r   r   r   test_fragments_repr  s0   r  rG  c                 C      | S r   r   r   mr   r   r   <lambda><      r  c                 C      | || S r   r  r  r  r   r   r   r  <  r#  c                 C   s   t jddd}t }td}tjddg}|||}t|tjs%J ||_	t
| |||}| }tdt fdt fdt fd	t fd
tt t dfdt fdt fg}	||	skJ tj }
t|
tjsxJ d S )Nr   Tr   r   r   r_   r`   rZ   ra   rb   rV   )rd   r   r   r   r   r   discoverrH   PartitioningFactorypartitioning_factoryr   r  r;   r9   r>   r?   r@   rb   r   r   rq  )ro   rG  r  r  r   r   r  r   r
  expected_schemahive_partitioning_factoryr   r   r   test_partitioning_factory:  s.   







	
r  infer_dictionaryc                 C   r  r   r   r  r   r   r   r  ^  r  c                 C   r  r   r  r  r   r   r   r  ^  r#  c                 C   s4  t jddd}t }td}tjjddg|d}||||_t| |||}|	 }	|rt
t
 t
 }
|	dj|
ksBJ |   }|dd}t
dgd	 d
gd	   }||shJ | jtddkd}|dd}|dd	}||sJ d S |	djt
 ksJ d S )Nr   Tr   r   r   r  r   r  r   r  r   )rd   r   r   r   r   r   r  r  r   r  r;   rw  r   r@   r<   r  r   r   combine_chunksr  r   r  rK  r   r[  )ro   r  rG  r  r  r   r   r  r   inferred_schemaexpected_typer   rf  rM  r   r   r   $test_partitioning_factory_dictionary[  s.   
r  c                 C   r  r   r   r  r   r   r   r    r  c                 C   r  r   r  r  r   r   r   r    r#  c              
   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }dD ]>}	||	 ||	d (}
tj|
|}|| |  W d    n1 suw   Y  W d    n1 sw   Y  qKt jd	d
d}td	}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjddgdd}| |||_t||||}t|  }|d j !tddktddk@ s
J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ s>J tjj|dd}| |||_t||||}t#j$tj%dd | }W d    n	1 slw   Y  t jdd
d}td}tj&j|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tj&jdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj&|dd|_"t||||}t|  }|d j !tddktddk@ sJ tj&j|dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sNw   Y  d S )Nr_   r   rc   r"   r  r@   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherrq   Tr   date_intr&   r   逎`rX  rY  2021-05-04 00%3A00%3A00%24r9   rZ  +Could not cast segments for partition fieldr   r   )'rd   re   r   r  r;   r9   r>   r   r  r,   r  r@   ri   rg   rh   ro  new_filern   closer   r   r   r  r  r   r  r   r   r<   r  as_pyrF   r   r   r   r   r   r5  rq  )rG  r  ro   r   r9   r   partition_schemastring_partition_schemafull_schemarq   sinkwriterr   r   r  r   r  rf  r   r   r   r   r   *test_partitioning_factory_segment_encoding  s   














$r  c                 C   r  r   r   r  r   r   r   r    r  c                 C   r  r   r  r  r   r   r   r    r#  c              	   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }tdt
dfdt fg}	tdt fdt fg}
d	}|| ||d
 (}tj||}|| |  W d    n1 sw   Y  W d    n1 sw   Y  t jddd}td}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ s#J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ sWJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj|
dd}| |||_"t||||}t|  }|d j !tddktddk@ sJ tjj|	dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sw   Y  d S )Nr_   r   rc   ztest'; dater  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r  r   Tr   r  r&   r   r  r'  rY  z2021-05-04 00:00:00$rX  r  r  r  r  r   )&rd   re   r   r  r;   r9   r>   r   r  r,   r  r@   ri   rg   rh   ro  r  rn   r  r   r   rq  r  r  r   r  r   r   r<   r  r  rF   r   r   r   r   r   r5  )rG  r  ro   r   r9   r   r  r  r  partition_schema_enstring_partition_schema_enrq   r  r  r   r   r  r   r  rf  r   r   r   r   r   ;test_partitioning_factory_hive_segment_encoding_key_encoded  s   















$r  c              
   C   s   t g dg dd}tt t dt  t dt  g}tt j	 tj
|| d|d W d    d S 1 s=w   Y  d S )Nr   yNr   r  r  rV   rW   rX   ro  rp  )r;   r   r   r   r9   r<   r@   r   r   r5  r{  r  r   r   r   r   r   /test_dictionary_partitioning_outer_nulls_raises<  s   $"r   c                 C   sV   t g dg dd}tt t|| d W d    d S 1 s$w   Y  d S )Nr  r  rV   zbasename-{i}.arrow)r;   r   r   r   r   r   r{  )r  r   r   r   r   test_positional_keywords_raisesD  s   "r  c                 C   s   d}t t d|d t|d d}tj|d | | d dgd tj|d |d  | d dgd tj| d dgd	}|d jdksHJ tj| d dd
gd	}|d jdks\J tj| d dgd	}|d jdksoJ d S )Ni   r   r   )r   r$   oner   r`  twor&   r$   r   )	r;   r   repeatnparangerm   rW  
read_table
num_chunks)r  
BATCH_SIZEr   r   r   r   test_read_partition_keys_onlyJ  s&   


r
  c                    s    t  }t fdd|D S )Nc                    s"   g | ]}t jt j |qS r   )osrr   isdirr  )r[   elbasedirr   r   r]   g  r^   z _has_subdirs.<locals>.<listcomp>)r  listdirany)r  elementsr   r  r   _has_subdirse  s   
r  c                 C   sZ   t | D ]%}t j| |}t j|r*t||}t|r%t||| q|| qd S r   )	r  r  rr   r  r  	posixpathr  _do_list_all_dirsr-   )r  path_so_farr#  r  true_nestednorm_nestedr   r   r   r  j  s   
r  c                 C   s   g }t | d| |S )Nr  )r  )r  r#  r   r   r   _list_all_dirsu  s   r  c                 C   s    t t| }|t |ksJ d S r   )r   r  )r  expected_directoriesactual_directoriesr   r   r   _check_dataset_directories{  s   r  c              
   C   sh   t g dg dd}tt t dt  t dt  g}tj|| d|d t| g d d S )	Nr  r  rV   rW   rX   ro  rp  )zx/xzy/yr  )	r;   r   r   r   r9   r<   r@   r{  r  r  r   r   r   (test_dictionary_partitioning_inner_nulls  s   $r  c              
   C   sl   t g dg dd}tt t dt  t dt  gd d}tj|| d|d t| g d	 d S )
N)r   Nr  r  rV   rW   rX   r]  ro  rp  )za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r;   r   r   rq  r9   r<   r@   r{  r  r  r   r   r   test_hive_partitioning_nulls  s   r  c                  C   s0  t dt  fdt  fg} ddg}t| }t|tjs J tj| dd}t|tjs/J tj|d}t|tjs=J t	
t t  W d    n1 sQw   Y  t	j
tdd tj| d W d    n1 snw   Y  t	j
tdd tj| | d W d    n1 sw   Y  tj| d	d
}t|tjsJ tj| dd	d}t|tjsJ tjd	d
}t|tjsJ t	
t tj|d	d
 W d    n1 sw   Y  t	j
tdd tj|d	d W d    n1 sw   Y  t	
t tj| dd
 W d    d S 1 sw   Y  d S )Nr   r   inferrd  )field_nameszExpected listr   zCannot specify bothr   ra  )re  r  zCannot specify 'field_names')r   r  unsupported)r;   r9   r  rx  r   r   rH   r   r  r   r   r|  rq  )r9   rn  r   r   r   r   test_partitioning_function  s@   

$r"  c                 C   s   t t dt t  t  t dt t  t  g}tjj	|d}tj
dd| |d}|jj|ks7J | }|dj|jd sIJ |d dgd	 d
gd	  ks\J |dj|jd sjJ |d dgd	 dgd	  ks}J d S )Nr   r   rc   r   r   r   r   r   r   r   r   r   r  r  )r;   r9   r<   rw  rx  r   r@   r   r   r  rN   r   r   r  r  r   typesr<  )ro   r9   r   rN   r   r   r   r   *test_directory_partitioning_dictionary_key  s   &*r%  c           	      C   s.  t t dt t  t  t dt t  t  g}tjj|d}tj	dd| |d}|j
j|ks7J | }ttdd}ttd	d
}|dj|jd sWJ |djD ]}|j }|  ||ksnJ q]|dj|jd	 s}J |djD ]}|j }|  ||ksJ qd S )Nr   r   rc   r   r   r#  i  i  r      r   )r;   r9   r<   rw  rx  r  r   rq  r  rN   r   r   ri   r,   r  r  r   r$  chunksr<  sort)	r   r9   r   rN   r   year_dictionarymonth_dictionaryr   rf  r   r   r   %test_hive_partitioning_dictionary_key  s.   

r+  c                 C   sL   |d u rt tddgd dgd  d}| d }tj|||d ||fS )	N	   r  r   r  r   rV   ru  rv  r;   r   r,   rm   rn   )base_dirr   rw  rr   r   r   r   r    s
   $r  c                 C   s   t tddgd dgd  d}| d }t|| t tdddgd dgd  d}| d	 }t|| ||f||ffS )
Nr,  r  r   r  r   rV   ztest1.parquetr  ztest2.parquetr-  )r.  table1path1table2path2r   r   r   _create_directory_of_files  s   $&r3  c                 C   sD   | | || fD ]}| j|jsJ || |sJ q
d S r   )r  r  r9   r   r   )rN   r   r   picklerr  r   r   r   _check_dataset  s   r5  c                 K   s   t | tjsJ | t| | gt| gfD ]}tj| fi |}t |tjs'J t|||| qt| j	" tj| j
fi |}t |tjsGJ t|||| W d    d S 1 sYw   Y  d S r   )rH   pathlibPathrZ   r   rN   r   r5  r
   parentname)rr   r   r   r4  r   ry   rN   r   r   r   _check_dataset_from_path  s   "r:  c                 C   s   t | \}}t|||| d S r   r  r:  r  r   r  r   rr   r   r   r   test_open_dataset_single_file	  s   r=  c                 C   s"   t | dd\}}t|||| d S )Nr   rv  r;  r<  r   r   r   test_deterministic_row_order	  s   r>  c                 C   s(   t | \}}t|}t| ||| d S r   )r3  r;   concat_tablesr:  )r  r   r  tablesrG  r   r   r   r   test_open_dataset_directory	  s   
rA  c           
         s   t | \}\}}t|}t||gtt|t|gg}| fdd|D 7 }|D ]}|j|js7J ||}	|	|sCJ q,d S )Nc                    s   g | ]
}   |qS r   r  )r[   r  r  r   r   r]   /	  s    z3test_open_dataset_list_of_files.<locals>.<listcomp>)	r3  r;   r?  r   rN   rZ   r9   r   r   )
r  r   r  r@  r0  r2  r   datasetsrN   r#  r   rB  r   test_open_dataset_list_of_files&	  s   

rD  c                 C   s   t | \}}t|}t|}|j|jsJ tj|t d}|j|js*J t	t
 tj|t d W d    d S 1 sDw   Y  d S )Nr  )r  r   r   rN   r9   r   rd   ru   r   r   r   re   )r  r   rr   fspathdataset1dataset2r   r   r   #test_open_dataset_filesystem_fspath9	  s   
"rH  c                 C   s   | d }|   t|\}}||}t|}tj|t d}tjt|t|d}	|	|
|}
||||  krP||	  krP||
ksSJ  J d S )Nsingle-filer  )mkdirr  relative_tor   rN   rd   ru   rZ   r	   r  r  r   )r  r   r  rq   r   rr   relative_pathd1d2d3d4r   r   r   test_construct_from_single_fileM	  s   


rQ  c                 C   s   | d }|   t|\}}t|}tj|t d}tj|jt| d}||}	||}
||}|	|
  kr@|ksCJ  J |||fD ]}|	|
|}|||	ks[J qHd S )Nsingle-directoryr  )rJ  r3  r   rN   rd   ru   r9  r	   r   r  r  )r  r   r  rq   r@  r~   rM  rN  rO  t1t2t3r  restoredr   r   r   $test_construct_from_single_directorya	  s   



rW  c                    s    d }|   t|\}} fdd|D }t  t|}||}t|ttt|ks3J W d    n1 s=w   Y  tj|t	 d}||}	t|}
||
}tj|t
 d}||}||	  krx|  krx|ks{J  J d S )Nzlist-of-filesc                    s   g | ]}|  qS r   )rK  rx   r  r   r   r]   |	  r  z5test_construct_from_list_of_files.<locals>.<listcomp>r  )rJ  r3  r
   r   rN   r   r   sumrj   r	   rd   ru   )r  r   rq   r@  r~   relative_pathsrM  rS  rN  rT  rO  rU  rP  t4r   rX  r   !test_construct_from_list_of_filesu	  s    






*r\  c                 C   sJ   ddg}t jtdd tj|| d W d    d S 1 sw   Y  d S )Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r   r   r  r   rN   )ro   r   r   r   r   -test_construct_from_list_of_mixed_paths_fails	  s   "r]  c                 C   s   t jddg| d}t jd| d}t ||g}t|t jsJ tt| dks+J | }t|dks7J |jdks>J t|j	dksGJ |j	D ]}|j
ddgksUJ qJd S )	Nr   r   r  r   r   rf  r   r   )r   rN   rH   UnionDatasetr   ri   rF   r   r+  childrenr   )ro   rW   rX   rN   r   childr   r   r   (test_construct_from_mixed_child_datasets	  s$   
ra  c                  C   s6   t jg dd} |  }|jdksJ |jdksJ d S )Nro  r   r   )r   rN   r   r   r+  )emptyr   r   r   r   test_construct_empty_dataset	  s   rc  c               	   C   sf   t jg dtdt fdt fgd} tjtdd | 	  W d    d S 1 s,w   Y  d S )Nro  rW   r   r9   zMultiple matches for .*a.* in r   )
r   rN   r;   r9   r>   r@   r   r   r|  r   )rb  r   r   r   *test_construct_dataset_with_invalid_schema	  s   



"re  c                    s|  t j| tdt  d}t j| tdt  d}tjjtt	dgdgd tjjtt	dgdgd}t
jtdd	 t ||g W d    n1 sQw   Y  d
}t
jt|d	 t g d W d    n1 sqw   Y  d}t
jt|d	 t d  W d    n1 sw   Y  d}t
jt|d	 t  fddt	dD  W d    n1 sw   Y  d}t
jt|d	 t g  W d    n1 sw   Y  d}t
jt|d	 t  |g W d    n1 sw   Y  d}t
jt|d	 t  dg W d    n	1 sw   Y  d}t
jt|d	 t  dg W d    d S 1 s7w   Y  d S )Nr  r   /schemar   rW   rm  rX   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr  zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc                 3       | ]} V  qd S r   r   rj  batch1r   r   rV  	      z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>rU   zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rd   r   r   r;   r  r  r  r,   r   r   r   rN   r|  InMemoryDataset)r   child1child2batch2rM  r   rh  r   )test_construct_from_invalid_sources_raise	  sd   $ro  c                 C   s   t jjt tdgdgd}t j|j|g}t j|g}t	j
g dt g d }|t g ks5J |||g|g|fD ]6}t	
|}| ||ksNJ tt| dksZJ t|  |ksfJ t jt| |kstJ q>d S )Nr   rW   rm  ro  rd  r   )r;   r  r  r  r,   RecordBatchReaderrl   r9   rA   r   rN   r   r   r   ri   rF   r/   r   )r   rt   r   r   dataset_tablesourcerN   r   r   r   test_construct_in_memory	  s   
rs  r   c              	      s   t jjt tdgdgd t j gd} fddd ffddd f fdd jffD ]2\}}tj	j| || d	}|
 ksFJ tjt j|d
 |
  W d    n1 s]w   Y  q0d S )Nr   rW   rm  z#OneShotFragment was already scannedc                      s   t j j gS r   )r;   rp  rl   r9   r   rt   r   r   r  
  s    z$test_scan_iterator.<locals>.<lambda>c                      s   t  S r   )r   r   r   r   r   r  
  s    c                      s    fddt dD S )Nc                 3   rg  r   r   rj  rt  r   r   rV  
  rj  z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>r   r,   r   rt  r   r   r  
  r  r9   r   r   )r;   r  r  r  r,   rA   rl   r9   r   r4  r   r   r   r5  )r   r   r   r9   r   r   )rt   r   r   test_scan_iterator
  s$   

rw  c                 C   s   t tddgd dgd  d}| d }|  tdD ]}|d	|  }|  t|d| d|d
  q|dt jdgd dgd  dgd  t 	 d}||fS )Nr,  r  r   r  r   rV   zdataset-partitionedrU   zpart=ru  r   r   r   r   r
  )
r;   r   r,   rJ  rm   rn   r[  append_columnr  r   )r  r   rr   r7   r   
full_tabler   r   r   _create_partitioned_dataset$
  s   $,rz  c           
      C   sj  t | \}}|ddg}t|||| tjt|tjddd}|j|js*J t	|  tjdtjddd}|j|jsCJ W d    n1 sMw   Y  tjt|dd}|j|jsdJ tjt|tjt
dt
 fgddd}|jt
dt
 }|j|sJ | }|dt
jdgd	 d
gd	  dgd	  t
 d}	||	sJ d S )NrW   rX   r   ra  r   zdataset-partitioned/r   r   rU   r   r   r
  )rz  rd  r:  r   rN   rZ   r   r9   r   r
   r;   rx  r-   r<   r   rx  r  )
r  r   r  ry  rr   r   rN   r  r#  rM  r   r   r   'test_open_dataset_partitioned_directory5
  s8   

,r|  c                 C   s   t | \}}tt|}|j|jsJ tjt|t d}|j|js*J t|  tjdt d}W d    n1 sBw   Y  |j|jsPJ t	
t tjt|t d W d    d S 1 slw   Y  d S )Nr  ru  )r  r   rN   rZ   r9   r   rd   ru   r
   r   r   r  re   )r  r   rr   rF  rG  dataset3r   r   r   test_open_dataset_filesystemZ
  s   
"r~  c                 C   sP   t | \}}tjtdd tj|gdd W d    d S 1 s!w   Y  d S )Nz format 'blabla' is not supportedr   blablar   )r  r   r   r|  r   rN   )r  rG  rr   r   r   r   $test_open_dataset_unsupported_formatq
  s   "r  c                 C   s`   t | \}}t|}t||g}t|tjsJ |||}||||ks.J d S r   )r  r   rN   rH   r^  r  r  r   )r  r   r  rG  rr   rN   unionrG  r   r   r   test_open_union_datasetx
  s   
r  c                 C   sT   t jd| dd}tjtdd t j|gdd W d    d S 1 s#w   Y  d S )Nr  r   r  zcannot pass any additionalr   r   )r   rN   r   r   r|  )r   r`  r   r   r   .test_open_union_dataset_with_additional_kwargs
  s   "r  c                   C   s|   t t tjddd W d    n1 sw   Y  t jtjdd tjddd W d    d S 1 s7w   Y  d S )Nzi-am-not-existing.arrowro  r   zcannot be relativer   zfile:i-am-not-existing.arrow)r   r   r  r   rN   r;   r5  r   r   r   r   #test_open_dataset_non_existing_file
  s   "r  r   rq   r   r^  r]  partition_keysr  BCr  )DEFr  )r   NrU   )r  Nr  )Nr   rU   c                    sl  t tddgd dgd  d}d |d v pd |d v }|d	kr&|r&d S |d	kr9tjjd
dg d}d}d }n|rDtjj |d}ntjj d}d}|rR|}nd}| d }	|	  |\}
}|
D ]!}|D ]}|	||pn||pq| }|jdd t	
||d  qfqbtjt|	|d} fdd}|jt d
||
d t d||d }|j|sJ d S )Nr,  r  r   r  r   rV   r   r   rq   part1part2r  z{0}/{1})r  r^  zpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rN   T)parentsru  r{  c                    sH    rt | trt nt }tt |S t | tr t S t S r   )rH   rZ   r;   r@   r   rw  )r   
value_typer  r   r   r  
  s   z/test_partition_discovery.<locals>.expected_type)r;   r   r,   r   r   r  rq  rJ  r   rm   rn   rN   rZ   r9   r-   r<   r   )r  r   r^  r  r  r   has_nullfmt
null_valuebasepath
part_keys1
part_keys2r  r  rr   rN   r  r  r   r  r   test_partition_discovery
  sT   $r  c           	      C   sV  t tddgdtdd}tj|dgjdd}tj	|| |d	d
 tj
| d	tjjddd}t |d |d  d}| |sIJ t| d }|j|jd|d d saJ |j}|||}| |suJ |||}|j|jd|d d sJ |j|jd |d d  sJ |j|sJ d S )Nr  r  r   r   r   r_  r   r   ra  r  r   r   Tr  rp  r_  )r_  r   r   rc   )r;   r   r  r  r,   r   r   rd  r9   r{  rN   rq  r  rK  r   r   ri   rF   r   r  r  r  )	r  r  r   r   rN   rM  rO   	part_exprrV  r   r   r   4test_dataset_partitioned_dictionary_type_reconstruct
  s,      r  c              	   C   s   ddl m} | d \}}}}d| d| d| d| d	}||\}}|d td	g d
i}	|d}
t|	|
 W d    n1 sHw   Y  |	|||||||fS )Nr   
FileSystem
connections3://:z5@mybucket/data.parquet?scheme=http&endpoint_override=z&allow_bucket_creation=TruemybucketrW   r  zmybucket/data.parquet)	r   r  from_urirg   r;   r   rh   rm   rn   )	s3_serverr  r(  r)  r*  r+  r'  rd   rr   r   rs   r   r   r   r&  
  s   
r&  c                 C   s^   | \}}}}}}}}t j|dd}|||sJ t j|d|d}|||s-J d S )Nr   r   r   r   )r   rN   r   r   )r&  r   r   rr   rd   r'  rG  rN   r   r   r   test_open_dataset_from_uri_s3  s
   r  c           
      C   sP   | \}}}}}}}}t d}||}tj|d|d}	||	|s&J d S )Nr  r   r  )rd   r   r  r   rN   r   r   )
r&  r   r   rr   r   r'  rG  r   finfosrN   r   r   r    test_open_dataset_from_fileinfos  s
   

r  c                 C   s   | \}}}}}}}}t d}ddlm}	m}
 |j||dd| d| id}tj|d|d	}| 	|s8J |
|	|}tj|d|d	}| 	|sOJ d S )
Ns3fsr   )FSSpecHandlerrv   endpoint_urlzhttp://r  )r   secretclient_kwargsr   r  )
r   importorskipr   r  rv   S3FileSystemr   rN   r   r   )r&  r   rr   rG  r(  r)  r*  r+  r  r  rv   rd   rN   r   r   r   $test_open_dataset_from_uri_s3_fsspec&  s   
	r  c                 C   sD  ddl m} | d \}}}}d}d}d| d| d| d	| d
| d| d}||\}	}|dks4J |	| tdg di}
|	|}t|
| W d    n1 sXw   Y  t	j
|dd}| |
smJ d||||}g d}|D ]\}}||}t	j
||dd}| |
sJ q{tjtjdd |d	}t	j
d|d W d    n1 sw   Y  d}d}||}tt}t	j
d|d W d    n1 sw   Y  t|j|d||ksJ d}||}tt}t	j
d|d W d    n	1 sw   Y  t|j|d||ks J d S )Nr   r  r  theirbucketnested/folder/data.parquetr  r  @r   z?scheme=http&endpoint_override=z&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrW   r  r   r   3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r  z/nested/folder/data.parquet)r  r  r  zMissing bucket namer   z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  NotFoundFile)r   r  r  rg   r;   r   rh   rm   rn   r   rN   r   r   r   r   r   r5  r|  rZ   r$   )r  r  r(  r)  r*  r+  bucketrr   r'  rd   r   rs   rN   templaterQ  prefixr  excr   r   r   -test_open_dataset_from_s3_with_filesystem_uri@  s\   




"r  c                 C   sD   t | \}}td}|d}tj||d}|j|js J d S )Nfsspecfiler  )r  r   r  r   r   rN   r9   r   )r  r   rr   r  r{   rN   r   r   r   test_open_dataset_from_fsspec  s
   

r  c           	      C   s   t d}tdg di}| d }t|| |d}|| d ds)J t	
 }tt|}|||}||jsCJ |||}|j|jsRJ d S )Nr  rW   r  r  r  r   )r   r  r;   r   rm   rn   r   lsendswithr   r   rd   rv   r  r  r   r9   r   rY  )	r  r  r   rr   	fsspec_fsr   r   r9   rO   r   r   r   test_file_format_inspect_fsspec  s   

r  c                 C   s  | d }t ddgd tdd}tj|dgjdd	}tj|||d
d tjt dt dfgdd	}tj	|d
|d}t
dtdk}|j||d}|d g dks]J dd l}t
d|dddk}|j||d}|d g dksJ d S )Ntest_partition_timestamps
2012-01-01z
2012-01-02r   r   )datesr  r  r   ra  r  r  r  rp  r   r  )r   rU   r   r*  r,  r   i  r   )r;   r   r,   r   r   rd  r9   r{  r  rN   r<   r0   	Timestampr   r  r<  r(   )r  r   rr   r   r   rN   r"  r(   r   r   r   test_filter_timestamp  s$   
r  c                 C   sh   t dt jg dt  di}t| |\}}tt|}tddk}t	|j
||ddks2J d S )NrW   )r   r   r   rU   r   r   r
  r   r   rU   )r;   r   r  rx  r  r   rN   rZ   r<   r   r   )r  r   r   rG  rr   rN   filter_r   r   r   test_filter_implicit_cast  s
    r  c                 C   s^   t dg di}t| |\}}tt|}|j|tdtd kd}|j	dks-J d S )Nr  )rW   rX   Nr   r   )
r;   r   r  r   rN   rZ   r   r<   r   r   )r  r   r   rG  rr   rN   r   r   r   test_filter_equal_null  s   r  c           	      C   s   t g ddd tdD dd tddD d}t| |\}}tt|}tt	d	t 
d
dg}|j||djdksBJ tt	ddk}|j||djdksXJ tt	dt	d}|j|d|id}|d  g dksyJ d S )N)rW   rX   NrW   r  c                 S   s   g | ]
}t  d dd|qS i  r   r  r   r   r   r   r]         z2test_filter_compute_expression.<locals>.<listcomp>r   c                 S   s   g | ]	}t  d d|qS r  r  r   r   r   r   r]     r}   r   r  r  r  rW   rX   r   rU   r  r   r  r   r&   r	  )r;   r   r,   r  r   rN   rZ   r  is_inr<   r  r   r   hourdays_betweenr<  )	r  r   r   rG  rr   rN   r  r   r#  r   r   r   test_filter_compute_expression  s   r  c                 C   s   t j| tdt  d}t |g}t| dksJ tdd | D s*J | d 	|
 s7J |
 	|
 sBJ t| t jsLJ d S )Nr  r   r   c                 s   s    | ]	}t |tjV  qd S r   )rH   r;   r  )r[   r  r   r   r   rV    s    z%test_dataset_union.<locals>.<genexpr>r   )r   r   rd   r   r   UnionDatasetFactoryr   r  rs  r   r  rH   r   r  )r   r`  r   r   r   r   test_dataset_union  s   
r  c                 C   s  t jd|dd}t jd|dddgd}t jd|dd	d}|j|j  kr*|jks-J  J t |||g}t|t js=J d
}tjt|d t j||g|d W d    n1 sZw   Y  tdt	 fdt
 fdt fdt fdt fdt fdt fg}|j|sJ | j|sJ t ||g}tdt	 fdt
 fdt fdt fdt fdt fg}|j|sJ | j|sJ tdt fdt fdt	 fg}t j||g|d}| j|sJ tdt fdt fdt fg}t j||g|d}| j|s$J tjtddgd dgd  dgg dd}t| |d\}	}
t |
}tjtjdd t ||g W d    d S 1 scw   Y  d S )Nr  r   r  rf  weekr%   r   r   r   /hiver   z$cannot pass any additional argumentsr   r  r"   r#   r$   r   r   rc   r.  r,  r  r   r  r   	abcdefghj)r"   r$   r#   rm  r   zUnable to merge)r   rN   r9   rH   r^  r   r   r|  r;   r=   r>   r?   r@   r   r   r   r   r,   r  ArrowTypeError)r  r   rl  rm  child3	assembledmsgr  r   rG  rr   child4r   r   r   &test_union_dataset_from_other_datasets  st   

"






	











 
$r  c                 C   sJ   d}t jt|d tjg d| d W d    d S 1 sw   Y  d S )Nz8points to a directory, but only file paths are supportedr   )r  rf  r  r  )r   r   IsADirectoryErrorr   rN   )r   r  r   r   r   4test_dataset_from_a_list_of_local_directories_raisesC  s   "r  c              
   C   s   t t jd| dt jd| dt jd| dg}tdt fdt fdt fdt fg}|j|s8J t t jd| dt jd| dt jd| d	d
g}tdt fdt fdt fdt fdt	 fdt	 fg}|j|s{J d S )Nr  r  rf  r  r"   r#   r$   r%   r   )r   r   r   r   )
r   rN   r;   r9   r=   r>   r?   r@   r   r   )r   rN   r  r   r   r   &test_union_dataset_filesystem_datasetsI  s4   









r  c                    s  t g dg dd}t|d  d fdd	}d }|}||||jd |j}|}||| t dd	g}t jg dg dgd
dgd}||| t d	g}t jg dgdgd}||| t d	dg}t jg dt jg dddgddgd}||| t ddg}tjtd |d}t j|d 	d|d
 gdd
gd}||| t dt 
t  fdg}tjtd |d}|j|sJ tjtdd  | W d    d S 1 sw   Y  d S )Nr  r  r  r  rV   r  c                    s\   t jtd | d}|d ur|j|sJ n|j| s J  |}||s,J d S )Nr  rc   )r   rN   rZ   r9   r   r   )r9   rM  r  rN   r#  r   r  r   r   r5  n  s   
z-test_specified_schema.<locals>._check_dataset)r  )rX   r?   )rW   r>   rX   rW   rm  )r  r   NNNr   r
  r  )rW   r   rc   z#Unsupported cast from int64 to listr   r   )r;   r   rm   rn   r9   r  r   rN   rZ   r  list_r   r   r   r   r  r   )r  r   r   r5  r9   rM  rN   r   r  r   test_specified_schemai  sL   






"r  c                 C   s   | d }t dg di}t|| t dt  fg}tjt|gd |d}|j	|s1J |
|}tjtdd | }|  W d    d S 1 sQw   Y  d S )Nr  rW   r  d   rc   z#Unsupported cast from int64 to nullr   )r;   r   rm   rn   r9   r  r   rN   rZ   r   r   r   r   r  r8  r9  )r  r   fnr   r9   rN   r   r   r   r   r   test_incompatible_schema_hang  s   

"r  c           	      C   s   t t jg dddt jg dddd}t| d }t |}t ||j}|| d  |	  W d    n1 s@w   Y  t
j|t
 d	}||}||sZJ t| d
D ]}t
j||d	}||}||suJ q`d S )Nr  rx  r
  r  r?   rV   z
test.arrowr   r   )ro  arrow)r;   r   r  rZ   output_streamRecordBatchFileWriterr9   write_batchr   r  r   rN   r  r   r   rQ   )	r  r   r   rr   r  r  rN   r#  
format_strr   r   r   test_ipc_format  s$   


r  c              	   C   s  ddl m} ttjg dddtjg dddd}t| d	 }||| tj|t	 d
}t
| }t|d tjsAJ ||}|jdd ||sSJ t| tj|dd
}||}|jdd ||spJ |j|dgd}|jdd ||dgsJ |j|dtdd id}|jdd |tdtjg dddisJ ||dksJ |j|tddkddksJ d S )Nr   orcr  rx  r
  r  r?   rV   test.orcr   T)fullr  rX   r&   b2r   )r  r  g333333?rU   rW   r   r   )r   r  r;   r   r  rZ   rn   r   rN   r  ri   rF   rH   FileFragmentr   validater   rQ   rd  r<   r   )r  r   r  r   rr   rN   r   r#  r   r   r   test_orc_format  s:   

$r  c                 C   s   ddl m} ttjg dddtjg dddd}t| d	 }||| tj|d
d}t	|
|}t|dks>J |d jdksGJ |d |
 d sTJ d S )Nr   r  r  rx  r
  r  r?   rV   r  r  r   r   rU   )r   r  r;   r   r  rZ   rn   r   rN   ri   r   r   r   r   )r  r   r  r   rr   rN   r#  r   r   r   test_orc_scan_options  s   r  c                  C   sh   z	ddl m}  W d S  ty3   tjtdd tjddd W d    Y d S 1 s+w   Y  Y d S w )Nr   r  z'not built with support for the ORC filer   r  r  r   )r  r  r  r   r   r|  r   rN   r  r   r   r   test_orc_format_not_supported	  s   &r  c                  C   s   t jtdd tjtdtdiddd W d    n1 s!w   Y  t } t jtdd | 	  W d    d S 1 sAw   Y  d S )Nz9Writing datasets not yet implemented for this file formatr   rW   r   r  z/tmp)r   r.  )
r   r   r  r   r{  r;   r   r,   r  make_write_options)ofr   r   r   +test_orc_writer_not_implemented_for_dataset  s   
"r  c                 C   s   t t jg dddt jg dddd}t| d }| j|dd	 tj|t d
}|	|}|
|s:J t| tj|dd
}|	|}|
|sQJ d S )Nr  r>   r
  r  r?   rV   test.csvFr#   r   r  )r;   r   r  rZ   r  to_csvr   rN   r  r   r   rQ   )r  r   r   rr   rN   r#  r   r   r   test_csv_format&  s   

r   compression)bz2gziplz4zstdc                 C   s   t j|st| d ttjg dddtjg dddd}t	 }|dkr.|nd	}t
| d
|  }|j||d}| jdd}||d W d    n1 s[w   Y  tj|t d}	||	}
|
|suJ d S )Nz support is not builtr  r>   r
  r  r?   rV   r  gzz	test.csv.r  Fr  r3  r   )r   Codecis_availabler   skipr;   r   r  rd   ru   rZ   rh   r  r  writerB  r   rN   r  r   r   )r  r  r   r   r   suffixrr   r  csv_strrN   r#  r   r   r   test_csv_format_compressed9  s   
r  c              	   C   s  t | d }t|d}|d W d    n1 sw   Y  tj|dd}||}|tdt	g dis=J tj|tj
tjjdd	d
d}||}|tdt	ddgiscJ tj|tj
tjjdgdd
d}||}|tdt	g disJ d S )Nr  wzskipped
col0
foo
bar
r  r   skipped)col0r  r  r   )r  r  r  r  r  r;  )r  r  r  r  )rZ   rG   r  r   rN   r   r   r;   r   r  r  r  r  )r  r   rr   r  rN   r#  r   r   r   test_csv_format_optionsT  s*   



"


r  c              
   C   s   t | d }t|d}|d W d    n1 sw   Y  tj|tjtjjdddd}|	|}g d}|j
|ks@J |ttd	gtd
gtdgtd	gds_J d S )Nr  r  z1,a,true,1
T)autogenerate_column_namesr  r   )f0r  rl  r  r   rW   )rZ   rG   r  r   rN   r  r;   r  r  r   r;  r   r   r  )r  r   rr   r  rN   r#  expected_column_namesr   r   r   (test_csv_format_options_generate_columnsi  s   





r  c           	   	   C   s*  t | d }t|d}|d W d    n1 sw   Y  tj|dd}tjjdgdd}tj|t	jj
d	d
d}|j||d}|t	dt	g disTJ tj|d}tj||d}||}|t	dt	g diswJ t }|j||d}|t	dt	g disJ d S )Nr  r  zcol0
foo
spam
MYNULL
r  r   MYNULLT)null_valuesr  r  r  )r  r  )fragment_scan_optionsr  )r  spamNr  )r  r  r  )rZ   rG   r  r   rN   r   r  r  r  r;   r  r   r   r   r  r  )	r  r   rr   r  rN   r  r   r#  rF  r   r   r   test_csv_fragment_optionsy  s.   
"
"r  c                 C   s   t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
|t	 d}||}||s[J t| t	j
|dd}||}||srJ d S )Nr  r>   r
  r  r?   rV   	test.jsonrecordsorientr   r  },{}
{r  r   r  )r;   r   r  rZ   r  to_jsonreplacerG   r  r   rN   r  r   r   rQ   r  r   r   rr   rs   r  rN   r#  r   r   r   test_json_format  s    

r&  c                 C   s  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tj|tjt jjdddd}W d    n1 shw   Y  tj|tjt jjdddd}||}||sJ d S Nr  r>   r
  r  r?   rV   r  r  r  r   r  r!  r"  r  ztry to increase block sizer   r   r  r  r   @   )r;   r   r  rZ   r  r#  r$  rG   r  r   r   r|  r   rN   r  r  r  r   r   r%  r   r   r   test_json_format_options  s(    



r)  c           	      C   s*  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tjt jjddd}tj|t|d}W d    n1 smw   Y  tjt jjddd}tj|t|d}||}||sJ d S r'  )r;   r   r  rZ   r  r#  r$  rG   r  r   r   r|  r   r  r  r  rN   r  r   r   )	r  r   r   rr   rs   r  r   rN   r#  r   r   r   test_json_fragment_options  s,    
r*  c              	   C   s   t | d }dD ]^\}}t|d}|| W d    n1 s!w   Y  tdt fdt fg}tjdgdgd|d	}tjj|d
}t	j
|d}	t	j||	d}
|
j|s]J |
 |sfJ qd S )Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrW   rX   un
   éléphantrV   rc   encodingr  r   )rZ   rG   r  r;   r9   r@   r   r  r  r   r  rN   r   r   )r  r   rr   r1  
input_rowsr  r  expected_tabler  r   dataset_transcodedr   r   r   test_encoding  s"   r5  c           
      C   s  t | d }t|d}|d W d    n1 sw   Y  tdt fdt fg}tjdgdgd|d	}tj|d
|d}t	j
tjjdd || W d    n1 s\w   Y  tjjdd}tj|d}tj||d}	|	j|s}J |	 |sJ d S )Nr  r-  s   ,b
un,lphant   érX   r.  r/  )r6  rX   rc   r  rd  zinvalid UTF8r   r+  r0  r  r   )rZ   rG   r  r;   r9   r@   r   r   rN   r   r   r   r$  r5  r   r  r  r  r   )
r  r   rr   r  r  r3  rN   r  r   r4  r   r   r   test_column_names_encoding  s&   r7  c                 C   sT  ddl m} ttjg dddtjg dddd}| d	 }|  ||t|d
  tj|t	 d}|
|}||sBJ t| tj|dd}|
|}||sYJ |j
|ddgd}|jddgkskJ |j
|ddgd}|jddgks}J ||t|d dd tt |
tj|dd W d    d S 1 sw   Y  d S )Nr   )r  r  rx  r
  r  r?   rV   feather_datasetr  r   r  rX   rW   r&   zdata1.featherr   version)pyarrow.featherr  r;   r   r  rJ  rZ   r   rN   r  r   r   rQ   r;  r   r   r|  )r  r   r  r   r  rN   r#  r   r   r   test_feather_format  s,   

"r<  )r  r  brotlic                 C   s  t t jdgd ddt jg dd ddd}t j|s#t  | d	 }|  t	 }| d
 }|  tj
|t|d ||jd dd |dkrtjtdd |j|d}W d    n1 sdw   Y  tjtdd t |}|j|d}W d    d S 1 sw   Y  d S |j|d}tj
|t|d ||d tj|t	 d}	||	}
|
|sJ |d d }| j}|d d }| j}||k sJ d S )Nr   ,  rx  r
  r  r  r?   rV   feather_dataset_compressedfeather_dataset_uncompressedz
data.arrowr  r   file_optionsr=  zCompression typer   r   part-0.arrow)r;   r   r  r  r	  r   r
  rJ  r   r  r{  rZ   r  r   r|  rN   r   r   statst_size)r  r  r   r   r  r   uncompressed_basedirwrite_optionscodecrN   r#  compressed_filecompressed_sizeuncompressed_fileuncompressed_sizer   r   r   test_feather_format_compressed#  sX   







rM  c                 C   sp   g }t dD ]}t|gd dd t dD d}tj|t| |d qt| d }tj|j||d ||fS )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    r   r   c                 S   s   g | ]}t   qS r   rh  rj  r   r   r   r]   `  r  z2_create_parquet_dataset_simple.<locals>.<listcomp>rr  metadata_collector	_metadata)r,   r;   r   rm   rW  rZ   write_metadatar9   )	root_pathrO  r7   r   metadata_pathr   r   r   _create_parquet_dataset_simpleX  s   $
rT  c                 C   s\   | d }t |\}}t|}|j|jsJ t|jdks!J | }|jdks,J d S )NrT  r   (   )	rT  r   parquet_datasetr9   r   r   r   r   r   )r  rR  rS  r   rN   r#  r   r   r   test_parquet_dataset_factoryn  s   
rW  win32z'Results in FileNotFoundError on Windows)reasonc           	      C   s   t d}| d }t|\}}|d}tt|}tj||d}|j	
|j	s,J t|jdks5J | }|jdks@J d S )Nr  rT  r  r  r   rU  )r   r  rT  r   rd   rv   r  r   rV  r9   r   r   r   r   r   )	r  r  rR  rS  r   r  r   rN   r#  r   r   r   #test_parquet_dataset_factory_fsspecz  s   

rZ  c                 C   s   | d }t dgd tjdd}g }tj|t||d t|d }tj|j	||d t
|}|j	|j	s<J | }|jdksGJ d S )NrT  r   r   rr  rN  rP  )r;   r   r  ri  randnrm   rW  rZ   rQ  r9   r   rV  r   r   r   )r  rR  r   rO  rS  rN   r#  r   r   r   &test_parquet_dataset_factory_roundtrip  s   	

r\  c           	   	   C   s   g }t dD ]-}tdtt |d |d d i}| | d }tj|||d |d | d qt| d }t|j	|| t
|}| }|d }|tt dd	ks]J d S )
Nr   r  r   rR   rN  r  rP  r   r  )r,   r;   r   ri   rm   rn   set_file_pathrZ   rQ  r9   r   rV  r   r  r<  )	r  	metadatasr7   r   
table_pathrS  rN   scanned_tablescanned_colr   r   r   "test_parquet_dataset_factory_order  s   
rb  c                 C   s   | d }t |\}}t|dd   t|}|j|js#J t|j	dks,J t
t |  W d    d S 1 sAw   Y  d S )Ntest_parquet_dataset_invalid	*.parquetr   r   )rT  ri   globunlinkr   rV  r9   r   r   r   r   r   r  r   )r  rR  rS  r   rN   r   r   r   $test_parquet_dataset_factory_invalid  s   

"rg  c                 C   sz   t t| d}t|d j }g }|D ]}t|j}|t	|
|  || q| d }tj|||d |S )Nrd  r   rP  rN  )ri   r  rglobrm   ParquetFiler9   to_arrow_schemar  r]  rZ   rK  r-   rQ  )rR  parquet_pathsr9   rO  rr   r  rS  r   r   r   _create_metadata_file  s   rl  c              	   C   sr   t jt tdt tjdt tddgdgg dd}|ddi}t	j
|t| d	gd
 t| |fS )Nrf  rW   rX   r   rk  rm  r   r$   r   r`  )r;   r   r  r,   r  ri  r[  r  rC   rm   rW  rZ   rl  )rR  r   r   r   r   #_create_parquet_dataset_partitioned  s   rm  c                 C   s   | d }t |\}}tjdd}tj||d}|j|js J t|jdks)J | }|j	dks4J |
 djdd	}|
 }tj|| d S )
N(test_parquet_dataset_factory_partitionedr   ra  r{  r   rf  r  Tdrop)rm  r   r   rV  r9   r   r   r   r   r   r  sort_valuesreset_indexr0   testingassert_frame_equal)r  rR  rS  r   r   rN   r#  rM  r   r   r   rn    s   rn  c                 C   sh   | d }t |\}}tj|dd}|j|jsJ d|jjv s"J t| }d|d jjv s2J d S )N%test_parquet_dataset_factory_metadatar   r{     keyr   )	rm  r   rV  r9   r   r  ri   rF   rY  )r  rR  rS  r   rN   r   r   r   r   ru    s   ru  c           
      C   sX  |\}}| d }t |\}}||g tj|tjdd|d}W d    n1 s*w   Y  |g  t| }W d    n1 sDw   Y  |g  t|tddk W d    n1 sdw   Y  |g  |d tddk W d    n1 sw   Y  |g  |d  }	|	d   W d    d S 1 sw   Y  d S )N#test_parquet_dataset_lazy_filteringr   ra  )r   r   r     r   )	rT  r   rV  r   ri   rF   r<   r   rx  )
r  r   rd   r   rR  rS  rG  rN   r   rg_fragmentsr   r   r   rw    s.   




"rw  c                 C   sp   t dg di}| d }|| t|}||j}|j|dgdj}d|jv s-J |j|dds6J d S )NrW   r  ru  r&   s   pandasTr   )	r0   r1   r  r   rN   r   r9   r  r   )r  r   rD   rr   rN   r9   r7  r   r   r   test_dataset_schema_metadata:  s   

rz  c                 C   s   t dt jg dddi}t|t| d  t dt  fg}tj	| d d|d}|j
|tddkd	}|d |d d
dsIJ t| d }|j
|tddk|d}|d |d d
dsoJ d S )Nr_  r  r   r
  r  r   rd  r   r   r>   r   r  )r;   r   r  rm   rn   rZ   r9   r>   r   rN   r   r<   r   r  r[  ri   rF   )r  r   r   r9   rN   filteredrO   r   r   r   test_filter_mismatching_schemaL  s   
"&r|  c                 C   s   t d ttdd}t| d }tj||dgd tj	|dd}|
|}|j
|dgd	}|d|ds>J d S )
Nza a b br   r  r%  r   r`  r   r{  r&   )r;   r   r	  ri   r,   rZ   rm   rW  r   rN   r   r  r   )r  r   r   rr   rN   all_cols	part_onlyr   r   r   +test_dataset_project_only_partition_columnsb  s   
r  c                 C   s   t dtjg dddi}| d }|j|dd tj|dtdt	 fgd	}t
dtg dt	 i}|||sBJ d S )
Nr_  r  objectdtypez(test_dataset_project_null_column.parquetr   r  r   rd  )r0   r1   r  r  r  r   rN   r;   r9   r>   r   r   r   )r  r   rD   r  rN   rM  r   r   r    test_dataset_project_null_columnr  s   r  c                 C   s   ddl m} tg dg dg dd}||| d  tj| d dd	}|j|td
tdj	dddtddkdd}tg dtj
g dddg dd}||s\J tjtdd |j|d
d
id W d    d S 1 sxw   Y  d S )Nr   r  r  )r  r  r  r4  r  r  r  r   r  r  r   Fsafer  rW   )	A_renamedB_as_intC_is_ar&   r
  )TFFzExpected an Expressionr   )r   r  r;   r   r  r   rN   r   r<   r  r  r   r   r   r   )r  r   r  r   rN   r#  rM  r   r   r   test_dataset_project_columns  s$   
"r  c           	      C   sr  t | \}}t|}t|jtjsJ t| \}}t|}t|jtjs(J tj|dd}|j}|d us8J t|tjs@J |jt	dt	
 fgksOJ t|jdksXJ |jd t	g dt	
 ksiJ tjt	dt	
 fgdd}t|tjsJ t|jdksJ tdd	 |jD sJ tj||d}|j}t|tjsJ |jt	dt	
 fgksJ t|jdksJ td
d	 |jD sJ tj|dd}tjt| |j|j|jd}|jd u sJ | d }t|\}}tj|dd}|j}|d usJ t|tjsJ |jt	dt	 fgksJ t|jdks'J t|jd  ddhks7J d S )Nr   r{  r   r   r   )r   r   r   ra  c                 s   rU  r   r   r   r   r   r   rV    rW  z6test_dataset_preserved_partitioning.<locals>.<genexpr>c                 s   rU  r   r   r   r   r   r   rV    rW  r   zdata-partitioned-metadatarW   rX   )r  r   rN   rH   r   r   rz  rq  r9   r;   r   r   re  r  rs  r   ri   rF   r   r   rm  rV  r@   r   r<  )	r  rG  rr   rN   ry  r   rG  rR  rS  r   r   r   #test_dataset_preserved_partitioning  sL   

" $r  c                 C   s   t t dt  t dt t  t  g}t jg dtt	dd|d}t
| d }tj||dgd t| d }|d |d ksOJ |d|ds\J d S )	Nr_  r   )NNrW   rW   r   r  rc   r%  r`  )r;   r9   r<   r>   rw  r   r@   r   ri   r,   rZ   rm   rW  r  r  r<  r   )r  r9   r   rr   actual_tabler   r   r   +test_write_to_dataset_given_null_just_works  s    

r  c                 C   s2   dd l m} |j| ||dfgd}|| |S )Nr   	ascending)r   )pyarrow.computecomputesort_indicesSortOptionsr   )tabsort_colr  sorted_indicesr   r   r   _sort_table  s
   r  c                 C   st   |p|}t j| |d|dd t|d}t|t|ksJ t j|d|d}t| |t|  |s8J d S )Nr  Fr   r   r   *rp  )	r   r{  ri   rh  r   rN   r  r   r   )rN   r.  expected_filesr  base_dir_pathr   
file_pathsrG  r   r   r   _check_dataset_roundtrip  s   
r  c                 C   s   | d }|   t|}t|}| d }|d g}t|t||d| | d }|d g}t|||d| | d }|   t|}t|}| d }|d g}t|t||d| d S )NrI  zsingle-file-targetrC  rW   zsingle-file-target2rR  zsingle-directory-target)rJ  r  r   rN   r  rZ   r3  )r  rq   rG  rN   targetr  r   r   r   test_write_dataset  s"   





r  c                 C   s   | d }t |}tjdd}tj||d}| d }|d |d d |d |d d g}tjtd	t fgdd}t|t||d
||d | d }|d |d d |d |d d g}ttd	t fg}t|t||d
||d d S )Npartitionedr   ra  r{  zpartitioned-hive-targetpart=arC  part=br   r  partitioned-dir-targetrW   rX   )	rm  r   r   rN   r;   r9   r@   r  rZ   )r  rq   rG  r   rN   r  expected_pathsr  r   r   r   test_write_dataset_partitioned*  s4   
r  c                    s   t g dg dd}tj| ddgd tj ddgd}|j} fdd|D }|h d	ks3J | }||s>J d S )
Nr  r  rV   ro  rX   rp  c                    "   h | ]}t t| jqS r   rZ   r6  r7  rK  r8  r  rX  r   r   r|   S      z6test_write_dataset_with_field_names.<locals>.<setcomp>>   r   r  r  r;   r   r   r{  rN   r   r   r   r  r   r  r   partitioning_dirsr  r   rX  r   #test_write_dataset_with_field_namesK  s   

r  c                    s   t g dg dd}tj| ddgdd tj ddd}|j} fd	d
|D }|h dks3J | }||s>J d S )Nr  r  rV   ro  rX   r   )r   r   partitioning_flavorrp  c                    r  r   r  r  rX  r   r   r|   d  r  z;test_write_dataset_with_field_names_hive.<locals>.<setcomp>>   b=xb=yb=zr  r  r   rX  r   (test_write_dataset_with_field_names_hive\  s   

r  c                 C   s   t g dg dg dd}tj|| ddgd tj| ddgd}t 5}tj|jddgd	|ddgd tj|ddgd}| }t	|
 |d

 ksSJ W d    d S 1 s^w   Y  d S )Nr  r  r  r4  ro  rX   rp  r  r&   rW   )r;   r   r   r{  rN   ry  rz  r   r   r~  r  drop_columnsr  r   rN   tempdir2r  r  r   r   r   test_write_dataset_with_scannerm  s"   



"r  c           	         sH  t  G fdddt}t|t ttdt	 g}tj
tttdg|d dd}dd	 fd
d}tjj| |d	dt jfddd}|  z;t fdd}d}d}| dk r|kr~|kr|d	}n}td | dk sq|sJ W d  |  d S d  |  w )Nc                       s   e Zd Z fddZdS )z6test_write_dataset_with_backpressure.<locals>.GatingFsc                    s       | jj||dS )Nr  )waitr   rh   )r   rr   r  consumer_gater   r   rh     s   zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamN)r   r   r   rh   r   r  r   r   GatingFs  s    r  r6   r  rc   r          Tc                   3   s:    k rs	d S t d d7  V  k sd S d S )Ng{Gz?r   )r  sleepr   )rt   batches_readend
keep_goingr   r   counting_generator  s   
z@test_write_dataset_with_backpressure.<locals>.counting_generatorrv  c                      s   t jtd dS )Nr   r  )r   r{  rZ   r   )	gating_fsr   r  r   r   r    s    z6test_write_dataset_with_backpressure.<locals>.<lambda>)r  c                      s   t     S r   )r  r   )startr   r   duration  r   z6test_write_dataset_with_backpressure.<locals>.durationFr   r  )	threadingEventr   rd   rv   ru   r;   r9   r<   r   rk   r  ri   r,   r   r4  rl   Threadr  r  r  r   r  )	r  r  r9   min_backpressurer  write_threadr  
last_valuebackpressure_probably_hitr   )	rt   r  r  r  r  r  r   r  r  r   $test_write_dataset_with_backpressure  sJ   	




r  c                 C   s   t g dg dd}tj|| ddgd tj| ddgd}t ,}tj||ddgd tj|ddgd}| }t|	 |	 ksGJ W d    d S 1 sRw   Y  d S )Nr  r  rX   r  ro  rX   rp  )
r;   r   r   r{  rN   ry  rz  r   r~  r  r  r   r   r   test_write_dataset_with_dataset  s   

"r  c           	      C   s  | d }t g dg dd}tjt t dt  gdd}dd	 }tj|||d
d t g dg dd}t	t j
 tj|||d
d W d    n1 sTw   Y  t ddgi}|d d }tj|| tj|||d
dd t g dg dd}tj| d
|d }||| | sJ tj|||d
dd t g dg dd}tj| d
|d }||| | rJ d S )Nr   r  r  r  r  r   )r9   r  c                 S   s>   |   djdd}|  djdd}||sJ d S )NrX   Tro  )r  rq  rr  r   )rS  rT  df1df2r   r   r   compare_tables_ignoring_order  s   zGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderro  r  r4  r  rX   ezc=2z	foo.arrowoverwrite_or_ignore)r   r   existing_data_behavior)r  r   rW   rX   r  )r   r   r   rU   r   rp  delete_matching)r   rW   rX   r  r  )r;   r   r   r   r9   r<   r>   r{  r   r   r5  r   r  r  rN   r   exists)	r  rq   r   r   r  extra_table
extra_fileoverwrittenreadbackr   r   r    test_write_dataset_existing_data  sV   



r  r   r   r   c                    s    fddt | D S )Nc                    s   g | ]}t  qS r   )ri  randintrj  rq  rp  r   r   r]         z._generate_random_int_array.<locals>.<listcomp>ru  r  rp  rq  r   r  r   _generate_random_int_array  s   r  c                 C   sN   g }g }t | D ]}|t|d|d |dt|  qtj||d}|S )Nr   r  r  r6   rn  )r,   r-   r  rZ   r;   rk   )num_of_columnsnum_of_recordsr6   r;  r7   rk   r   r   r   _generate_data_and_columns  s   r  c                 C   s   t tt| d| S )Nz**/*.)r   ri   r6  r7  re  base_directoryr   r   r   r   _get_num_of_files_generated  s   r  c                    s   | d }d d}d}d}t ||}tj||d |d t|}|  d }t||ks.J g }t|D ]\}	}
|t|
 }tj|dd}|	|
 jd	  q4|t|ksXJ |t|ks`J t fd
d|D smJ d S )Nr   r   r   #   r   )r   max_rows_per_filemax_rows_per_groupr   r   r   c                 3   s    | ]}| kV  qd S r   r   )r[   file_rowcountr  r   r   rV  D  s    z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>)r  r   r{  r  r  r   rf   rZ   rN   r-   r   shaperY  rs  )r  rq   r  r  r  rk   files_in_direxpected_partitionsresult_row_combinationrG  f_filef_pathrN   r   r  r   $test_write_dataset_max_rows_per_file#  s2   

r  c                    s   | d }d}d}d g d} fdd|D }|d }t j||||d	d
 t|}t|D ]>\}}	|t|	 }
t j|
d	d}| }| }t|D ] \}}|j	}|t
|d k re||krb||ksdJ qK||kskJ qKq.d S )Nr   r  r'  r   )
r   r   r   r   r   r   r   r   r   r   c                    s   g | ]}t  |qS r   )r  )r[   r  r  r   r   r]   Q  s
    z9test_write_dataset_min_rows_per_group.<locals>.<listcomp>min_rows_groupr   )r6   r.  min_rows_per_groupr  r   r   r   )r   r{  r  r  rf   rZ   rN   r   r   r   r   )r  rq   r  r  record_sizesrecord_batchesdata_sourcer  rG  r  r  rN   r   batchesr  rt   rows_per_batchr   r  r   %test_write_dataset_min_rows_per_groupH  s8   

r  c                 C   s   | d }d}d}d}t ||}|d }tj|||dd t|}g }|D ]"}	|t|	 }
tj|
dd}| }| }|D ]}|	|j
 q>q%|dd	gksPJ d S )
Nr   r  r      max_rows_groupr   )r6   r.  r  r   r   r6  )r  r   r{  r  r  rZ   rN   r   r   r-   r   )r  rq   r  r  r  rk   r  r  batched_datar  r  rN   r   r  rt   r   r   r   %test_write_dataset_max_rows_per_groupl  s.   
r  c                 C   s:  | d }d}d}ddg}t jg dg dg|d}t jg d	g d
g|d}t jg dg dg|d}t jg dg dg|d}t j||||g}	tjt || t  fgdd}
|d }tj|	||
|d dd }|||||\}}||ks{J |d }d}tj|	||
||dd |||||\}}||ksJ d S )Nr   r   r   c1c2)r   r   rU   r   r   r   )rW   rX   r  r  r  rW   r  )r   r  r*  r'  r   r   )rW   rX   r  r  r  r  )r,  r   r7  r6  r   r   )rW   rX   r  r  r  r  )r&  r  rx  r&  r   r   )rW   rX   r  r  r  rX   r   ra  default)r6   r.  r   r   c                 S   s(   t | |d}ttj|| }||fS )Nr  )r  r   r;   r  unique)r  rk   r   col_idnum_of_files_generatednumber_of_partitionsr   r   r   _get_compare_pair  s
   z<test_write_dataset_max_open_files.<locals>._get_compare_pairmax_1rU   F)r6   r.  r   r   max_open_filesr   )	r;   rk   rA   rl   r   r   r9   r@   r{  )r  rq   r   partition_column_idr;  record_batch_1record_batch_2record_batch_3record_batch_4r   r   data_source_1r  r  r  data_source_2r	  r   r   r   !test_write_dataset_max_open_files  sh   




r  c                 C   s   | d }t |}tj|tjjddd}| d }|d |d d |d |d d g}tjt|jd	gd	t	ddgid
}t
|t||d||d d S )Nr  Tr  r{  r  rW   rC  rX   r   rd  r  )rm  r   rN   rq  r  r   r;   r9   r<   r  r  rZ   )r  rq   rG  rN   r  r  r   r   r   r   #test_write_dataset_partitioned_dict  s&   

r  c                    s   | d }t |}tj|dd}tjtdt fgdd}| d }g   fdd}tj||d	|d
|d |d d |d d h}tt	t
j }||ksOJ | d }	tj||	d	|dd tj|d	|d}
tj|	d	|d}|
 | sxJ d S )Nr  r   r{  r   ra  partitioned1c                    s     | j d S r   )r-   rr   written_filepaths_writtenr   r   file_visitor  s   z4test_write_dataset_use_threads.<locals>.file_visitorr  Tr   r   r   r  r  part-0.featherr  partitioned2Fr  rp  )rm  r   rN   r   r;   r9   r@   r{  r   rj   r6  r7  r   r   )r  rq   rG  rN   r   target1r  r  paths_written_settarget2result1result2r   r  r   test_write_dataset_use_threads  s4   

r!  c                 C   s   t dtdi}|jdd}tj|| dddd t| jdd	d  }d
}|D ]}t	|}||ks;J d| |}q*d S )NrW   rJ  r   )max_chunksizer   T)r   r   preserve_orderFr   r  z!Sequence expected to be ordered: )
r;   r   r,   r   r   r{  rN   r   to_numpyr}  )r  r   r  seqprevitemcurrr   r   r   -test_write_dataset_use_threads_preserve_order  s   
r)  c           
         s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
dd t|d}|d g}t|t|ksIJ tj	|dd
 }||sYJ | d }|d |d d |d |d d g}g  g  fdd}tjt dt  fgdd}tj||dd
||d t|d}t|t|ksJ dd  D }|ksJ tj	|d|d}|
 |sJ t dksJ  D ]}	t|	|v sJ qd S )Nrf  c                 s   rg  r   rh  rj  r   r   r   rV  "  rW  z#test_write_table.<locals>.<genexpr>rW   r   rX   rk  rm  singledat_{i}.arrowr  basename_templater   r  zdat_0.arrowro  r   r  r  r  c                    s     | j  | j d S r   )r-   rr   r  r  visited_pathsvisited_sizesr   r   r  ;  s   z&test_write_table.<locals>.file_visitorr   r   ra  )r   r-  r   r  c                 S   s   g | ]}t j|qS r   )r  rr   getsizer  r   r   r   r]   F  r  z$test_write_table.<locals>.<listcomp>rp  r   )r;   r   r  r,   r   r{  ri   rh  r   rN   r   r   r   r9   r@   r   r6  r7  )
r  r   r.  r  r  r#  r  r   actual_sizesvisited_pathr   r.  r   test_write_table   sN   "

r4  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}t |gd	 }| d
 }tj||dd t|dt|d gksJJ tj	|dd
 |sXJ | d }tj|g|dd t|dt|d gksuJ tj	|dd
 |sJ | d }tj| |dd t|dt|d gksJ tj	|dd
 |sJ | d }tj||g|dd t|dt|d gksJ tj	|dd
 t |gd	 sJ d S )Nr   c                 s   rg  r   rh  rj  r   r   r   rV  Q  rW  z6test_write_table_multiple_fragments.<locals>.<genexpr>rW   r   rX   rk  rm  r   r*  r  r   r  r  ro  zsingle-listmultiplezmultiple-table)r;   r   r  r,   r?  r   r{  r   rh  rN   r   r   r   )r  r   r.  r   r   r   #test_write_table_multiple_fragmentsO  s:   "  

r6  c                 C   s,  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tjd
d | D ||jddd tj|dd	 }|
|sLJ | d }t j|j| }tj||ddd tj|dd	 }|
|ssJ | d }t|}tj||ddd tj|dd	 }|
|sJ d S )Nrf  c                 s   rg  r   rh  rj  r   r   r   rV  u  rW  z&test_write_iterable.<locals>.<genexpr>rW   r   rX   rk  rm  inmemory_iterablec                 s   s    | ]}|V  qd S r   r   )r[   rt   r   r   r   rV  z  rj  r+  ro  )r9   r-  r   r   inmemory_readerr,  inmemory_pycapsule)r;   r   r  r,   r   r{  r   r9   rN   r   r   rp  rl   r   )r  r   r.  r#  r   streamr   r   r   test_write_iterables  s2   "
r;  c                 C   s2  t jt tdt dd tdD t dgd dgd  gg dd}t|}| d	 }tj|||d
d |tj|dd}|	|sKJ | d }tj|j|dgd|d
d |tj|dd}|	|
dgsrJ tjtdd tj||||jd
d W d    d S 1 sw   Y  d S )Nrf  c                 s   rg  r   rh  rj  r   r   r   rV    rW  z%test_write_scanner.<locals>.<genexpr>rW   r   rX   rk  rm  dataset_from_scannerr  r   ro  dataset_from_scanner2r  r&   zCannot specify a schemar   )r9   r   )r;   r   r  r,   r   rN   r{  r   r   r   rd  r   r   r|  r9   )r  r   r   rN   r.  r#  r   r   r   test_write_scanner  s4   "
"r>  c                 C   s   t jt tdt dgd dgd   gddgd}t|dgj}| d }tj	||d	|d
 tj
jdgdd}tj|d|d
 }||sNJ d S )Nrf  rW   r   rX   r_  r   rm  rN   r  rp  Tr  ro  )r;   r   r  r,   rK  r   r   rd  r9   r{  r   r  rN   r   r   )r  r   r   r.  partitioning_readr#  r   r   r   !test_write_table_partitioned_dict  s(   r@  c              	   C   s  t jt jtdddt tjdddddt tdd	gd
gg dd}| d }tj	||dd t
|d}|d g}t|t|ksJJ tj|dd }||sZJ dD ]w}t }|j|d}dt|v spJ | d|  }tj	||||d t|d }	|dkrdnd}
|	j|
ksJ tj|dd }|j}|dkr|d|dt  }|dv r|d|dt d}||}||sJ q\d S )Nrf  r  r
  r  zdatetime64[D]r  zdatetime64[ns]rW   rX   r   rk  rm  rV  r   r   r  part-0.parquet)1.02.42.6r9  z(<pyarrow.dataset.ParquetFileWriteOptionsparquet_dataset_versionrA  rB  rD  r   )rB  rC  r   r  )r;   r   r  r,   r  r  r  r  r   r{  ri   rh  r   rN   r   r   r   r  r  rm   read_metadataformat_versionr9   r<   	with_typer>   r  r  )r  r   r.  r  r  r#  r:  r   optsmetaexpected_versionr9   rM  r   r   r   test_write_dataset_parquet  sD   	

rL  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
d t|d}|d g}t|t|ksHJ tj	|d
d
 }||sXJ tjtjj|jjdd}|jdd}| d }tj||||d tj	||d
 }||sJ d S )Nrf  c                 s   rg  r   rh  rj  r   r   r   rV    rW  z)test_write_dataset_csv.<locals>.<genexpr>rW   r   rX   )r  rl  chr1rm  csv_datasetr  r   r  z
part-0.csvr  r  F)include_headercsv_dataset_noheaderrA  )r;   r   r  r,   r   r{  ri   rh  r   rN   r   r   r  r   r  r  r9   rn  r  )r  r   r.  r  r  r#  r   rI  r   r   r   test_write_dataset_csv  s*   "


rQ  c                    s   t jt tdt dd tdD t dgd dgd  gg dd}d	  fd
d}| d }tj||d|d  s?J d S )Nrf  c                 s   rg  r   rh  rj  r   r   r   rV  
  rW  z:test_write_dataset_parquet_file_visitor.<locals>.<genexpr>rW   r   rX   rk  rm  Fc                    s&   | j d ur| j jdkrd d S d S d S )NrU   T)r  r+  r  visitor_calledr   r   r    s
   
z=test_write_dataset_parquet_file_visitor.<locals>.file_visitorrV  r   )r   r  )r;   r   r  r,   r   r{  )r  r   r  r.  r   rR  r   'test_write_dataset_parquet_file_visitor  s   "
rT  c           	         s   dd t dD }dd t dD }t||dgd dgd  d}| d	 }tjtd
t fgdd}g  d  fdd}tj||d|d|d |d d |d d h}tt	t
j }||ksfJ d uslJ jdkssJ d S )Nc                 S   s    g | ]}|gd  D ]}|q	qS r   r   r[   r   r'  r   r   r   r]     s     z?test_partition_dataset_parquet_file_visitor.<locals>.<listcomp>r   c                 S   s$   g | ]}|gd  D ]}|d  q	qS r   r   rU  r   r   r   r]      s   $ rW   rf  rX   rk  r  r   r   ra  c                    s   | j r| j  | j d S r   )r  r-   rr   r  r  sample_metadatar   r   r  ,  s   zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitorr   Tr  r  rA  r  r   )r,   r;   r   r   r   r9   r@   r{  r   rj   r6  r7  r+  )	r  f1_valsf2_valsr   rR  r   r  r  r  r   rV  r   +test_partition_dataset_parquet_file_visitor  s.   

rZ  c                 C   sd   t dtjdddgi}|d jjdksJ tj|| dd t	| d }|d jjdks0J d S )NrW   r  zEurope/Brussels)tzr   r   rA  )
r;   r   r0   r  r  r[  r   r{  rm   r  )r  r   r#  r   r   r   (test_write_dataset_arrow_schema_metadataA  s
   r\  c                 C   sb   ddl m} tdg di}|ddi}tj|| dd || d	 j}|j	ddiks/J d S )
Nr   r  rW   r  rv     valuer  r   r  )
r   r  r;   r   rC   r   r{  r  r9   r  )r  r  r   r9   r   r   r   "test_write_dataset_schema_metadataN  s   r^  c                 C   sV   t dg di}|ddi}tj|| dd t| d j}|jddiks)J d S )NrW   r  rv  r]  r   r   rA  )	r;   r   rC   r   r{  rm   r  r9   r  )r  r   r9   r   r   r   *test_write_dataset_schema_metadata_parquetZ  s
   r_  c                 C   sL  | \}}}}}}}}d ||||}tjttdtdd tdD tdgd dgd  gg dd	}tjtd
t fgdd}	tj	|d|d|	d tj
d|ddd }
|
|scJ | d}tj	||d|	d tj
d|ddd }
|
|sJ | d}tj	|d|d|	d tj
d|ddd }
|
|sJ d S )Nr  rf  c                 s   rg  r   rh  rj  r   r   r   rV  p  rW  z(test_write_dataset_s3.<locals>.<genexpr>rW   r   rX   rk  rm  r   r   ra  zmybucket/datasetr  r  ro  zmybucket/dataset2rp  r  r}  zmybucket/dataset3)r   r;   r   r  r,   r   r   r9   r@   r{  rN   r   r   )r&  rG  rd   r(  r)  r*  r+  uri_templater   r   r#  r'  r   r   r   test_write_dataset_s3e  sP   "


ra  aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	   	   C   s  ddl m} | d \}}}}t| tdd |dd| d| dd}tjttd	td
d td	D tdgd dgd  gg dd}tj	t
dt fgdd}tj|d|dd|dd tjd|ddd }||suJ tj|d|dd|dd tjd|ddd }||sJ tjtdd tj|d|dddd W d    n1 sw   Y  |d d| d| ddd!}tjtd"d tj|d|dddd W d    d S 1 sw   Y  d S )#Nr   )r  r  test_dataset_limited_user
limited123r  http)r*  r+  endpoint_overrideschemerf  c                 s   rg  r   rh  rj  r   r   r   rV    rW  z1test_write_dataset_s3_put_only.<locals>.<genexpr>rW   r   rX   rk  rm  r   r   ra  zexisting-bucketr  Fr  )r   r   rg   r   r  ro  r  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   r   rg   r  limited)r*  r+  re  rf  allow_bucket_creationz(Access Denied|ACCESS_DENIED))r   r  r   _minio_put_only_policyr;   r   r  r,   r   r   r9   r@   r{  rN   r   r   r   r   r%  )	r  r  r(  r)  rG  rd   r   r   r#  r   r   r   test_write_dataset_s3_put_only  s~   "	"rj  c              
   C   s   t dd d gi}t|| d  t t dt t  t  g}t	j
j| d g|t	 t d}||}|j|ks@J d S )NrW   ru  )r~   r9   r   r   )r;   r   rm   rn   r9   r<   rw  r   r@   r   r   r   r   rd   ru   r   )r  r   r   r9   fsdsr   r   r   $test_dataset_null_to_dictionary_cast  s   
rl  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||dd}| t g dg dg ddksZJ |j|dddd}| dt g dg dg ddks{J d S )Nr   r   r  rW   rX   r  colAr|  rS  ro  r   c   r   r   Zr  r  )colBcol3rT  rp  ru  r  r  Nrp  r|  rv  
full outer)	join_typer   r   r  rr  rW   rX   r  Nr  r  Nrt  r;   r   r   r{  rN   r  r   r  r  rS  ds1rT  ds2r#  r   r   r   test_dataset_join	  s0   
r  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||d}| t g dg dg ddksYJ |j|dddd}| dt g dg dg ddkszJ d S )Nrm  rn  ro  rS  ro  r   rq  rs  )rp  rv  rT  rp  rw  rx  ry  _rrz  right_suffixr{  r|  r}  r~  r  r   r   r   test_dataset_join_unique_key(  s0   
r  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
d}tj|| d dd tj| d dd}|j|dddd}| dt jg dg dg dg dg dgg ddksnJ d S )Nrm  r   rf  <   rn  )rp  ru  colValsrS  ro  r   rq  rr  rf  r   rs  rT  rp  ry  r  r  r{  )r   rf  r  Nr|  )r   rf  Nrr  r}  )rp  ru  r  colB_r	colVals_rrm  r~  r  r   r   r   test_dataset_join_collisionsG  s0   r  c                 C   s   t jg dg dd}tj|| d dd tj| d dd}t jg dg dg d	d
}tj|| d dd tj| d dd}|j|dddddd}| dt 	g dg dg ddksfJ d S )N)r   r   r   r  r*  )rW   rX   rW   rX   r  ro  rS  ro  r   )r   r,  rx  )rW   rX   g)r  r  g      @)ru  rv  colCrT  rp  r|  r   ru  rv  onby	toleranceright_onright_by)r  NNNN)rp  r|  r  )
r;   rA   from_pydictr   r{  rN   	join_asofr   r  r   r  r   r   r   test_dataset_join_asofc  s,   r  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
g dd}tj|| d dd tj| d dd}|j|dddgdd}| dt g dg dg dg ddksmJ d S )Nrm  r  r  )rp  ru  r  rS  ro  r   r  rs  rq  r  )ru  r  rp  r  rT  r  rp  ru  r   r  r  r  )Nr  Nrp  ru  r  r  )r;   r   r   r{  rN   r  r   r  r  r   r   r   "test_dataset_join_asof_multiple_by  s0   r  c                 C   s   t dg di}tj|| d dd tj| d dd}t g dg dd}tj|| d	 dd tj| d	 dd}|j|dg d
d}| t g dg ddksVJ d S )Nr  r  rS  ro  r   rs  r  )r  r  rT  r   r  )rt  rt  r  )r  r  )r;   r   r   r{  rN   r  r   r  r   r   r   test_dataset_join_asof_empty_by  s$   
r  c              	   C   s   t g dg dg dg dd}tj|| d dd tj| d dd}t g d	g d
g dg dg dd}tj|| d dd tj| d dd}d}tjt|d |j|dddgddddgd W d    d S 1 sqw   Y  d S )Nrm  r  r  rn  r  rS  ro  r   r  rs  )r  r  r>  rq  r  )ru  r  colUniqrp  r  rT  zXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.r   r  rp  ru  r   r  )	r;   r   r   r{  rN   r   r   r|  r  )r  rS  r  rT  r  r  r   r   r   !test_dataset_join_asof_collisions  s2   "r  dstyperd   memc                 C   s  t g dg dd}|dkr$tj|| d dd tj| d dd}n|dkr.t|}nt|td	d
k tddk}|dkrItj	ntj
}t||sSJ | t dgdgdkscJ |dt dgdgdkstJ |td	dk td	dkjtd	dkd}| t dgdgdksJ tj|| d dd tj| d dd}| t dgdgdksJ |jtt ddgddgdddd}| dt dd gddgddgdksJ tt |d  W d    n	1 sw   Y  tt |  W d    n	1 sw   Y  |jd}	|td	d
k |	}
|
 t d	ddgiksGJ tt j ||	  W d    d S 1 saw   Y  d S )Nr   r   r  r'  rW   rX   r  r  ro  rd   rS  ro  r   r  rp  rU   r|  rW   r   r   r'  r  r   r   rX   r{  r   rf  ru  r|  zright outerkeysrz  ru  )rp  ru  r|  )r;   r   r   r{  rN   r  r   r  r<   r   rk  rH   r   r   r   r  r  r   r   r   r|  rF   r9   r\  replace_schemar5  )r  r  rS  r  r#  rM  r2r{  joinedschema_without_col2	newschemar   r   r   test_dataset_filter  s   $




$r  c           
      C   s  t g dg dd}t g dg dd}|dkrCtj|| d dd	 tj| d dd	}tj|| d
 dd	 tj| d
 dd	}n|dkrRt|}t|}ntt||ftddk tddkB }|	 t g dg ddks|J |j
tt ddgddgdddd}|	 dt g dg dg ddksJ |tddk }|tddk }	tjtdd t||	f W d    d S 1 sw   Y  d S )Nr  r  ro  )r,  r   r7  )hr7   lrd   rS  ro  r   rT  r  rp  rU   r,  )r   r   r,  )rW   rX   r  r   rf  rW   rX   r  r|  z
left outerr  )r   rf  N)rp  r|  ru  zcurrently not supportedr   )r;   r   r   r{  rN   r  r   r  r<   r   r  r  r   r   r|  )
r  r  rS  rT  r  r  filtered_union_dsr  filtered_ds1filtered_ds2r   r   r   test_union_dataset_filter&  sP   

"r  c                 C   s   | d }t |\}}t|}| }|jdksJ |tddk }| jdks-J t	t
 |  W d    d S 1 sBw   Y  d S )Ntest_parquet_dataset_filterrU  r  r   rf  )rT  r   rV  r   r   r   r  r<   r   r   r|  rF   )r  rR  rS  rG  rN   r#  filtered_dsr   r   r   r  V  s   

"r  c                 C   s   t jt tdgdgd}t|}dtdi}|j|d}tj|| dgdd t	j
tdd	 tj|| dgdd W d
   d
S 1 sGw   Y  d
S )z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    rf  original_columnrm  renamed_columnr&   ro  r  z0'Column original_column does not exist in schemar   N)r;   r   r  r,   r   rN   r<   r   r{  r   r   KeyError)r  r   table_datasetr'   r   r   r   r   4test_write_dataset_with_scanner_use_projected_schemae  s    



"r  r   )ro  r   c              
   C   s   |dkr	t d tddgddgd dddgdd	id gd
ddg dddigd
gd}tj|| d |d tj| d |d}|jg dd}| dd ddgd d	dd gddddg ddd dgddgkskJ d S )Nr   zpyarrow.parquetabc123qrs456r   r   buttonr  r  )r  elementvaluesstructsscrollwindow)NrU   r   fizzbuzz)user_ida.dotted.fieldinteractionr   r   )r  zinteraction.typezinteraction.valueszinteraction.structsr  r&   )r  r  )r  r  r  r  r  )	r   r  r;   r   r   r{  rN   r   r<  )r  r   r   r  r   r   r   test_read_table_nested_columns~  s2   



r  c                 C   s   ddl m} | d }tjtg dt tg dt gddg}|j||ddgd	d
 |j|dd	t	t
dt t
dt gd  }||dksWJ |d }tt|}dd |D }tt|}||ksxJ d S )Nr   r  zslash-writer-xr   r   rU   r   r   )experiment/A/f.csvzexperiment/B/f.csvr  zexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metaro  r   )r6   r.  r   r   r  )rr  r   r   r9   r   c                 S   s   g | ]
}d t |dd qS )z	exp_meta=r  r  r   r  r   r   r   r]     r  z5test_dataset_partition_with_slash.<locals>.<listcomp>)r   rN   r;   rA   r  r  r   r  r{  r9   r<   r   r  r  r  r<  r  r   r  r  )tmpdirr   rr   dt_tabler  r  encoded_pathsr  r   r   r   !test_dataset_partition_with_slash  sB   
r  c                 C   s   t t jdt  ddt jdt  ddg}g dg dg}t jj||d}t|| d	  tj	| d	 d
d}|
 j|sBJ tj|| d d
d tj	| d d
d}|
 j|s_J tj||g| d d
d tj	| d d
d}|
 j|s~J d S )Nr   F)nullabler  Tr  Nr   Nrc   	nulltest1r   r   	nulltest2	nulltest3)r;   r9   r<   r>   rA   r  rm   rW  r   rN   r   r   r{  )r  schema_nullablerN  r   rN   r   r   r   'test_write_dataset_preserve_nullability  s   r  c                 C   sP  t t jdt  ddidt dt  g}t t dt  t dt  g}g dg dg}t jj||d}t jj||d}tj||g| d	 d
d tj| d	 d
d}|	 jj
|ddscJ tj||g| d d
d tj| d d
d}|	 jj
|ddsJ tj||g| d d
|d tj| d d
d}|	 jj
|ddsJ d S )Nr   s   foos   barr  r  r  r  rc   test1r   r   Tr   test2test3rd  )r;   r9   r<   r>   rA   r  r   r{  rN   r   r   )r  schema_metadataschema_no_metarN  r   table_no_metarN   r   r   r   *test_write_dataset_preserve_field_metadata  s,   r  c              
   C   s   dD ]n}dD ]i}t t dt  t dt  g}g dg dg}t jj||d}t }| d|  }tj||d|j	||d	d
d tj
|dd}|jD ]}	t|	}
|
dd}|j|u seJ |j||@ u snJ qOqqd S )N)TFr   r  r  r  rc   write_page_index_r   )write_statisticswrite_page_indexr  )r   rB  r  r   r   )r;   r9   r<   r>   rA   r  r   r   r{  r  rN   r   rm   rF  r  r  has_offset_indexhas_column_index)r  r  r  r9   rN  r   r   r.  r  r  r  ccr   r   r   #test_write_dataset_write_page_index  s:   


r  c                 C   s  t jt g dt g dgddgd}|dkr-tj|| d dd	 tj| d dd	}n|d
kr7t|}nt|d 	 g dg ddksMJ |dg 	 g dg ddksbJ |
tddk d 	 g dg ddks~J t jjt jg dt  dt g dgddgd}t|}|dg}| 	 }|d g dksJ |d g dksJ |dg}| 	 }|d g dksJ |d g dksJ d S )N)rU   r   r   r   r   )rX   rW   rX   rW   r  r  r  rm  rd   rS  ro  r   r  )rW   rW   rX   rX   r  r  )r  r  )r  
descending)r  rX   rX   rW   rW   )r   r   rU   r   r   r   )rW   rW   rX   r  )r   r*  r*  r  r
  )r  carr  foobarrW   rX   )rW   r  )r  r*  r*  r   )r  r  r  r  )rW   r  )r;   r   r  r   r{  rN   r  r  r   r  r   r  r<   rA   r  r>   )r  r  r   r   
sorted_tabsorted_tab_dictr   r   r   test_dataset_sort_by  sV   
r  c                 C   s  t dg di}t j }|jdd}| d }tj||||d tjdd}t jj|d}tj||d	 }||ks=J | d
 }t	|| t
| }	t|	dksTJ |	d }
t|
 }|d |d kshJ |d |d |d< |d< |
| tjdd}t jj|d}tj||d	 }||ksJ |t dg diksJ tjtdd tj||d	 }W d   dS 1 sw   Y  dS )zwCheck that checksum verification works for datasets created with
    ds.write_dataset and read with ds.dataset.to_tablerW   r  T)write_page_checksumcorrect_dir)r6   r.  r   rB  r  )default_fragment_scan_optionsr   corrupted_dirr   r      $   F)r   rU   r   r   zCRC checksum verificationr   N)r;   r   rN   r   r  r   r{  r  r   r   ri   iterdirr   	bytearray
read_byteswrite_bytesr   r   r%  )r  
table_origpq_write_formatrG  original_dir_pathpq_scan_opts_crcpq_read_format_crctable_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crctable_corruptrG  r   r   r   1test_checksum_write_dataset_read_dataset_to_tableG  sn   


"r  c                  C   s   d} d}t t}tjjd W d    n1 sw   Y  | t|jv s0|t|jv s0J tj }d}t jt|d |d W d    d S 1 sOw   Y  d S )NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+   z;make_write_options\(\) takes exactly 0 positional argumentsr   )	r   r   r   r;   rN   r   r  rZ   r$   )msg_1msg_2excinfopformatr  r   r   r   test_make_write_options_error  s    
"r  c                 C   st   zdd l m} W n ty   td Y nw d}d}| j|j||j|d }|	 dddgiks8J d S )Nr   zsubstrait NOT enableds   
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml	
u64
	u32

str"i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
bs3  
/functions_comparison.yaml
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yamlequal:any1_any1	
u64
	u32

"
 "
("i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
brI  rZ   4)
pyarrow.substrait	substraitr  r   r
  r   BoundExpressionsfrom_substraitr   r  )rN   psr$  	filteringr#  r   r   r   test_scanner_from_substrait  s   

r  rh  r   )r   r   r   (  r   r(   r  r6  r  ri  sysry  r?  r  r  shutilr   urllib.parser   numpyr  r  r   r   r;   r  r  r  pyarrow.csvr;  r   rd   pyarrow.jsonpyarrow.libr   pyarrow.tests.utilr   r   r   r	   r
   r   r0   r  rN   r   pyarrow.parquetr   rm   mark
pytestmarkr   r8   rE   rQ   fixturero   r   r   r   r  r  r%  r,  r?  rH  rK  rN  rO  rT  r  r  parametrizerZ   tupler  r  r  r  r  r  r  r  r   r  r  s3r2  rH  rJ  rS  rX  r^  rb  rm  rt  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r
  r  r  r  r  r  r  r"  r%  r+  r  r3  r5  r:  r=  r>  rA  rD  rH  rQ  rW  r\  r]  ra  rc  re  ro  rs  rw  rz  r|  r~  r  r  r  r  r  r  r&  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r&  r)  r*  r5  r7  r<  rM  rT  rW  skipifplatformrZ  r\  rb  rg  rl  rm  rn  ru  rw  rz  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!  r)  r4  r6  r;  r>  r@  rL  rQ  rT  rZ  r\  r^  r_  ra  ri  rj  rl  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   <module>   s  
"

 
3

.
G
;

0



 ;
)

9'H
8
%
 
<


(
8*




)



## 
d
U%












	B

$



	9

B

B 9& /
'=#K0$#D&/$+#
/L

N-%
 0I