o
    GhY                     @   sL  d dl Z d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dl	Z	zd dl
ZW n ey5   dZY nw d dlZd dlZd dlmZmZmZmZ dd Zd d
dZdd Zdd Zdd ZG dd de jZG dd deZG dd deZG dd dee	jZG dd dee	jZ G dd dee	jZ!ej"j#G dd dee	jZ$dS )!    N)OrderedDict)Decimal)	read_json	open_jsonReadOptionsParseOptionsc                  c   s4    t j} | E d H  | D ]}| D ]}|| V  qqd S N)stringascii_lowercase)lettersfirstsecond r   X/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/pyarrow/tests/test_json.pygenerate_col_names%   s   
r      
   
c                 C   s   t jdjdd| |fd}ttt | }g }|jD ]}t	dd t
||D }|t| q|| }dd |D }	tj|	|}
||
fS )N*   r     )sizec                 S   s   g | ]
\}}|t |fqS r   )int).0kvr   r   r   
<listcomp>3   s    z$make_random_json.<locals>.<listcomp>c                 S   s   g | ]}t j|t  d qS ))type)paarrayint64)r   colr   r   r   r   6   s    )nprandomRandomStaterandintlist	itertoolsislicer   Tr   zipappendjsondumpsjoinencoder   Tablefrom_arrays)num_colsnum_rowslineseparr	col_nameslinesrowjson_objdatacolumnsexpectedr   r   r   make_random_json.   s   
r<   c                 K   sL   | di |}| |j||jd}| D ]\}}t|||ks#J qd S )N)protocolr   )loadsr,   HIGHEST_PROTOCOLitemsgetattr)clspicklerattr_valuesoptsnew_optsnamevaluer   r   r   check_options_class_pickling;   s   
rI   c                 C   s   t }| }|jdksJ d|_|jdksJ |jdu sJ d|_|jdu s'J |ddd}|jdks4J |jdu s;J t|| ddd d S )Nr   i90  TFi  )
block_sizeuse_threads)rC   rJ   rK   )r   rJ   rK   rI   )pickle_modulerB   rE   r   r   r   test_read_optionsC   s   
rM   c                 C   s   t }| }|jdu sJ |jd u sJ d|_|jdu sJ ttdt g}||_|j|ks3J |jdks:J dD ]}||_|j|ksHJ q<t	t
 d|_W d    n1 s\w   Y  t|| |ddd d S )	NFTfooinfer)ignoreerrorrO   zinvalid-valuerP   )rC   explicit_schemanewlines_in_valuesunexpected_field_behavior)r   rS   rR   r   schemafieldint32rT   pytestraises
ValueErrorrI   )rL   rB   rE   rU   rH   r   r   r   test_parse_optionsX   s*   
r[   c                   @   s   e Zd Zejdd Zdd Zdd Zdd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zejjdd Zejjdd Zejjdd ZdS )BaseTestJSONc                 K   s   t )z
        :param b: bytes to be parsed
        :param kwargs: arguments passed on to open the json file
        :return: b parsed as a single Table
        )NotImplementedErrorselfbkwargsr   r   r   
read_bytest   s   zBaseTestJSON.read_bytesc                 C   s.   |j t|ks	J dd |jD |ksJ d S )Nc                 S   s   g | ]}|j qS r   )rG   )r   cr   r   r   r      s    z,BaseTestJSON.check_names.<locals>.<listcomp>)num_columnslenr:   )r_   tablenamesr   r   r   check_names}   s   zBaseTestJSON.check_namesc              
   C   s   d}t  }t }||d fD ]K}dD ]F}||_d|_tjtdd | j|||d W d    n1 s4w   Y  tdd	D ]}||_| j|||d}|	 d
g diksWJ q>qqd S )Ns   {"a": 1}
{"a": 2}
{"a": 3}   
FT   ztry to increase block sizematchread_optionsparse_options	      a)   r      )
r   r   rS   rJ   rX   rY   rZ   rb   range	to_pydict)r_   rowsro   rp   r9   rS   rJ   rf   r   r   r   test_block_sizes   s.   zBaseTestJSON.test_block_sizesc                 C   s6   d}|  |}| ddgddgddgdksJ d S )	Ns/   {"a": 1,"b": 2, "c": 3}
{"a": 4,"b": 5, "c": 6}rt   rk   r      ru      rs   r`   rc   )rb   rw   )r_   rx   rf   r   r   r   test_no_newline_at_end   s   
z#BaseTestJSON.test_no_newline_at_endc                 C   sl   d}|  |}tdt fdt fdt fg}|j|ks"J | ddgddgd	d
gdks4J d S )Ns0   {"a": 1,"b": 2, "c": 3}
{"a": 4,"b": 5, "c": 6}
rs   r`   rc   rt   rk   r   rz   ru   r{   r|   )rb   r   rU   r   rw   r_   rx   rf   rU   r   r   r   test_simple_ints   s   


zBaseTestJSON.test_simple_intsc                 C   s|   d}|  |}tdt fdt fdt fdt fg}|j|ks'J | ddgdd	gd
dgddgdks<J d S )NsQ   {"a": 1,"b": 2, "c": "3", "d": false}
{"a": 4.0, "b": -5, "c": "foo", "d": true}
rs   r`   rc   d      ?      @r   3rN   FTrs   r`   rc   r   )rb   r   rU   float64r   r	   bool_rw   r~   r   r   r   test_simple_varied   s   



zBaseTestJSON.test_simple_variedc              	   C   s   d}|  |}tdt fdt fdt fdt fdt fg}|j|ks,J | g dg dg d	g d
g ddksDJ d S )Ns   {"a": 1, "b": 2, "c": null, "d": null, "e": null}
{"a": null, "b": -5, "c": "foo", "d": null, "e": true}
{"a": 4.5, "b": null, "c": "nan", "d": null,"e": false}
rs   r`   rc   r   e)r   Ng      @)r   r   N)NrN   nan)NNN)NTF)rs   r`   rc   r   r   )	rb   r   rU   r   r   r	   nullr   rw   r~   r   r   r   test_simple_nulls   s    




zBaseTestJSON.test_simple_nullsc                 C   sP   d}|  |}tdtt fg}|j|ksJ | dg giks&J d S )N	   {"a": []}rs   )rb   r   rU   list_r   rw   r~   r   r   r   test_empty_lists   s
   
zBaseTestJSON.test_empty_listsc                 C   sF   d}|  |}tg }|j|ksJ |jdksJ |jdks!J d S )Ns   {}
{}
r   r   )rb   r   rU   rd   r2   r~   r   r   r   test_empty_rows   s   

zBaseTestJSON.test_empty_rowsc                 C   s   d}dt dt dt dd gi}tjtjtjtjf}|D ](}td|ddfg}t|d}| j||d	}|j|ks<J |	 |ksDJ qd S )
Ns'   {"a": 1}
{"a": 1.45}
{"a": -23.456}
{}
rs   1z1.45z-23.456rq   rk   rR   rp   )
r   r   	decimal32	decimal64
decimal128
decimal256rU   r   rb   rw   )r_   rx   r;   decimal_typestype_factoryrU   rE   rf   r   r   r   test_explicit_schema_decimal   s   
z)BaseTestJSON.test_explicit_schema_decimalc                 C   s  d}t dt  fg}t|d}| j||d}|jt dt  fdt  fgks,J | ddgdd	gd
ks;J t|dd}| j||d}|jt dt  fgksWJ | dddgikscJ t|dd}tjt j	dd | j||d W d    d S 1 sw   Y  d S )Ns2   {"foo": "bar", "num": 0}
{"foo": "baz", "num": 1}
rN   r   r   nums   bars   bazr   rt   )rN   r   rP   )rR   rT   rQ   "JSON parse error: unexpected fieldrl   )
r   rU   binaryr   rb   r   rw   rX   rY   ArrowInvalid)r_   rx   rU   rE   rf   r   r   r   .test_explicit_schema_with_unexpected_behaviour   s@   





"z;BaseTestJSON.test_explicit_schema_with_unexpected_behaviourc                 C   sP   t ddd\}}| |}|j|jksJ ||sJ | | ks&J d S )Nr   r   r1   r2   )r<   rb   rU   equalsrw   )r_   r9   r;   rf   r   r   r   test_small_random_json  s
   
z#BaseTestJSON.test_small_random_jsonc                 C   sH   t ddd\}}tdd}| j||d}|jdksJ |jdks"J d S )Nr   i r   i   rJ   ro   )r<   r   rb   r2   )r_   r9   r;   ro   rf   r   r   r   test_load_large_json!  s
   
z!BaseTestJSON.test_load_large_jsonc           	      C   s   t ddd\}}t }t }||dfD ]1}dD ],}||_dD ]$}||_| j|||d}|j|jks5J ||sD|	 |	 ksDJ q qqd S )Nr   d   r   s   
rj   )      %   rn   )
r<   r   r   rstriprS   rJ   rb   rU   r   rw   )	r_   	data_baser;   ro   rp   r9   rS   rJ   rf   r   r   r   test_stress_block_sizes*  s$   
z$BaseTestJSON.test_stress_block_sizesN)__name__
__module____qualname__abcabstractmethodrb   rh   ry   r}   r   r   r   r   r   r   r   rX   marknumpyr   r   r   r   r   r   r   r\   s   s&    
	%

r\   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )BaseTestJSONReadc                 K      | j t|fi |S r   )r   r   	py_bufferr^   r   r   r   rb   @     zBaseTestJSONRead.read_bytesc                 C   s~   d}dgdgd}t |}| |}| |ksJ t | }tt | | W d    d S 1 s8w   Y  d S )N   {"a": 1, "b": 2}
rt   r   rs   r`   )	ioBytesIOr   rw   StringIOdecoderX   rY   	TypeError)r_   r9   expected_databiorf   sior   r   r   test_file_objectC  s   

"z!BaseTestJSONRead.test_file_objectc              
   C   s   d}t t|d}dd dgfdd g gfdd g dgggfdd i gfd	d d
d id
ddiigffD ]#\}}| j|| |d}d|i}| |ksGJ |djdksQJ q.d S )Ns"   {                               }
r   s   {"a": 0}r   r   s   {"a": []}
{"a": [[1]]}rt   s	   {"a": {}}s    {"a": {}}
{"a": {"b": {"c": 1}}}r`   rc   r   rs   )r   re   rb   rw   column
num_chunks)r_   	first_rowro   	next_rowsexpected_pylistrf   r;   r   r   r   test_reconcile_across_blocksN  s"   



z-BaseTestJSONRead.test_reconcile_across_blocksN)r   r   r   rb   r   r   r   r   r   r   r   >  s    r   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd ZdS ) BaseTestStreamingJSONReadc                 O   s,   | dt }| j|_t|g|R i |S )z
        Reads the JSON file into memory using pyarrow's open_json
        json The JSON bytes
        args Positional arguments to be forwarded to pyarrow's open_json
        kwargs Keyword arguments to be forwarded to pyarrow's open_json
        ro   )
setdefaultr   rK   r   )r_   r+   argsra   ro   r   r   r   r   c  s   z#BaseTestStreamingJSONRead.open_jsonc                 K   r   r   )r   r   r   r^   r   r   r   
open_bytesn  r   z$BaseTestStreamingJSONRead.open_bytesc                 C   sl   |j |ksJ t|}t|t|ksJ t||D ]\}}|jdd |j |ks+J | |ks3J qd S )NTfull)rU   r%   re   r)   validaterw   )r_   readerexpected_schemar   batchesbatchexpected_batchr   r   r   check_readerq  s   z&BaseTestStreamingJSONRead.check_readerc                 K   s   | j |fi | S r   )r   read_allr^   r   r   r   rb   z  s   z$BaseTestStreamingJSONRead.read_bytesc                 C   sX   d}dgdgd}t |}| |}tdt fdt fg}| |||g d S )Nr   rt   r   r   rs   r`   )r   r   r   r   rU   r   r   )r_   r9   r   r   r   r   r   r   r   r   }  s   


z*BaseTestStreamingJSONRead.test_file_objectc                 C   T   d}t  }d|_tjtjdd | j||d W d    d S 1 s#w   Y  d S )Ns   {"i": 0            }
{"i": 1}ru   1straddling object straddles two block boundaries*rl   r   r   rJ   rX   rY   r   r   r   )r_   bad_first_chunkro   r   r   r   test_bad_first_chunk  s   "z.BaseTestStreamingJSONRead.test_bad_first_chunkc                 C      d}t  }d|_tdt fg}| j||d}|j|ks J |  ddgiks-J tj	tj
dd |  W d    n1 sDw   Y  t	t |  W d    d S 1 s^w   Y  d S )Ns   {"i": 0}
{"i":     1}
{"i": 2}r   ir   r   r   rl   r   rJ   r   rU   r   r   read_next_batchrw   rX   rY   r   StopIterationr_   bad_middle_chunkro   r   r   r   r   r   test_bad_middle_chunk  s$   



"z/BaseTestStreamingJSONRead.test_bad_middle_chunkc                 C   r   )Ns   {"n": }
{"n": 10000}   !JSON parse error: Invalid value.*rl   r   r   r_   bad_first_blockro   r   r   r   test_bad_first_parse     "z.BaseTestStreamingJSONRead.test_bad_first_parsec                 C   r   )Ns"   {            }{"n": }
{"n": 10000}r   r   rl   r   r   r   r   r   r   !test_bad_middle_parse_after_empty  r   z;BaseTestStreamingJSONRead.test_bad_middle_parse_after_emptyc                 C   r   )Ns%   {"n": 1000}
{"n": 200 00}
{"n": 3000}r   nr   r   z@JSON parse error: Missing a comma or '}' after an object member*rl   r   r   r   r   r   test_bad_middle_parse  s$   



"z/BaseTestStreamingJSONRead.test_bad_middle_parsec                 C      d}t dd}tdd}tdt fg}| j|||d}|j|ks%J |  ddgiks2J tj	tj
d	d
 |  W d    n1 sIw   Y  t	t |  W d    d S 1 scw   Y  d S Ns   {"n": 0}{1}
{"n": 2}r   r   TrS   r   rn   r   JSON parse error *rl   r   r   r   rU   r   r   r   rw   rX   rY   r   r   r_   r   ro   rp   r   r   r   r   r   %test_non_linewise_chunker_first_block  *   





"z?BaseTestStreamingJSONRead.test_non_linewise_chunker_first_blockc                 C   r   r   r   r   r   r   r   )test_non_linewise_chunker_bad_first_block  r   zCBaseTestStreamingJSONRead.test_non_linewise_chunker_bad_first_blockc                 C   s   d}t dd}tdd}tdt fg}| j|||d}|j|ks%J |  ddgiks2J |  dd	giks?J tj	tj
d
d |  W d    n1 sVw   Y  t	t |  W d    d S 1 spw   Y  d S )Ns&   {"n": 0}
{"n":    1}
{}"n":2}
{"n": 3}r   r   Tr   r   rn   r   rt   r   rl   r   r   r   r   r   *test_non_linewise_chunker_bad_middle_block  s0   







"zDBaseTestStreamingJSONRead.test_non_linewise_chunker_bad_middle_blockc                 C   sh   d}t dt  fdt  fg}tdd}t|d}dgdgd	}| j|||d
}| |||g d S )Ns       
{"b": true, "s": "foo"}r`   s   r   r   TrN   )r`   r   rn   )r   rU   r   utf8r   r   r   r   )r_   leading_empty_chunkrR   ro   rp   r   r   r   r   r    test_ignore_leading_empty_blocks  s   



z:BaseTestStreamingJSONRead.test_ignore_leading_empty_blocksc                 C   s  d}t dt  fdt  fg}dgdgd}tdd}td	d
}| j|||d}|j|ks1J |  |ks;J t	j
t jdd |  W d    n1 sRw   Y  t dt  fdt  fdt  fg}ddgdd gd dgd}tdd}| j|||d}|j|ksJ |  |ksJ t	j
t jdd |  W d    n1 sw   Y  t dt  fdt  fdt  fdt  fg}g dg dg dg dd}tdd}| j|||d}|j|ksJ |  |ksJ d S )NsI   {"a": 0, "b": "foo"    }
        {"a": 1, "c": true  }
{"a": 2, "d": 4.0}rs   r`   r   rN   r       r   rO   )rT   rn   r   rl   rc   rt   Tr|   @   r   )r   rt   r   )rN   NN)NTN)NNr   r   `   )r   rU   r   r   r   r   r   r   rw   rX   rY   r   r   r   )r_   rx   r   r   ro   rp   r   r   r   r   test_inference  sh   














z(BaseTestStreamingJSONRead.test_inferenceN)r   r   r   r   r   r   rb   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   b  s     		
r   c                   @      e Zd Zdd ZdS )TestSerialJSONReadc                 O   s2   | dt }d|_t|i |}|jdd |S )Nro   FTr   r   r   rK   r   r   r_   r   ra   ro   rf   r   r   r   r   S  
   zTestSerialJSONRead.read_jsonNr   r   r   r   r   r   r   r   r  Q      r  c                   @   r   )TestParallelJSONReadc                 O   s2   | dt }d|_t|i |}|jdd |S )Nro   Tr   r  r  r   r   r   r   ]  r  zTestParallelJSONRead.read_jsonNr  r   r   r   r   r  [  r  r  c                   @      e Zd ZdZdS )TestSerialStreamingJSONReadFNr   r   r   rK   r   r   r   r   r	  e  s    r	  c                   @   r  )TestThreadedStreamingJSONReadTNr
  r   r   r   r   r  j  s    r  )r   r   r   )%r   collectionsr   decimalr   r   r&   r+   r	   unittestr   r!   ImportErrorrX   pyarrowr   pyarrow.jsonr   r   r   r   r   r<   rI   rM   r[   ABCr\   r   r   TestCaser  r  r	  r   	threadingr  r   r   r   r   <module>   s>   
	 L$ p

