o
    Fh=                  
   @   s  d dl mZmZmZ d dlmZmZ zd dlmZm	Z	m
Z
mZmZmZmZmZmZ W n eyA Z zedee dddZ[ww zd dlmZ d dlmZ W n eyb   G dd	 d	ZeZY nw dddZddd
dedfddZd
efddZdd ZefddZdddZdS )    )TableRecordBatcharray)
Expressionfield)	DeclarationExecNodeOptionsTableSourceNodeOptionsFilterNodeOptionsProjectNodeOptionsAggregateNodeOptionsOrderByNodeOptionsHashJoinNodeOptionsAsofJoinNodeOptionsz@The pyarrow installation is not built with support for 'acero' ()N)ScanNodeOptionsc                   @   s(   e Zd ZG dd dZG dd dZdS )DatasetModuleStubc                   @      e Zd ZdS )zDatasetModuleStub.DatasetN__name__
__module____qualname__ r   r   N/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/pyarrow/acero.pyDataset3       r   c                   @   r   )z!DatasetModuleStub.InMemoryDatasetNr   r   r   r   r   InMemoryDataset6   r   r   N)r   r   r   r   r   r   r   r   r   r   2   s    r   TFc                 C   sn   t dt| ||d}dd | jjD }t |t dt|g}| jd}|d ur5t |t dt|g}|S )Nscanuse_threadsimplicit_orderingc                 S   s   g | ]}t |qS r   )r   ).0fr   r   r   
<listcomp>B   s    z$_dataset_to_decl.<locals>.<listcomp>projectfilter)	r   r   schemanamesfrom_sequencer   _scan_optionsgetr
   )datasetr   r    declprojectionsfilter_exprr   r   r   _dataset_to_decl;   s   r/   c                    s  t |ttjfstdt| t |ttjfs"tdt| i  t |ttfs.|g}t|D ]\}}| |< q2i t |ttfsG|g}t|D ]\}}||< qK|j	j
}|j	j
}| dksd| dkrgg }n*| dkso| dkrrg }n| dksz| dkrfdd	|D }n| d
kr fdd	|D }i }t|D ]\}}||v r|||< qi }t|D ]\}}||v r|||< qt |tjrt||d}ntdt|}t |tjrt||d}ntdt|}|rt| |||||pd|pd|
d}nt| |||pd|pd|
d}td|||gd}|r| dkrt|}t|}t|}g }g }t|| D ]n\}}|t|k rY||v rY|| || |   }|tdt|t|| g q'||kre||v req'|rv||k rv||v rv||7 }|r||kr||v r||7 }|| |t| q'tdt||}t||g}|j|d}|	tkr|S |	tjkrt|S td)a  
    Perform join of two tables or datasets.

    The result will be an output table with the result of the join operation

    Parameters
    ----------
    join_type : str
        One of supported join types.
    left_operand : Table or Dataset
        The left operand for the join operation.
    left_keys : str or list[str]
        The left key (or keys) on which the join operation should be performed.
    right_operand : Table or Dataset
        The right operand for the join operation.
    right_keys : str or list[str]
        The right key (or keys) on which the join operation should be performed.
    left_suffix : str, default None
        Which suffix to add to left column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    right_suffix : str, default None
        Which suffix to add to the right column names. This prevents confusion
        when the columns in left and right operands have colliding names.
    use_threads : bool, default True
        Whether to use multithreading or not.
    coalesce_keys : bool, default False
        If the duplicated keys should be omitted from one of the sides
        in the join result.
    output_type: Table or InMemoryDataset
        The output type for the exec plan result.
    filter_expression : pyarrow.compute.Expression
        Residual filter which is applied to matching row.

    Returns
    -------
    result_table : Table or InMemoryDataset
    Expected Table or Dataset, got z	left semiz	left antiz
right semiz
right antiinnerz
left outerc                       g | ]}| vr|qS r   r   r!   col)right_keys_orderr   r   r#          z!_perform_join.<locals>.<listcomp>zright outerc                    r2   r   r   r3   )left_keys_orderr   r   r#      r6   r   table_source )output_suffix_for_leftoutput_suffix_for_rightfilter_expressionhashjoinoptionsinputsz
full outercoalescer$   Unsupported output type)
isinstancer   dsr   	TypeErrortypetuplelist	enumerater&   r'   r/   r   r	   r   setlenappendr   _call_fieldr   r(   to_tabler   )	join_typeleft_operand	left_keysright_operand
right_keysleft_suffixright_suffixr   coalesce_keysoutput_typer=   idxkeyleft_columnsright_columnsleft_column_keys_indicescolnameright_column_keys_indicesleft_sourceright_source	join_optsr,   left_columns_setright_columns_setright_operand_indexprojected_col_namesr-   r4   right_key_index
projectionresult_tabler   )r7   r5   r   _perform_joinR   s   *















rk   c	                    sV  t | ttjfstdt|  t |ttjfs"tdt| t |ttfs,|g}t  ttfs6 g  fdd|jj	D }	t
| jj	t
|	@ }
|
rVtd|
 dt | tjrdt| |dd}ntdt| }t |tjryt||dd}ntdt|}t|| |}td	|||gd
}|j|d}|tkr|S |tjkrt|S td)a-  
    Perform asof join of two tables or datasets.

    The result will be an output table with the result of the join operation

    Parameters
    ----------
    left_operand : Table or Dataset
        The left operand for the join operation.
    left_on : str
        The left key (or keys) on which the join operation should be performed.
    left_by: str or list[str]
        The left key (or keys) on which the join operation should be performed.
    right_operand : Table or Dataset
        The right operand for the join operation.
    right_on : str or list[str]
        The right key (or keys) on which the join operation should be performed.
    right_by: str or list[str]
        The right key (or keys) on which the join operation should be performed.
    tolerance : int
        The tolerance to use for the asof join. The tolerance is interpreted in
        the same units as the "on" key.
    output_type: Table or InMemoryDataset
        The output type for the exec plan result.

    Returns
    -------
    result_table : Table or InMemoryDataset
    r0   c                    s   g | ]}|g  vr|qS r   r   r3   right_byright_onr   r   r#   3  s
    z&_perform_join_asof.<locals>.<listcomp>zColumns zE present in both tables. AsofJoin does not support column collisions.Tr   r9   asofjoinr?   r8   rC   )rD   r   rE   r   rF   rG   rH   rI   r&   r'   rK   
ValueErrorr/   r   r	   r   rP   r   )rR   left_onleft_byrT   rn   rm   	tolerancer   rY   r]   columns_collisionsra   rb   rc   r,   rj   r   rl   r   _perform_join_asof  sZ   !




ru   c                 C   s   d}t | trt| g} d}ttdt| dtdt|dg}|jdd}|rI|j	dkr9|
  d }|S dd	 |jD }tj||jd
}|S )a  Filter rows of a table based on the provided expression.

    The result will be an output table with only the rows matching
    the provided expression.

    Parameters
    ----------
    table : Table or RecordBatch
        Table that should be filtered.
    expression : Expression
        The expression on which rows should be filtered.

    Returns
    -------
    Table or RecordBatch
    FTr9   )r@   r%   r8   r   c                 S   s   g | ]	}t g |jd qS ))rG   )r   rG   )r!   r   r   r   r   r#     s    z!_filter_table.<locals>.<listcomp>)r&   )rD   r   r   from_batchesr   r(   r	   r
   rP   num_rowscombine_chunks
to_batchesr&   from_arrays)table
expressionis_batchr,   resultarraysr   r   r   _filter_tableb  s    

r   c                 K   s   t | tjrt| dd}ntdt| }tdt|fi |}t||g}|jdd}|t	kr2|S |tj
kr<t
|S td)NTr8   r9   order_byrC   )rD   rE   r   r/   r   r	   r   r(   rP   r   r   rF   )table_or_dataset	sort_keysrY   kwargsdata_sourcer   r,   rj   r   r   r   _sort_source  s   

r   c              	   C   s2   t t dt| t dt||dg}|j|dS )Nr9   	aggregate)keysr8   )r   r(   r	   r   rP   )r{   
aggregatesr   r   r,   r   r   r   	_group_by  s
   r   )TF)T) pyarrow.libr   r   r   pyarrow.computer   r   pyarrow._aceror   r   r	   r
   r   r   r   r   r   ImportErrorexcstrpyarrow.datasetr+   rE   pyarrow._datasetr   r   r/   rk   ru   r   r   r   r   r   r   r   <module>   s>   0


 8
[$