o
    LhF                     @  s  U d dl mZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d d	lmZ erd d
lmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m  m*Z+ d dl,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dlm:Z:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlmBZBmCZCmDZD dZEdeFd< 	 G dd ded eZGdS )    )annotations)TYPE_CHECKINGAnyN)add_row_indexevaluate_exprs)ExprKind)native_to_narwhals_dtypeselect_columns_by_name)assert_never)	ImplementationValidateBackendVersion_remap_full_join_keyscheck_column_names_are_uniquecheck_columns_existgenerate_temporary_column_namenot_implementedparse_columns_to_drop
zip_strict)CompliantLazyFrame)IterableIteratorMappingSequence)BytesIO)Path)
ModuleType)Self	TypeAliasTypeIs)CompliantDataFrameAny)DaskExprDaskLazyGroupByDaskNamespace)_EagerAllowedImpl)Version_LimitedContext)	LazyFrame)DType)ColumnNotFoundError)AsofJoinStrategyJoinStrategyLazyUniqueKeepStrategyr   r   
Incompletec                   @  s  e Zd ZejZdddddZedddZe	dddZ
dddZdddZddd Zdd!d"Zdd#d$Zdd'd(Zdd,d-Zdd/d0Zdd3d4Zdd9d:Zedd<d=Zdd?d@ZddCdDZddEdFZddGdHZddJdKZeddMdNZddOdPZddSdTZddWdXZdd[d\Z dd_d`Z!ddcddZ"ddidjZ#ddndoZ$ddtduZ%ddvdwZ&ddxdyZ'ddzd{Z(dd|d}Z)dd~dZ*dddZ+dddZ,dddZ-dddZ.dddZ/dddZ0dddZ1dddZ2e3 Z4dS )DaskLazyFrameF)validate_backend_versionnative_dataframedd.DataFrameversionr&   r0   boolreturnNonec                C  s,   || _ || _d | _d | _|r|   d S d S N)_native_frame_version_cached_schema_cached_columns_validate_backend_version)selfr1   r3   r0    r>   Y/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/narwhals/_dask/dataframe.py__init__:   s   zDaskLazyFrame.__init__objdd.DataFrame | AnyTypeIs[dd.DataFrame]c                 C  s   t | tjS r7   )
isinstancedd	DataFrame)rA   r>   r>   r?   
_is_nativeH   s   zDaskLazyFrame._is_nativedatacontextr'   r   c               C  s   | ||j dS Nr3   )r9   )clsrH   rI   r>   r>   r?   from_nativeL   s   zDaskLazyFrame.from_nativeLazyFrame[dd.DataFrame]c                 C  s   | j j| ddS )Nlazy)level)r9   	lazyframer=   r>   r>   r?   to_narwhalsP      zDaskLazyFrame.to_narwhalsr   c                 C  s.   | j tju r| j  S dt| j  }t|)NzExpected dask, got: )_implementationr   DASKto_native_namespacetypeAssertionError)r=   msgr>   r>   r?   __native_namespace__S   s   
z"DaskLazyFrame.__native_namespace__r$   c                 C  s   ddl m} || jdS )Nr   r#   rK   )narwhals._dask.namespacer$   r9   )r=   r$   r>   r>   r?   __narwhals_namespace__Z   s   z$DaskLazyFrame.__narwhals_namespace__c                 C  s   | S r7   r>   rR   r>   r>   r?   __narwhals_lazyframe___   s   z$DaskLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j|dS rJ   )	__class__native)r=   r3   r>   r>   r?   _with_versionb   rT   zDaskLazyFrame._with_versiondfr   c                 C  s   | j || jdS rJ   )r_   r9   )r=   rb   r>   r>   r?   _with_nativee   rT   zDaskLazyFrame._with_nativesubsetSequence[str]ColumnNotFoundError | Nonec                 C  s   t || jdS )N)	available)r   columns)r=   rd   r>   r>   r?   _check_columns_existh   s   z"DaskLazyFrame._check_columns_existIterator[dx.Series]c                 c  s     | j  D ]\}}|V  qd S r7   )r`   items)r=   _colserr>   r>   r?   _iter_columnsk   s   zDaskLazyFrame._iter_columnsexprsr    c                 G  s,   t | g|R  }| | jjdi t|S )Nr>   )r   rc   r`   assigndict)r=   ro   
new_seriesr>   r>   r?   with_columnso   s   zDaskLazyFrame.with_columnsbackend_EagerAllowedImpl | Nonekwargsr   c           
      K  s   | j jd
i |}|d u s|tju r#ddlm} ||tjd| jddS |tju r=dd l}ddl	m
} |||d| jdS |tju rYdd l}ddlm} ||j|d| jddS d	| }	t|	)Nr   )PandasLikeDataFrameT)implementationr0   r3   validate_column_names)PolarsDataFrame)r0   r3   )ArrowDataFrame)r0   r3   ry   zUnsupported `backend` value: r>   )r`   computer   PANDASnarwhals._pandas_like.dataframerw   r9   POLARSpolarsnarwhals._polars.dataframerz   from_pandasPYARROWpyarrownarwhals._arrow.dataframer{   Table
ValueError)
r=   rt   rv   resultrw   plrz   par{   rZ   r>   r>   r?   collects   s:   



zDaskLazyFrame.collect	list[str]c                 C  s2   | j d u r| jd urt| jn| jj | _ | j S r7   )r;   r:   listschemar`   rh   tolistrR   r>   r>   r?   rh      s   



zDaskLazyFrame.columns	predicatec                 C  s   || d }|  | jj| S )Nr   )rc   r`   loc)r=   r   maskr>   r>   r?   filter   s   zDaskLazyFrame.filtercolumn_namesstrc                 G  s"   | j }t|t|| j}| |S r7   )r`   r	   r   rU   rc   )r=   r   rb   r`   r>   r>   r?   simple_select   s   
zDaskLazyFrame.simple_selectc                 G  s2   t | g|R  }tjdd |D dd}| |S )Nc                 S  s   g | ]	\}}| |qS r>   )rename).0namevalr>   r>   r?   
<listcomp>   s    z+DaskLazyFrame.aggregate.<locals>.<listcomp>   )axis)r   rE   concatrc   r=   ro   rr   rb   r>   r>   r?   	aggregate   s   
zDaskLazyFrame.aggregatec                 G  sH   t | g|R  }| j}t|jdi t|dd |D | j}| |S )Nc                 S  s   g | ]}|d  qS )r   r>   )r   sr>   r>   r?   r      s    z(DaskLazyFrame.select.<locals>.<listcomp>r>   )r   r`   r	   rp   rq   rU   rc   r   r>   r>   r?   select   s   
zDaskLazyFrame.selectSequence[str] | Nonec                 C  sD   |d u r|  | j S |  }|j|j|  dd }| |S )NT)ignore_nulls)rc   r`   dropnar]   any_horizontalcolis_nullr   )r=   rd   plxr   r>   r>   r?   
drop_nulls   s
   
zDaskLazyFrame.drop_nullsdict[str, DType]c                   s2   j d u rjj  fddjjD _ j S )Nc                   s"   i | ]}|t  | jjqS r>   )r   r9   rU   )r   r   native_dtypesr=   r>   r?   
<dictcomp>   s    z(DaskLazyFrame.schema.<locals>.<dictcomp>)r:   r`   dtypesrh   rR   r>   r   r?   r      s   
zDaskLazyFrame.schemac                 C  s   | j S r7   r   rR   r>   r>   r?   collect_schema   s   zDaskLazyFrame.collect_schemarh   strictc                C  s"   t | ||d}| | jj|dS )Nr   rh   )r   rc   r`   drop)r=   rh   r   to_dropr>   r>   r?   r      s   zDaskLazyFrame.dropr   order_byc                 C  s|   |d u r|  t| j|S |  }| j}|jdd d|tj	}|
|jddjg |dd }| |||j
| S )Nr   )valuedtypeF)reverse)partition_byr   )rc   r   r`   r]   rh   litalias	broadcastr   LITERALr   cum_sumoverrs   r   )r=   r   r   r   rh   
const_exprrow_index_exprr>   r>   r?   with_row_index   s   zDaskLazyFrame.with_row_indexmappingMapping[str, str]c                 C  s   |  | jj|dS )Nr   )rc   r`   r   )r=   r   r>   r>   r?   r      s   zDaskLazyFrame.renamenintc                 C  s   |  | jj|dddS )NFr   r|   npartitions)rc   r`   head)r=   r   r>   r>   r?   r      s   zDaskLazyFrame.headkeepr-   c          	      C  s   |r|  | }r||dkr=|p| j}td|d}| j| |}||dk }| j|d}| jj	||dd}ndd	i
||}| jj||d
}| |S )Nnone   n_bytesrh   r   r   inner)onhowanyfirst)rd   r   )ri   rh   r   r`   groupbysizer   reset_indexr   mergegetdrop_duplicatesrc   )	r=   rd   r   errortokenrm   uniquer   mapped_keepr>   r>   r?   r      s   

zDaskLazyFrame.uniqueby
descendingbool | Sequence[bool]
nulls_lastc                G  sH   t |tr	| }ndd |D }|rdnd}| | jjt|||dS )Nc                 S  s   g | ]}| qS r>   r>   )r   dr>   r>   r?   r     s    z&DaskLazyFrame.sort.<locals>.<listcomp>lastr   )	ascendingna_position)rD   r4   rc   r`   sort_valuesr   )r=   r   r   r   r   positionr>   r>   r?   sort   s   
zDaskLazyFrame.sortkIterable[str]r   c                  s   | j }| j t|}t|tr.t fdd|D r.|r%| |||S | |||S t|tr:|gt	| }| |j
|t|dj|dddS )Nc                 3  s    | ]	} |   V  qd S r7   )
is_numeric)r   xr   r>   r?   	<genexpr>  s    z&DaskLazyFrame.top_k.<locals>.<genexpr>)r   Fr   r   )r`   r   r   rD   r4   allrc   	nsmallestnlargestlenr   r   )r=   r   r   r   rb   r>   r   r?   top_k
  s    
zDaskLazyFrame.top_kotherleft_onright_onsuffixc                C  s   | j j|j ||dd|fdS )Nr    r   r   r   suffixes)r`   r   )r=   r   r   r   r   r>   r>   r?   _join_inner  s   zDaskLazyFrame._join_innerc                  sB    j j|j d||dfd} fddt||D }|j|dS )Nleftr   r   r   r   r   c                   s2   g | ]\}}||kr| j vr|n|  qS r>   r   )r   left_key	right_keyr=   r   r>   r?   r   /  s
    z,DaskLazyFrame._join_left.<locals>.<listcomp>r   )r`   r   r   r   )r=   r   r   r   r   result_nativeextrar>   r  r?   
_join_left%  s   zDaskLazyFrame._join_leftc                C  sJ   t |||}|jj|d}t|j t| }| jj|||dd|fdS )Nr   outerr   r   )r   r`   r   r   rh   r   valuesr   )r=   r   r   r   r   right_on_mapperother_nativeright_suffixedr>   r>   r?   
_join_full6  s   
zDaskLazyFrame._join_fullc                C  s^   t dg | j|jR d}| jjdi |dij|jjdi |did||d|fdj|dS )	Nr   r   r   r   r   r   r   r>   )r   rh   r`   rp   r   r   )r=   r   r   	key_tokenr>   r>   r?   _join_crossH  s   zDaskLazyFrame._join_crossc                C  s2   | j |t|tt||d}| jj|d||dS )Nr   columns_to_selectcolumns_mappingr   )r   r   r   )_join_filter_renamer   rq   zipr`   r   )r=   r   r   r   r	  r>   r>   r?   
_join_semiX  s   zDaskLazyFrame._join_semic                C  sh   t dg | j|jR d}| j|t|tt||d}| jj|d|||d}||| dk j|gdS )Nr   r   r  r   )r   	indicatorr   r   	left_onlyr   )	r   rh   r  r   rq   r  r`   r   r   )r=   r   r   r   indicator_tokenr	  rb   r>   r>   r?   
_join_antid  s    zDaskLazyFrame._join_antir  r  dict[str, str]c                 C  s    |j }t||| jj|d S )zHelper function to avoid creating extra columns and row duplication.

        Used in `"anti"` and `"semi`" join's.

        Notice that a native object is returned.
        r   )r`   r	   rU   r   r   )r=   r   r  r  r	  r>   r>   r?   r  x  s   	z!DaskLazyFrame._join_filter_renamer   r,   c                C  s   |dkr| j ||d}nU|d u s|d u rt|||dkr'| j||||d}n:|dkr4| j|||d}n-|dkrA| j|||d}n |dkrO| j||||d}n|d	kr]| j||||d}nt| | |S )
Ncross)r   r   r   )r   r   r   r   anti)r   r   r   semir   full)	r  r   r   r  r  r  r  r
   rc   )r=   r   r   r   r   r   r   r>   r>   r?   join  s,   	

zDaskLazyFrame.joinby_leftby_rightstrategyr+   c          	      C  s0   |   }| |j| j|j|||||d|fdS )Nr   )r   r   left_byright_by	directionr   )r[   rc   
merge_asofr`   )	r=   r   r   r   r  r  r   r   r   r>   r>   r?   	join_asof  s   zDaskLazyFrame.join_asofkeys"Sequence[str] | Sequence[DaskExpr]drop_null_keysr"   c                C  s   ddl m} || ||dS )Nr   r!   )r(  )narwhals._dask.group_byr"   )r=   r&  r(  r"   r>   r>   r?   group_by  s   zDaskLazyFrame.group_byc                 C  s6   | j }|j}|dkr| | j j|ddS d}t|)Nr   F)r   r|   zL`LazyFrame.tail` is not supported for Dask backend with multiple partitions.)r`   r   rc   tailNotImplementedError)r=   r   native_framen_partitionsrZ   r>   r>   r?   r+    s   zDaskLazyFrame.tailoffsetc                 C  sV   t d| jd}|  }| j|d d|||k||| | dk@ j|gddS )Nr   r   )r   r   Fr   )r   rh   r]   r   r   r   r   )r=   r   r/  row_index_tokenr   r>   r>   r?   gather_every  s   zDaskLazyFrame.gather_everyr   indexvariable_name
value_namec                 C  s   |  | jj||||dS )N)id_vars
value_varsvar_namer4  )rc   r`   melt)r=   r   r2  r3  r4  r>   r>   r?   unpivot  s   zDaskLazyFrame.unpivotfilestr | Path | BytesIOc                 C  s   | j | d S r7   )r`   
to_parquet)r=   r:  r>   r>   r?   sink_parquet  rT   zDaskLazyFrame.sink_parquetN)r1   r2   r3   r&   r0   r4   r5   r6   )rA   rB   r5   rC   )rH   r2   rI   r'   r5   r   )r5   rN   )r5   r   )r5   r$   )r5   r   )r3   r&   r5   r   )rb   r   r5   r   )rd   re   r5   rf   )r5   rj   )ro   r    r5   r   )rt   ru   rv   r   r5   r   )r5   r   )r   r    r5   r   )r   r   r5   r   )rd   r   r5   r   )r5   r   )rh   re   r   r4   r5   r   )r   r   r   r   r5   r   )r   r   r5   r   )r   r   r5   r   )rd   r   r   r-   r5   r   )r   r   r   r   r   r4   r5   r   )r   r   r   r   r   r   r5   r   )
r   r   r   re   r   re   r   r   r5   r2   )r   r   r   r   r5   r2   )r   r   r   re   r   re   r5   r2   )r   r   r  r   r  r  r5   r2   )r   r   r   r,   r   r   r   r   r   r   r5   r   )r   r   r   r   r   r   r  r   r  r   r   r+   r   r   r5   r   )r&  r'  r(  r4   r5   r"   )r   r   r/  r   r5   r   )
r   r   r2  r   r3  r   r4  r   r5   r   )r:  r;  r5   r6   )5__name__
__module____qualname__r   rV   rU   r@   staticmethodrG   classmethodrM   rS   r[   r]   r^   ra   rc   ri   rn   rs   r   propertyrh   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r%  r*  r+  r1  r9  r=  r   exploder>   r>   r>   r?   r/   4   sd    









*
	






















#





r/   )r    r2   rN   )H
__future__r   typingr   r   dask.dataframe	dataframerE   narwhals._dask.utilsr   r   narwhals._expression_parsingr   narwhals._pandas_like.utilsr   r	   narwhals._typing_compatr
   narwhals._utilsr   r   r   r   r   r   r   r   r   narwhals.typingr   collections.abcr   r   r   r   ior   pathlibr   typesr   dask.dataframe.dask_expr	dask_exprdxtyping_extensionsr   r   r   narwhals._compliant.typingr   narwhals._dask.exprr    r)  r"   r\   r$   narwhals._typingr%   r&   r'   narwhals.dataframer(   narwhals.dtypesr)   narwhals.exceptionsr*   r+   r,   r-   r.   __annotations__r/   r>   r>   r>   r?   <module>   s@    ,
