o
    LhV                     @  s  U d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d d	lmZmZmZmZmZmZ d d
lmZ erd dl m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+Z,d dl-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@Z@ d dlmAZAmBZB d dlCmDZD d dlEmFZF d dlGmHZHmIZI e0eeeeef ZJeZKdeLd< 	 G dd  d ed! eZMdS )"    )annotations)reduce)and_)TYPE_CHECKINGAny)issue_warningis_native_spark_like)catch_pyspark_connect_exceptioncatch_pyspark_sql_exceptionevaluate_exprsimport_functionsimport_native_dtypesimport_windownative_to_narwhals_dtype)SQLLazyFrame)ImplementationValidateBackendVersiongenerate_temporary_column_namenot_implementedparse_columns_to_drop
zip_strict)InvalidOperationError)IterableIteratorMappingSequence)BytesIO)Path)
ModuleTypeN)Column)BaseDataFrameWindow)Self	TypeAliasTypeIs)CompliantDataFrameAny)SparkLikeExprSparkLikeLazyGroupBySparkLikeNamespace)_EagerAllowedImpl)Version_LimitedContext)	LazyFrame)DType)JoinStrategyLazyUniqueKeepStrategyr%   
Incompletec                   @  s  e Zd ZdddddZedddZedd Zedd ZedddZe	dddZ
edd"d#Zdd%d&Zdd(d)Zdd+d,Zdd-d.Zdd/d0Zdd2d3Zdd5d6Zdd8d9Zdd;d<Zedd>d?ZddEdFZddGdHZddKdLZddOdPZddQdRZddSdTZddVdWZeddYdZZdd[d\Zdd`daZ ddddeZ!ddjdkZ"ddpdqZ#ddudvZ$ddydzZ%dd}d~Z&dddZ'dddZ(dddZ)dddZ*dddZ+dddZ,e-.dZ/e- Z0e-.dZ1dS )SparkLikeLazyFrameF)validate_backend_versionnative_dataframeSQLFrameDataFrameversionr.   implementationr   r6   boolreturnNonec                C  s2   || _ || _|| _d | _d | _|r|   d S d S N)_native_frame_implementation_version_cached_schema_cached_columns_validate_backend_version)selfr7   r9   r:   r6    rF   _/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/narwhals/_spark_like/dataframe.py__init__=   s   zSparkLikeLazyFrame.__init__tuple[int, ...]c                 C  
   | j  S r>   )r@   _backend_versionrE   rF   rF   rG   rK   M   s   
z#SparkLikeLazyFrame._backend_versionc                 C     t r
ddlm} |S t| jS )Nr   )	functions)r   sqlframe.baserN   r   r@   )rE   rN   rF   rF   rG   _FQ      
zSparkLikeLazyFrame._Fc                 C  rM   )Nr   )types)r   rO   rR   r   r@   )rE   rR   rF   rF   rG   _native_dtypesY   rQ   z!SparkLikeLazyFrame._native_dtypestype[Window]c                 C  rM   )Nr   r"   )r   sqlframe.base.windowr#   r   r@   )rE   r#   rF   rF   rG   _Windowa   rQ   zSparkLikeLazyFrame._WindowobjSQLFrameDataFrame | AnyTypeIs[SQLFrameDataFrame]c                 C  s   t | S r>   r   )rW   rF   rF   rG   
_is_nativei   s   zSparkLikeLazyFrame._is_nativedatacontextr/   r$   c               C  s   | ||j |jdS Nr9   r:   )rA   r@   )clsr[   r\   rF   rF   rG   from_nativem   s   zSparkLikeLazyFrame.from_nativeLazyFrame[SQLFrameDataFrame]c                 C  s   | j j| ddS )Nlazy)level)rA   	lazyframerL   rF   rF   rG   to_narwhalsq   s   zSparkLikeLazyFrame.to_narwhalsr   c                 C  rJ   r>   )r@   to_native_namespacerL   rF   rF   rG   __native_namespace__t   s   
z'SparkLikeLazyFrame.__native_namespace__r,   c                 C  s   ddl m} || j| jdS )Nr   r+   r^   )narwhals._spark_like.namespacer,   rA   r@   )rE   r,   rF   rF   rG   __narwhals_namespace__w   s   z)SparkLikeLazyFrame.__narwhals_namespace__c                 C  s   | S r>   rF   rL   rF   rF   rG   __narwhals_lazyframe__~   s   z)SparkLikeLazyFrame.__narwhals_lazyframe__c                 C  s   | j | j|| jdS r]   )	__class__nativer@   )rE   r9   rF   rF   rG   _with_version      
z SparkLikeLazyFrame._with_versiondfc                 C  s   | j || j| jdS r]   )rk   rA   r@   )rE   ro   rF   rF   rG   _with_native   rn   zSparkLikeLazyFrame._with_native	pa.Schemac                 C  s   dd l }ddlm} g }|  }| jj}| D ]H\}}z||| j}W n4 tyX }	 z(|| j	}
| j
j}t|
|sEtd|
 d|	t ||| f W Y d }	~	qd }	~	ww |||f q||S )Nr   )narwhals_to_native_dtypezCould not convert dtype z to PyArrow dtype, )pyarrownarwhals._arrow.utilsrr   collect_schemarl   schemaitemsrA   	ExceptiondataTyperS   NullType
isinstancer   UserWarningappendnull)rE   parr   rv   	nw_schemanative_schemakeyvaluenative_dtypeexcnative_spark_dtype	null_typerF   rF   rG   _to_arrow_schema   s*   


z#SparkLikeLazyFrame._to_arrow_schemapa.Tablec              
   C  s   | j  rF| jdk rFdd l}z
|j| j W S  tyE } z!dt	|v r@dd | j
D }|  }|jj||dW  Y d }~S  d }~ww | j  rc| jdk rcdd l}|  }|jj| j |dS | j S )N)   r   zat least one RecordBatchc                 S  s   i | ]}|g qS rF   rF   ).0krF   rF   rG   
<dictcomp>   s    z8SparkLikeLazyFrame._collect_to_arrow.<locals>.<dictcomp>rv   )r@   
is_pysparkrK   rs   Tablefrom_batchesrl   _collect_as_arrow
ValueErrorstrcolumnsr   from_pydictis_pyspark_connectfrom_pandastoPandastoArrow)rE   r   r   r[   	pa_schemarF   rF   rG   _collect_to_arrow   s"   
z$SparkLikeLazyFrame._collect_to_arrowIterator[Column]c                 c  s     | j D ]	}| j|V  qd S r>   )r   rP   col)rE   r   rF   rF   rG   _iter_columns   s   
z SparkLikeLazyFrame._iter_columns	list[str]c                 C  s.   | j d u r| jd urt| jn| jj| _ | j S r>   )rC   rB   listrv   rl   r   rL   rF   rF   rG   r      s   


zSparkLikeLazyFrame.columnsbackend_EagerAllowedImpl | Nonekwargsr   r'   c                 K  s   |t ju rddlm} || j t jd| jddS |d u s"|t ju r3ddlm	} || 
 d| jddS |t ju rOdd l}ddlm} ||| 
 d| jdS d	| }t|)
Nr   )PandasLikeDataFrameT)r:   r6   r9   validate_column_names)ArrowDataFrame)r6   r9   r   )PolarsDataFrame)r6   r9   zUnsupported `backend` value: )r   PANDASnarwhals._pandas_like.dataframer   rl   r   rA   PYARROWnarwhals._arrow.dataframer   r   POLARSpolarsnarwhals._polars.dataframer   
from_arrowr   )rE   r   r   r   r   plr   msgrF   rF   rG   _collect   s6   


zSparkLikeLazyFrame._collectc              
   K  sT   | j  r!z
| j|fi |W S  ty  } zt|d d }~ww | j|fi |S r>   )r@   r   r   rx   r
   )rE   r   r   erF   rF   rG   collect   s   

zSparkLikeLazyFrame.collectcolumn_namesr   c                 G  s   |  | jj| S r>   )rp   rl   select)rE   r   rF   rF   rG   simple_select      z SparkLikeLazyFrame.simple_selectexprsr(   c              
   G  t   t | g|R  }dd |D }| j r1z
| | jj| W S  ty0 } zt|| d d }~ww | | jj| S )Nc                 S     g | ]	\}}| |qS rF   aliasr   col_namer   rF   rF   rG   
<listcomp>       z0SparkLikeLazyFrame.aggregate.<locals>.<listcomp>)r   r@   r   rp   rl   aggrx   r   rE   r   new_columnsnew_columns_listr   rF   rF   rG   	aggregate   s   
zSparkLikeLazyFrame.aggregatec              
   G  r   )Nc                 S  r   rF   r   r   rF   rF   rG   r   
  r   z-SparkLikeLazyFrame.select.<locals>.<listcomp>)r   r@   r   rp   rl   r   rx   r   r   rF   rF   rG   r     s   
zSparkLikeLazyFrame.selectc              
   G  sn   t | g|R  }| j r,z| | jt|W S  ty+ } zt|| d d }~ww | | jt|S r>   )	r   r@   r   rp   rl   withColumnsdictrx   r   )rE   r   r   r   rF   rF   rG   with_columns  s   
zSparkLikeLazyFrame.with_columns	predicatec              
   C  sd   | | d }| j r)z
| | j|W S  ty( } zt|| d d }~ww | | j|S Nr   )_callr@   r   rp   rl   whererx   r   )rE   r   	conditionr   rF   rF   rG   filter  s   
zSparkLikeLazyFrame.filterdict[str, DType]c                   s(    j d u r fdd jjD  _  j S )Nc                   s(   i | ]}|j t|j j j jjqS rF   )namer   ry   rA   rS   rl   sparkSession)r   fieldrL   rF   rG   r   )  s    z-SparkLikeLazyFrame.schema.<locals>.<dictcomp>)rB   rl   rv   rL   rF   rL   rG   rv   &  s
   

	zSparkLikeLazyFrame.schemac                 C  s   | j S r>   r   rL   rF   rF   rG   ru   4  s   z!SparkLikeLazyFrame.collect_schemar   Sequence[str]strictc                C  s    t | ||d}| | jj| S )N)r   )r   rp   rl   drop)rE   r   r   columns_to_droprF   rF   rG   r   7  s   zSparkLikeLazyFrame.dropnintc                 C  s   |  | j|S r>   )rp   rl   limit)rE   r   rF   rF   rG   head;  r   zSparkLikeLazyFrame.headkeys'Sequence[str] | Sequence[SparkLikeExpr]drop_null_keysr*   c                C  s   ddl m} || ||dS )Nr   r)   )r   )narwhals._spark_like.group_byr*   )rE   r   r   r*   rF   rF   rG   group_by>  s   zSparkLikeLazyFrame.group_byby
descendingbool | Sequence[bool]
nulls_lastc                  sh   t |tr|gt| }|r fdd|D }n	 fdd|D }dd t||D }  jj| S )Nc                 3  $    | ]}|r
 j jn j jV  qd S r>   rP   desc_nulls_lastasc_nulls_lastr   drL   rF   rG   	<genexpr>J  
    
z*SparkLikeLazyFrame.sort.<locals>.<genexpr>c                 3  r   r>   )rP   desc_nulls_firstasc_nulls_firstr   rL   rF   rG   r   O  r   c                 S     g | ]\}}||qS rF   rF   r   r   sort_frF   rF   rG   r   T      z+SparkLikeLazyFrame.sort.<locals>.<listcomp>)r{   r;   lenr   rp   rl   sort)rE   r   r   r   
sort_funcs	sort_colsrF   rL   rG   r   E  s   


zSparkLikeLazyFrame.sortr   Iterable[str]reversec                  s^   t |}t|tr|gt| } fdd|D }dd t||D }  jj| |S )Nc                 3  s$    | ]}|s
 j jn j jV  qd S r>   r   r   rL   rF   rG   r   [  s    
z+SparkLikeLazyFrame.top_k.<locals>.<genexpr>c                 S  r   rF   rF   r   rF   rF   rG   r   ^  r   z,SparkLikeLazyFrame.top_k.<locals>.<listcomp>)	r   r{   r;   r   r   rp   rl   r   r   )rE   r   r   r   r   r   rF   rL   rG   top_kW  s   

zSparkLikeLazyFrame.top_ksubsetSequence[str] | Nonec                 C  s$   |rt |nd }| | jj|dS )Nr   )r   rp   rl   dropna)rE   r   rF   rF   rG   
drop_nullsa  s   zSparkLikeLazyFrame.drop_nullsmappingMapping[str, str]c                   s8    fddj D }jfdd| D S )Nc                   s   i | ]	}|  ||qS rF   )getr   colname)r   rF   rG   r   f  s    z-SparkLikeLazyFrame.rename.<locals>.<dictcomp>c                   "   g | ]\}} j ||qS rF   rP   r   r   r   oldnewrL   rF   rG   r   k     " z-SparkLikeLazyFrame.rename.<locals>.<listcomp>)r   rp   rl   r   rw   )rE   r   rename_mappingrF   )r   rE   rG   renamee  s   
zSparkLikeLazyFrame.renamekeepr3   c                C  s   |r|  | }r||rt|nd }|dkrMtd| j}| j|p$| j}| j|| j	d
|| j|| jdk| j|}| |S | | jj|dS )Nnone   *   r   )_check_columns_existr   r   r   rV   partitionByrl   
withColumnrP   countoverr   r   litr   rp   dropDuplicates)rE   r   r  errortmpwindowro   rF   rF   rG   uniqueo  s   
zSparkLikeLazyFrame.uniqueotherhowr2   left_onright_onsuffixc                   sP  j  |j }|d urt|ng |d urt|ng }|dkr'fdd|D n|}i tt| fdd|D |jfdd D   }	|dv ra|	fdd	|D  n|dkrl|		  fd
dD }
|dkrt
tfdd	t||
D n|dkrd n|}|dkrdn|}jj||d|	S )Nfullc                   s   g | ]}| vr|qS rF   rF   r   c)	right_on_rF   rG   r     r   z+SparkLikeLazyFrame.join.<locals>.<listcomp>c                   s&   i | ]}|| v r|  n|qS rF   rF   r  )left_columnsr!  rF   rG   r     s    z+SparkLikeLazyFrame.join.<locals>.<dictcomp>c                   r  rF   r  r  rL   rF   rG   r     r
  >   leftcrossinnerc                 3  s     | ]}|vr | V  qd S r>   rF   r  )r  r%  rF   rG   r     s    z*SparkLikeLazyFrame.join.<locals>.<genexpr>c                   s   g | ]} | qS rF   rF   r#  )r  rF   rG   r     s    c                 3  s*    | ]\}}t j|t  |kV  qd S r>   )getattrrl   )r   left_key	right_key)other_nativerE   rF   rG   r     s
    
r(  
full_outer)onr  )r   r   r   ziprl   r   rw   copyextendvaluesr   r   r   rp   join)rE   r  r  r  r   r!  right_columnsleft_on_right_cols_to_rename	col_orderright_on_remappedon_
how_nativerF   )r&  r-  r  r%  rE   r!  rG   r4    sR   
		zSparkLikeLazyFrame.joinc           	   	     s  j j} } D ]}|| }||jkrd| d}t|q
j}t dkr/d}t|j	 s9j
 rJjj fdd|D  S j r}dfdd}jj fdd|D  j| d j fdd|D  S d}t|)Nz-`explode` operation not supported for dtype `z`, expected List typer  zExploding on multiple columns is not supported with SparkLike backend since we cannot guarantee that the exploded columns have matching element counts.c                   <   g | ]}| d  krj ||nj ||qS r   )rP   r   r   explode_outerr   r   r   rE   rF   rG   r         z.SparkLikeLazyFrame.explode.<locals>.<listcomp>r   r   r<   r    c                   s    j |  j | dkB S r   )rP   isnull
array_size)r   rL   rF   rG   null_condition  s   z2SparkLikeLazyFrame.explode.<locals>.null_conditionc                   r<  r=  )rP   r   r   exploder?  r@  rF   rG   r     rA  r   c                   s<   g | ]}| d  krj ||nj d|qS )r   N)rP   r   r   r  r?  r@  rF   rG   r     rA  z[Unreachable code, please report an issue at https://github.com/narwhals-dev/narwhals/issues)r   r   r<   r    )rA   dtypesru   Listr   r   r   NotImplementedErrorr@   r   r   rp   rl   r   is_sqlframeunionr   AssertionError)	rE   r   rF  rv   col_to_explodedtyper   r   rD  rF   r@  rG   rE    sP   



zSparkLikeLazyFrame.exploder/  indexvariable_name
value_namec           	      C  s   | j  r|dkrd}t||dkrd}t|n	 |r!t|nd}|d u r3tt| jt|nt|}| jj||||d}|d u rJ|j	| }| 
|S )N z<`variable_name` cannot be empty string for sqlframe backend.z9`value_name` cannot be empty string for sqlframe backend.rF   )idsr3  variableColumnNamevalueColumnName)r@   rI  rH  tuplesetr   
differencerl   unpivotr   rp   )	rE   r/  rN  rO  rP  r   rR  r3  unpivoted_native_framerF   rF   rG   rX    s*   
&

zSparkLikeLazyFrame.unpivotr   order_byc                 C  s`   |d u r
d}t || j | j| jdj| d |}| 	| j
j|g| jR  S )Nz;Cannot pass `order_by` to `with_row_index` for PySpark-liker  )	TypeErrorrP   
row_numberr  rV   r  r  orderByr   rp   rl   r   r   )rE   r   rZ  r   row_index_exprrF   rF   rG   with_row_index%  s   
z!SparkLikeLazyFrame.with_row_indexfilestr | Path | BytesIOc                 C  s   | j j| d S r>   )rl   writeparquet)rE   r`  rF   rF   rG   sink_parquet1  r   zSparkLikeLazyFrame.sink_parquetzO`LazyFrame.gather_every` is deprecated and will be removed in a future version.zG`LazyFrame.tail` is deprecated and will be removed in a future version.N)
r7   r8   r9   r.   r:   r   r6   r;   r<   r=   )r<   rI   )r<   rT   )rW   rX   r<   rY   )r[   r8   r\   r/   r<   r$   )r<   ra   )r<   r   )r<   r,   )r<   r$   )r9   r.   r<   r$   )ro   r8   r<   r$   )r<   rq   )r<   r   )r<   r   )r<   r   )r   r   r   r   r<   r'   )r   r   r<   r$   )r   r(   r<   r$   )r   r(   r<   r$   )r<   r   )r   r   r   r;   r<   r$   )r   r   r<   r$   )r   r   r   r;   r<   r*   )r   r   r   r   r   r;   r<   r$   )r   r   r   r   r   r   r<   r$   )r   r   r<   r$   )r   r  r<   r$   )r   r   r  r3   r<   r$   )r  r$   r  r2   r  r   r   r   r!  r   r<   r$   )r   r   r<   r$   )
r/  r   rN  r   rO  r   rP  r   r<   r$   )r   r   rZ  r   r<   r$   )r`  ra  r<   r=   )2__name__
__module____qualname__rH   propertyrK   rP   rS   rV   staticmethodrZ   classmethodr`   re   rg   ri   rj   rm   rp   r   r   r   r   r   r   r   r   r   r   r   rv   ru   r   r   r   r   r   r   r  r  r4  rE  rX  r_  rd  r   
deprecatedgather_every	join_asoftailrF   rF   rF   rG   r5   9   sp    












	
&





















C
B
 
r5   )r(   r8   ra   )N
__future__r   	functoolsr   operatorr   typingr   r   narwhals._exceptionsr   narwhals._namespacer	   narwhals._spark_like.utilsr
   r   r   r   r   r   r   narwhals._sql.dataframer   narwhals._utilsr   r   r   r   r   r   narwhals.exceptionsr   collections.abcr   r   r   r   ior   pathlibr   rR   r   rs   r   sqlframe.base.columnr    sqlframe.base.dataframer!   rU   r#   typing_extensionsr$   r%   r&   narwhals._compliant.typingr'   narwhals._spark_like.exprr(   r   r*   rh   r,   narwhals._typingr-   r.   r/   narwhals.dataframer0   narwhals.dtypesr1   narwhals.typingr2   r3   r8   r4   __annotations__r5   rF   rF   rF   rG   <module>   sH    $	 
