o
    LhtO                     @  s  d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	Z	d dl	m
Z
 d dlmZmZmZmZmZmZmZmZ d dlmZ d d	lmZmZmZmZmZmZmZmZ d d
lm Z  d dl!m"Z" erd dl#m$Z$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.Z/d dl0Z1d dl	m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZCmDZD d dlmEZE d dlFmGZG d dlHmIZI d dlJmKZL d dlMmNZNmOZOmPZP G dd ded  eZQdS )!    )annotations)reduce)and_)TYPE_CHECKINGAnyN)StarExpression)DeferredTimeZoneFcatch_duckdb_exceptioncolevaluate_exprslitnative_to_narwhals_dtypewindow_expression)SQLLazyFrame)ImplementationValidateBackendVersionVersiongenerate_temporary_column_namenot_implementedparse_columns_to_droprequires
zip_strict
get_duckdb)InvalidOperationError)IterableIteratorMappingSequence)BytesIO)Path)
ModuleType)
Expression)DuckDBPyType)SelfTypeIs)CompliantDataFrameAny)
DuckDBExprDuckDBGroupByDuckDBNamespaceDuckDBInterchangeSeries)_EagerAllowedImpl_LazyAllowedImpl)_LimitedContext)	LazyFrame)DType	DataFrame)AsofJoinStrategyJoinStrategyLazyUniqueKeepStrategyc                   @  s  e Zd ZejZdddddZedddZe	dddZ
edddZddd Zdd!d"Zdd#d$Zdd&d'Zdd)d*Zdd.d/Zdd1d2Zdd7d8Zdd;d<Zdd>d?ZddBdCZddDdEZddIdJZdddMdNZddOdPZddRdSZeddUdVZeddXdYZdd[d\Zdd^d_Z dd`daZ!ddbdcZ"ddhdiZ#ddldmZ$ddudvZ%dd{d|Z&dd}d~Z'dddZ(dddZ)dddZ*dddZ+dddZ,dddZ-e./ddddZ0dddZ1e23dZ4e23dZ5dKS )DuckDBLazyFrameF)validate_backend_versiondfduckdb.DuckDBPyRelationversionr   r:   boolreturnNonec                C  s,   || _ || _d | _d | _|r|   d S d S N)_native_frame_version_cached_native_schema_cached_columns_validate_backend_version)selfr;   r=   r:    rH   [/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/narwhals/_duckdb/dataframe.py__init__E   s   zDuckDBLazyFrame.__init__tuple[int, ...]c                 C  
   | j  S rA   )_implementation_backend_versionrG   rH   rH   rI   rN   S      
z DuckDBLazyFrame._backend_versionobjduckdb.DuckDBPyRelation | AnyTypeIs[duckdb.DuckDBPyRelation]c                 C  s   t | tjS rA   )
isinstanceduckdbDuckDBPyRelation)rQ   rH   rH   rI   
_is_nativeW   s   zDuckDBLazyFrame._is_nativedatacontextr1   r%   c               C  s   | ||j dS Nr=   )rC   )clsrX   rY   rH   rH   rI   from_native[   s   zDuckDBLazyFrame.from_nativeargsr   kwdsILazyFrame[duckdb.DuckDBPyRelation] | DataFrameV1[duckdb.DuckDBPyRelation]c                 O  s4   | j tju rddlm} || ddS | j j| ddS )Nr   r4   interchange)levellazy)rC   r   V1narwhals.stable.v1r5   	lazyframe)rG   r^   r_   DataFrameV1rH   rH   rI   to_narwhalsa   s   zDuckDBLazyFrame.to_narwhalsc                 C  s   | j tjurd}t|| S )Nz=__narwhals_dataframe__ is not implemented for DuckDBLazyFrame)rC   r   rd   AttributeError)rG   msgrH   rH   rI   __narwhals_dataframe__j   s   z&DuckDBLazyFrame.__narwhals_dataframe__c                 C  s   | S rA   rH   rO   rH   rH   rI   __narwhals_lazyframe__q   s   z&DuckDBLazyFrame.__narwhals_lazyframe__r"   c                 C  s   t  S rA   r   rO   rH   rH   rI   __native_namespace__t      z$DuckDBLazyFrame.__native_namespace__r,   c                 C  s   ddl m} || jdS )Nr   r+   r[   )narwhals._duckdb.namespacer,   rC   )rG   r,   rH   rH   rI   __narwhals_namespace__w   s   z&DuckDBLazyFrame.__narwhals_namespace__namestrr.   c                 C  s"   ddl m} || j|| jdS )Nr   r-   r[   )narwhals._duckdb.seriesr.   nativeselectrC   )rG   rq   r.   rH   rH   rI   
get_column|   s   zDuckDBLazyFrame.get_columnIterator[Expression]c                 c  s    | j D ]}t|V  qd S rA   )columnsr   )rG   rq   rH   rH   rI   _iter_columns   s   
zDuckDBLazyFrame._iter_columnsbackend_EagerAllowedImpl | Nonekwargsr'   c                 K  s   |d u s	|t ju rddlm} || j d| jddS |t ju r4ddlm	} || j
 t jd| jddS |t ju rJddlm} || j d| jdS d	| }t|)
Nr   )ArrowDataFrameT)r:   r=   validate_column_names)PandasLikeDataFrame)implementationr:   r=   r~   )PolarsDataFrame)r:   r=   zUnsupported `backend` value: )r   PYARROWnarwhals._arrow.dataframer}   rt   arrowrC   PANDASnarwhals._pandas_like.dataframer   r;   POLARSnarwhals._polars.dataframer   pl
ValueError)rG   rz   r|   r}   r   r   rj   rH   rH   rI   collect   s0   


zDuckDBLazyFrame.collectnintc                 C  s   |  | j|S rA   )_with_nativert   limit)rG   r   rH   rH   rI   head      zDuckDBLazyFrame.headcolumn_namesc                 G  s   |  | jj| S rA   )r   rt   ru   )rG   r   rH   rH   rI   simple_select   r   zDuckDBLazyFrame.simple_selectexprsr(   c              
   G  sT   dd t | g|R  D }z
| | j|W S  ty) } zt|| d d }~ww )Nc                 S  s   g | ]	\}}| |qS rH   alias.0rq   valrH   rH   rI   
<listcomp>   s    z-DuckDBLazyFrame.aggregate.<locals>.<listcomp>)r   r   rt   	aggregate	Exceptionr
   rG   r   	selectionerH   rH   rI   r         zDuckDBLazyFrame.aggregatec              
   G  sT   dd t | g|R  D }z
| | jj| W S  ty) } zt|| d d }~ww )Nc                 s      | ]
\}}| |V  qd S rA   r   r   rH   rH   rI   	<genexpr>       z)DuckDBLazyFrame.select.<locals>.<genexpr>)r   r   rt   ru   r   r
   r   rH   rH   rI   ru      r   zDuckDBLazyFrame.selectrx   Sequence[str]strictc                  s4   t | ||d  fdd| jD }| | jj| S )N)r   c                 3  s    | ]	}| vr|V  qd S rA   rH   r   rq   columns_to_droprH   rI   r          z'DuckDBLazyFrame.drop.<locals>.<genexpr>)r   rx   r   rt   ru   )rG   rx   r   r   rH   r   rI   drop   s   zDuckDBLazyFrame.dropN_LazyAllowedImpl | Nonec                 C  s   |d ur
d}t || S )Nz.`backend` argument is not supported for DuckDB)r   )rG   rz   rj   rH   rH   rI   rc      s   zDuckDBLazyFrame.lazyc              
     sz   t t| g|R    fdd| jD }|dd   D  z
| | jj| W S  ty< } zt	|| d d }~ww )Nc                   s,   g | ]}| v r  ||nt|qS rH   )popr   r   r   new_columns_maprH   rI   r      s    z0DuckDBLazyFrame.with_columns.<locals>.<listcomp>c                 s  r   rA   r   )r   rq   valuerH   rH   rI   r      r   z/DuckDBLazyFrame.with_columns.<locals>.<genexpr>)
dictr   rx   extenditemsr   rt   ru   r   r
   )rG   r   resultr   rH   r   rI   with_columns   s   
zDuckDBLazyFrame.with_columns	predicatec              
   C  sF   || d }z
|  | j|W S  ty" } zt|| d d }~ww )Nr   )r   rt   filterr   r
   )rG   r   maskr   rH   rH   rI   r      s   zDuckDBLazyFrame.filterdict[str, DType]c                   sL   j d u rttjjj_ tj  fddtjjjjD S )Nc                   s    i | ]\}}|t |j qS rH   )r   rC   )r   column_nameduckdb_dtypedeferred_time_zonerG   rH   rI   
<dictcomp>   s    z*DuckDBLazyFrame.schema.<locals>.<dictcomp>)rD   r   ziprx   rt   typesr   r   rO   rH   r   rI   schema   s   

zDuckDBLazyFrame.schema	list[str]c                 C  s.   | j d u r| jd urt| jn| jj| _ | j S rA   )rE   rD   listr   rt   rx   rO   rH   rH   rI   rx      s   


zDuckDBLazyFrame.columnspd.DataFramec                 C  rL   rA   )rt   r;   rO   rH   rH   rI   	to_pandas   rP   zDuckDBLazyFrame.to_pandaspa.Tablec                 C  rL   rA   )rt   r   rO   rH   rH   rI   to_arrow   rP   zDuckDBLazyFrame.to_arrowc                 C  s   | j | j|dS rZ   )	__class__rt   )rG   r=   rH   rH   rI   _with_version     zDuckDBLazyFrame._with_versionc                 C  s   | j || jdS rZ   )r   rC   )rG   r;   rH   rH   rI   r     r   zDuckDBLazyFrame._with_nativekeys$Sequence[str] | Sequence[DuckDBExpr]drop_null_keysr*   c                C  s   ddl m} || ||dS )Nr   r)   )r   )narwhals._duckdb.group_byr*   )rG   r   r   r*   rH   rH   rI   group_by	  s   zDuckDBLazyFrame.group_bymappingMapping[str, str]c                   s,   | j } fdd|jD }| | j j| S )Nc                 3  s2    | ]}| v rt | | nt |V  qd S rA   )r   r   r   r   rH   rI   r     s
     
z)DuckDBLazyFrame.rename.<locals>.<genexpr>)rt   rx   r   ru   )rG   r   r;   r   rH   r   rI   rename  s
   
zDuckDBLazyFrame.renameotherhowr7   left_onSequence[str] | Noneright_onsuffixc                C  s  |dkrdn|}|dkr)| j dk rd| j  }t|| jd|jd}n+|d us/J |d us5J dd	 t||D }	tt|	}
| jdj|jd|
|d
}|dv rdd | j	D }|j	D ]I}|| j	v }|dkr||s||
td| d qc|dks|r|d u s||vr|
td| d| |  qc|d u s||vr|
t| qc|j| | jj}n
|d| jj}| |S )Nfulloutercross)   r      z;'duckdb>=1.1.4' is required for cross-join, found version: lhsrhsc                 s  4    | ]\}}t d | dt d| dkV  qdS lhs.""rhs."Nr   r   leftrightrH   rH   rI   r   ,  
    
z'DuckDBLazyFrame.join.<locals>.<genexpr>)	conditionr   >   r   r   innerr   c                 S  s   g | ]
}t d | dqS )r   r   r   )r   xrH   rH   rI   r   9  s    z(DuckDBLazyFrame.join.<locals>.<listcomp>r   r   lhs.*)rN   NotImplementedErrorrt   	set_aliasr   r   r   r   joinrx   appendr   r   ru   r   )rG   r   r   r   r   r   
native_howrj   relitr   ru   rq   
col_in_lhsresrH   rH   rI   r     sD   	




&
zDuckDBLazyFrame.joinby_leftby_rightstrategyr6   c             	   C  sZ  | j }|j }	g }
|d ur|d ur|
dd t||D  ng  }}|dkr:|
td| dtd| dk n|dkrR|
td| dtd| dk nd}t|tt|
}d	g}|	jD ]5}||jv r|d u su||h|vr|d| d
| | d qc|d u s||h|vr|t	t| qcdd
| d| d}| t|S )Nc                 s  r   r   r   r   rH   rH   rI   r   Y  r   z,DuckDBLazyFrame.join_asof.<locals>.<genexpr>backwardr   r   r   forwardzKOnly 'backward' and 'forward' strategies are currently supported for DuckDBr   z" as "z
            SELECT ,zD
            FROM lhs
            ASOF LEFT JOIN rhs
            ON z
            )rt   r   r   r   r   r   r   r   rx   rr   r   r   rU   sql)rG   r   r   r   r   r   r   r   r   r   
conditionsrj   r   ru   rq   queryrH   rH   rI   	join_asofJ  s:   

((


zDuckDBLazyFrame.join_asofc                 C  s   | j S rA   )r   rO   rH   rH   rI   collect_schemay  rn   zDuckDBLazyFrame.collect_schemasubsetkeepr8   c          
      C  s   |dkr|n|p
| j  }rb| | }r|td| j }tdg | j |}|dkr,|n|}ttd||}ttdt |d|}	| | j	t ||	
t|tdk	t||gdS | | jd	| j S )
Nany   none
row_numbercountrH   r   )exclude, )rx   _check_columns_existr   r   r	   r   r   r   rt   ru   r   r   r   uniquer   )
rG   r  r  subset_erroridx_name
count_namerq   idx_expr
count_exprrH   rH   rI   r  |  s$   zDuckDBLazyFrame.uniqueby
descendingbool | Sequence[bool]
nulls_lastc                G  sX   t |tr|gt| }|rdd t||D }n
dd t||D }| | jj| S )Nc                 s  4    | ]\}}|st | nt |  V  qd S rA   )r   r  descr   rq   r  rH   rH   rI   r     r   z'DuckDBLazyFrame.sort.<locals>.<genexpr>c                 s  r  rA   )r   nulls_firstr  r  rH   rH   rI   r     r   )rT   r>   lenr   r   rt   sort)rG   r  r  r  r   rH   rH   rI   r    s   
zDuckDBLazyFrame.sortkIterable[str]reversec          	      C  s~   | j }t|}t|tr| gt| }ndd |D }ttd||dgt| d}|t|k}d| d}| t	
|S )Nc                 S  s   g | ]}| qS rH   rH   )r   revrH   rH   rI   r     s    z)DuckDBLazyFrame.top_k.<locals>.<listcomp>r  T)order_byr  r  z3
        SELECT *
        FROM _df
        QUALIFY z	
        )rt   r   rT   r>   r  r   r	   r   r   rU   r   )	rG   r  r  r  _dfr  exprr   r   rH   rH   rI   top_k  s    
zDuckDBLazyFrame.top_kc                 C  s8   |d ur|n| j }ttdd |D }| | j|S )Nc                 s  s    | ]	}t | V  qd S rA   )r   	isnotnullr   rH   rH   rI   r     r   z-DuckDBLazyFrame.drop_nulls.<locals>.<genexpr>)rx   r   r   r   rt   r   )rG   r  r  keep_conditionrH   rH   rI   
drop_nulls  s   zDuckDBLazyFrame.drop_nullsc                   s   | j j}|  }D ]}|| }||jkrd| d}t|q
tdkr,d}t|td  | j}| j	} 
 td @ tdk}	||	j fdd|D  }
||	 jfd	d|D  }| |
|S )
Nz-`explode` operation not supported for dtype `z`, expected List typer   zExploding on multiple columns is not supported with DuckDB backend since we cannot guarantee that the exploded columns have matching element counts.r   r  c                 3  s,    | ]}|v rt d  |n|V  qdS )unnestN)r	   r   r   col_to_exploderx   rH   rI   r     s
    
z*DuckDBLazyFrame.explode.<locals>.<genexpr>c                 3  s*    | ]}| v rt d |n|V  qd S rA   )r   r   r   )rx   rH   rI   r     s
    
)rC   dtypesr  Listr   r  r   r   rt   rx   r%  r	   r   r   ru   r   union)rG   rx   r+  r   rq   dtyperj   r   original_columnsnot_null_conditionnon_null_relnull_relrH   r)  rI   explode  s<   



zDuckDBLazyFrame.explodeonindexvariable_name
value_namec           
        s   |d u rg n| |d u r fdd| j D n|}|dkr"d}t||dkr,d}t|ddd |D }| j}d	| d
| d| d}	| t|	jg  || S )Nc                   s   g | ]}| vr|qS rH   rH   )r   cindex_rH   rI   r     s    z+DuckDBLazyFrame.unpivot.<locals>.<listcomp> z:`variable_name` cannot be empty string for duckdb backend.z7`value_name` cannot be empty string for duckdb backend.r
  c                 s  s    | ]	}t t|V  qd S rA   )rr   r   r   rH   rH   rI   r     r   z*DuckDBLazyFrame.unpivot.<locals>.<genexpr>z(
            unpivot rel
            on z(
            into
                name "z"
                value "z"
            )rx   r   r   rt   r   rU   r   ru   )
rG   r4  r5  r6  r7  on_rj   
unpivot_onr   r   rH   r9  rI   unpivot  s(    zDuckDBLazyFrame.unpivot)r      r!  c                 C  sH   |d u r
d}t |ttd|dtd |}| | j|t S )Nz5Cannot pass `order_by` to `with_row_index` for DuckDBr  )r!  r   )		TypeErrorr   r	   r   r   r   rt   ru   r   )rG   rq   r!  rj   r#  rH   rH   rI   with_row_index  s   zDuckDBLazyFrame.with_row_indexfilestr | Path | BytesIOc                 C  s    | j }d| d}t| d S )Nz5
            COPY (SELECT * FROM df)
            TO 'z+'
            (FORMAT parquet)
            )rt   rU   r   )rG   rB  r;   r   rH   rH   rI   sink_parquet  s
   zDuckDBLazyFrame.sink_parquetzO`LazyFrame.gather_every` is deprecated and will be removed in a future version.zG`LazyFrame.tail` is deprecated and will be removed in a future version.)r;   r<   r=   r   r:   r>   r?   r@   )r?   rK   )rQ   rR   r?   rS   )rX   r<   rY   r1   r?   r%   )r^   r   r_   r   r?   r`   )r?   r%   )r?   r"   )r?   r,   )rq   rr   r?   r.   )r?   rw   )rz   r{   r|   r   r?   r'   )r   r   r?   r%   )r   rr   r?   r%   )r   r(   r?   r%   )rx   r   r   r>   r?   r%   rA   )rz   r   r?   r%   )r   r(   r?   r%   )r?   r   )r?   r   )r?   r   )r?   r   )r=   r   r?   r%   )r;   r<   r?   r%   )r   r   r   r>   r?   r*   )r   r   r?   r%   )r   r%   r   r7   r   r   r   r   r   rr   r?   r%   )r   r%   r   rr   r   rr   r   r   r   r   r   r6   r   rr   r?   r%   )r  r   r  r8   r?   r%   )r  rr   r  r  r  r>   r?   r%   )r  r   r  r  r  r  r?   r%   )r  r   r?   r%   )rx   r   r?   r%   )
r4  r   r5  r   r6  rr   r7  rr   r?   r%   )rq   rr   r!  r   r?   r%   )rB  rC  r?   r@   )6__name__
__module____qualname__r   DUCKDBrM   rJ   propertyrN   staticmethodrW   classmethodr]   rh   rk   rl   rm   rp   rv   ry   r   r   r   r   ru   r   rc   r   r   r   rx   r   r   r   r   r   r   r   r   r  r  r  r$  r'  r3  r>  r   backend_versionrA  rD  r   
deprecatedgather_everytailrH   rH   rH   rI   r9   ;   sl    

	






"







	






2
/





*!
		r9   )r(   r<   r`   )R
__future__r   	functoolsr   operatorr   typingr   r   rU   r   narwhals._duckdb.utilsr   r	   r
   r   r   r   r   r   narwhals._sql.dataframer   narwhals._utilsr   r   r   r   r   r   r   r   narwhals.dependenciesr   narwhals.exceptionsr   collections.abcr   r   r   r   ior    pathlibr!   r   r"   pandaspdpyarrowpar#   duckdb.typingr$   typing_extensionsr%   r&   narwhals._compliant.typingr'   narwhals._duckdb.exprr(   r   r*   ro   r,   rs   r.   narwhals._typingr/   r0   r1   narwhals.dataframer2   narwhals.dtypesr3   re   r5   rg   narwhals.typingr6   r7   r8   r9   rH   rH   rH   rI   <module>   sL    (
(

