o
    Lhb                     @  s  U d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d dlmZ erd dlmZm Z m!Z! d d	l"m#Z# d d
l$m%Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z< e
de/dZ=e;Z>de?d< e;Z@de?d< eeeAgef ZBde?d< eAZCde?d< ejDejEejFhZGdZHeIeHejJZKdZLeIeLejJZMdZNeIeNejJZOdZPeIePejJZQe	d ZRde?d< d d!d"ZSd#d$d%d&d'd(d)d*d+d,d-
ZTd.e?d/< ejDU ZV	 dd4d5ZWdd;d<ZXddAdBZYddEdFZZej[dGdHddMdNZ\ddPdQZ]	dddTdUZ^ddWdXZ_ddYdZZ`ejDU d[krZdd^d_Zandd`d_ZaddbdcZbddgdhZcej[dGdHddjdkZdejejfZfefjgdlefjhdmiZidne?do< efjjdpdqdrdsefjkdtdudvdsefjldwdxdydsefjmdzd{d|dsefjnd}d~ddsefjoddddsefjpddddsefjqddddsefjrddddsefjsddddsefjtddeudsefjvddd3dsiZwde?d< efjxfZydddZzdddZ{dddZ|ej}dfej}dfej~efej}dfej~efej~dfej~efej~efej~efd	Zde?d< dddZeeeddZde?d< dddZdddZdddZdddZG dd ded7ef ZdS )    )annotationsN)TYPE_CHECKINGAnyCallableLiteralTypeVar)EagerSeriesNamespace)MS_PER_SECONDNS_PER_MICROSECONDNS_PER_MILLISECONDNS_PER_SECONDSECONDS_PER_DAYUS_PER_SECOND)ImplementationVersion_DeferredIterablecheck_columns_existisinstance_or_issubclass)
ShapeError)IterableIteratorMapping)
ModuleType)DtypeBaseMaskedDtype)	TypeAliasTypeIs)IntervalUnit)PandasLikeExprPandasLikeSeries)NativeDataFrameTNativeNDFrameTNativeSeriesT)DType)DTypeBackend	IntoDTypeTimeUnit_1DArrayExprT)boundr   UnitCurrent
UnitTargetBinOpBroadcastIntoRhsa  ^
    datetime64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s*                                   # Optional whitespace after comma
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z\/]+                       # Match timezone name, e.g., UTC, America/New_York
                (?:[+-]\d{2}:\d{2})?              # Optional offset in format +HH:MM or -HH:MM
                |                                 # OR
                pytz\.FixedOffset\(\d+\)          # Match pytz.FixedOffset with integer offset in parentheses
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for datetime64
$a  ^
    timestamp\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s?tz=                                # Match "tz=" prefix
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z\/]*                       # Match timezone name (e.g., UTC, America/New_York)
                (?:                               # Begin optional non-capturing group for offset
                    [+-]\d{2}:\d{2}               # Match offset in format +HH:MM or -HH:MM
                )?                                # End optional offset group
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for timestamp
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$z^
    timedelta64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for timedelta64
$a  ^
    duration\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for duration
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$)yearquartermonthweekdayhourminutesecondmillisecondmicrosecond
nanosecondNativeIntervalUnitDmin)dmr0   r1   r2   r4   r5   r6   r7   r8   r9   r:   )
yqmor>   hr?   smsusnsz)Mapping[IntervalUnit, NativeIntervalUnit]
UNITS_DICTimplementationr   returnboolc                 C  s   | t jt jhv S N)r   PANDASMODINrI    rP   \/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/narwhals/_pandas_like/utils.pyis_pandas_or_modin      rR   lhsr!   rhsPandasLikeSeries | object7tuple[pd.Series[Any] | object, pd.Series[Any] | object]c                 C  s   ddl m} | jj}| jrt||r|js| jjd |jfS t||rG|jr/| j|jjd fS |jj|urA| jt|j||jdfS | j|jfS t|t	rRd}t
|| j|fS )zValidate RHS of binary operation.

    If the comparison isn't supported, return `NotImplemented` so that the
    "right-hand-side" operation (e.g. `__radd__`) can be tried.
    r   r    rO   z$Expected Series or scalar, got list.)narwhals._pandas_like.seriesr!   nativeindex
_broadcast
isinstanceiloc	set_index_implementationlist	TypeError)rT   rU   r!   	lhs_indexmsgrP   rP   rQ   align_and_extract_native   s    


rd   objr#   rZ   r   c                C  s   t || jr t| }t|  }kr d| d| }t||tju r0| jdd} || _| S |tj	u rKd|
   krAdk rKn n| j|dddS | j|dd	S )
z}Wrapper around pandas' set_axis to set object index.

    We can set `copy` / `inplace` based on implementation/version.
    zExpected object of length z, got length: F)deep         r   )axiscopy)rl   )r\   to_native_namespaceIndexlenr   r   CUDFrm   rZ   rM   _backend_versionset_axis)re   rZ   rI   expected_len
actual_lenrc   rP   rP   rQ   r^      s   



r^   argskwargsc                O  sH   |t ju r| dkr| j|i |ddiS | j|i |dddS )zXWrapper around pandas' rename so that we can set `copy` based on implementation/version.rj   inplaceF)rm   rx   )r   rM   rr   rename)re   rI   rv   rw   rP   rP   rQ   ry      s   
ry      )maxsizenative_dtypeversionr   r%   c                 C  s  t | }|j}|dv r| S |dv r| S |dv r| S |dv r'| S |dv r/| S |dv r7| S |dv r?| S |dv rG|	 S |d	v rO|
 S |d
v rW| S |dv r_| S |dv rg| S |drp| S |dkryt| |S t| }st| }r|d}|d}|||S t| }st| }r|d}||S |dkr| S |dr|dr| S |dr|dr| S |dr|dr| S | S )N>   int64[pyarrow]Int64int64Int64[pyarrow]>   int32[pyarrow]Int32int32Int32[pyarrow]>   int16[pyarrow]Int16int16Int16[pyarrow]>   int8[pyarrow]Int8int8Int8[pyarrow]>   uint64[pyarrow]UInt64uint64UInt64[pyarrow]>   uint32[pyarrow]UInt32uint32UInt32[pyarrow]>   uint16[pyarrow]UInt16uint16UInt16[pyarrow]>   uint8[pyarrow]UInt8uint8UInt8[pyarrow]>   double[pyarrow]float64[pyarrow]Float64float64Float64[pyarrow]>   float[pyarrow]float32[pyarrow]Float32float32Float32[pyarrow]>   string[python]large_string[pyarrow]string[pyarrow_numpy]strstringstring[pyarrow]>   bool[pyarrow]rK   booleanboolean[pyarrow]zdictionary<category	time_unit	time_zonezdate32[day][pyarrow]decimalz	[pyarrow]timebinary)r   dtypesr   r   r   r   r   r   r   r   r   r   StringBoolean
startswithCategorical$native_categorical_to_narwhals_dtypePATTERN_PD_DATETIMEmatchPATTERN_PA_DATETIMEgroupDatetimePATTERN_PD_DURATIONPATTERN_PA_DURATIONDurationDateendswithDecimalTimeBinaryUnknown)r|   r}   dtyper   match_dt_time_unitdt_time_zonedu_time_unitrP   rP   rQ   #non_object_native_to_narwhals_dtype   sd   	





r   seriesc                 C  sr   |j }|tju r| S tjjj| ddd}|dkr | S |dkr-|t	j
ur-| S |dkr5| S | S )Nd   T)skipnar   empty)r   r   rq   r   pdapitypesinfer_dtypeheadr   V1Object)r   r}   rI   r   inferred_dtyperP   rP   rQ   object_native_to_narwhals_dtype  s   
r   pd.CategoricalDtype#Literal[Implementation.CUDF] | Nonec                 C  sN   |j }|tju r| S | jr#|tju rt| n| jj	}|
t|S | S rL   )r   r   r   r   orderedr   rq   _cudf_categorical_to_list
categoriesto_listEnumr   )r|   r}   rI   r   	into_iterrP   rP   rQ   r   2  s   

r   Callable[[], list[Any]]c                   s   d fdd}|S )NrJ   	list[Any]c                     s    j   S rL   )r   to_arrow	to_pylistrP   r|   rP   rQ   fnH  s   z%_cudf_categorical_to_list.<locals>.fn)rJ   r   rP   )r|   r   rP   r   rQ   r   D  s   r   c                 C  s   t | }|dr!ddlm} t| dr||  |S || j|S |dkr0| r0t| |t	j
S |dkr9t| |S |t	ju rC|j S d}t|)N)
large_listr`   structfixed_size_listr   )native_to_narwhals_dtyper   r   objectz;Unreachable code, object dtype should be handled separately)r   r   narwhals._arrow.utilsr   hasattrr   pyarrow_dtypeis_cudfr   r   rq   r   DASKr   r   AssertionError)r|   r}   rI   	str_dtypearrow_native_to_narwhals_dtyperc   rP   rP   rQ   r   N  s"   




r   )rh      r   TypeIs[BaseMaskedDtype]c                 C  s&   t  }t| tjjjot| d|du S )z/Return `True` if `dtype` is `"numpy_nullable"`.baseN)r   r\   r   r   
extensionsExtensionDtypegetattr)r   sentinelrP   rP   rQ   is_dtype_numpy_nullablep  s   r   c                 C  s*   t | tjjjrddlm} t | |S dS )Nr   r   F)r\   r   r   r   r   pandas.core.arrays.maskedr   )r   OldBaseMaskedDtyperP   rP   rQ   r   {  s   
r&   c                 C  s*   |t ju rdS t| rdS t| rdS dS )zjGet dtype backend for pandas type.

    Matches pandas' `dtype_backend` argument in `convert_dtypes`.
    Npyarrownumpy_nullable)r   rq   is_dtype_pyarrowr   )r   rI   rP   rP   rQ   get_dtype_backend  s
   
r  r   Iterable[Any]Iterator[DTypeBackend]c                   s    fdd| D S )ziYield a `DTypeBackend` per-dtype.

    Matches pandas' `dtype_backend` argument in `convert_dtypes`.
    c                 3  s    | ]}t | V  qd S rL   )r  ).0r   rO   rP   rQ   	<genexpr>  s    z&iter_dtype_backends.<locals>.<genexpr>rP   )r   rI   rP   rO   rQ   iter_dtype_backends  s   r  TypeIs[pd.ArrowDtype]c                 C  s   t tdo
t| tjS )N
ArrowDtype)r   r   r\   r  r   rP   rP   rQ   r    s   r  r   r   zMapping[type[DType], str]NW_TO_PD_DTYPES_INVARIANTr   r   r   )r  r  Nr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   z<Mapping[type[DType], Mapping[DTypeBackend, str | type[Any]]]NW_TO_PD_DTYPES_BACKENDr'   dtype_backendstr | PandasDtypec              
   C  s  |dvrd| d}t ||j}|  }t| }r|S t| }r)|| S t| |jrit|r:t	dk r:d}	n| j
}	|dkrU| j }
rKd|
 nd}d	|	 | d
S | j }
r_d|
 nd}d|	 | dS t| |jrt|rzt	dk rzd}n| j
}|dkrd| d
S d| dS t| |jrzdd l}W dS  ty } zd}t||d }~ww t| |jr|tju rd}t|t| |jr| }|j| jddS d}t |t||j|j|j|j|jfrt| ||S t|trd|j  d| d}t|d|  }t!|)N>   Nr  r  z;Expected one of {None, 'pyarrow', 'numpy_nullable'}, got: '')r   rG   r  z, tz= z
timestamp[z
][pyarrow]z, zdatetime64[]z	duration[ztimedelta64[r   z/'pyarrow>=13.0.0' is required for `Date` dtype.zdate32[pyarrow]z9Converting to Enum is not supported in narwhals.stable.v1T)r   z9Can not cast / initialize Enum without categories presentConverting to z dtype is not supported for .zUnknown dtype: )"
ValueErrorr   	base_typer  getr  r   r   rR   PANDAS_VERSIONr   r   r   r   r  ModuleNotFoundErrorr   r   r   NotImplementedErrorr\   rn   CategoricalDtyper   
issubclassStructArrayListr   r   narwhals_to_native_arrow_dtypeUNSUPPORTED_DTYPES__name__r   )r   r  rI   r}   rc   r   r  pd_typeinto_pd_typer   tztz_partr   paexcrG   rP   rP   rQ   narwhals_to_native_dtype  sl   






r,  pd.ArrowDtypec              
   C  s   t |r7tdkr7zdd l}W n ty( } zd|  d|j }t||d }~ww ddlm} t|| |S d|  d| d| d	}t	|)
N)r   r   r   zUnable to convert to z  to to the following exception: )r,  r  z+ dtype is not supported for implementation z and version r  )
rR   r  r  ImportErrorrc   r   r,  r   r  r  )r   rI   r}   r*  r+  rc   _to_arrow_dtyperP   rP   rQ   r#    s"   
r#  r   c                 C  s,   dt | v rdS t |  t | krdS dS )Nr  r   r   r   )r   lowerr  rP   rP   rQ   int_dtype_mapper/  s
   r1  i  i@B )	)rG   rF   )rG   rE   )rF   rG   )rF   rE   )rE   rG   )rE   rF   )rD   rG   )rD   rF   )rD   rE   zGMapping[tuple[UnitCurrent, UnitTarget], tuple[BinOpBroadcast, IntoRhs]]_TIMESTAMP_DATETIME_OP_FACTORrD   r$   currentr(   r   c                 C  sD   ||kr| S t ||f }r|\}}|| |S d| d}t|)Nzunexpected time unit zD, please report an issue at https://github.com/narwhals-dev/narwhals)r2  r  r   )rD   r3  r   itemr   factorrc   rP   rP   rQ   calculate_timestamp_datetimeF  s   

r6  rh   )rG   rF   rE   rD   zMapping[TimeUnit, int]_TIMESTAMP_DATE_FACTORc                 C  s   | t  t|  S rL   )r   r7  )rD   r   rP   rP   rQ   calculate_timestamp_date]  rS   r8  dfr"   column_nameslist[str] | _1DArrayNativeDataFrameT | Anyc              
   C  s   t || jd kr| j|k r| S | jjjdks$|tju r:| dk r:t	|| j
 d }r1|| jdd|f S z| | W S  tyZ } zt	|| j
 d }rU|| d}~ww )zsSelect columns by name.

    Prefer this over `df.loc[:, column_names]` as it's
    generally more performant.
    rh   brg   )	availableN)rp   shapecolumnsallr   kindr   rM   rr   r   tolistlocKeyError)r9  r:  rI   errorerP   rP   rQ   select_columns_by_namea  s     


rH  c                 C  s"   | j tjtjtjhv o| jjdkS )NrK   )r_   r   rM   rN   r   rY   r   )rD   rP   rP   rQ   is_non_nullable_boolean~  s
   
rI  r   c                C  sD   | t jt jhv rddl}|S | t ju rddl}|S d|  }t|)zCReturns numpy or cupy module depending on the given implementation.r   Nz!Expected pandas/modin/cudf, got: )r   rM   rN   numpyrq   cupyr   )rI   npcprc   rP   rP   rQ   import_array_module  s   

rN  c                   @  s   e Zd ZdS )PandasLikeSeriesNamespaceN)r%  
__module____qualname__rP   rP   rP   rQ   rO    s    rO  )rI   r   rJ   rK   )rT   r!   rU   rV   rJ   rW   )re   r#   rZ   r   rI   r   rJ   r#   )
re   r#   rv   r   rI   r   rw   r   rJ   r#   )r|   r   r}   r   rJ   r%   )r   r!   r}   r   rI   r   rJ   r%   rL   )r|   r   r}   r   rI   r   rJ   r%   )r|   r   rJ   r   )r|   r   r}   r   rI   r   rJ   r%   )r   r   rJ   r   )r   r   rI   r   rJ   r&   )r   r  rI   r   rJ   r  )r   r   rJ   r  )
r   r'   r  r&   rI   r   r}   r   rJ   r  )r   r'   rI   r   r}   r   rJ   r-  )r   r   rJ   r   )rD   r$   r3  r(   r   r(   rJ   r$   )rD   r$   r   r(   rJ   r$   )r9  r"   r:  r;  rI   r   rJ   r<  )rD   r!   rJ   rK   )rI   r   rJ   r   )
__future__r   	functoolsoperatorretypingr   r   r   r   r   pandasr   narwhals._compliantr   narwhals._constantsr	   r
   r   r   r   r   narwhals._utilsr   r   r   r   r   narwhals.exceptionsr   collections.abcr   r   r   r   r   pandas._typingr   PandasDtypepandas.core.dtypes.dtypesr   typing_extensionsr   r   narwhals._durationr   narwhals._pandas_like.exprr   rX   r!   narwhals._pandas_like.typingr"   r#   r$   narwhals.dtypesr%   narwhals.typingr&   r'   r(   r)   r*   r,   __annotations__r-   intr.   r/   rM   rq   rN   PANDAS_LIKE_IMPLEMENTATIONPD_DATETIME_RGXcompileVERBOSEr   PA_DATETIME_RGXr   PD_DURATION_RGXr   PA_DURATION_RGXr   r;   
ALIAS_DICTrH   rr   r  rR   rd   r^   ry   	lru_cacher   r   r   r   r   r   r  r  r  MAINr   r   r   r  r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r$  r,  r#  r1  floordivmulr2  r6  r7  r8  rH  rI  rN  rO  rP   rP   rP   rQ   <module>   s    




 


J


 




&

E






	