o
    Mh3                     @   s   d Z ddlZddlmZ ddlm  mZ h dZh dZ						d dd	Z
d!d
dZd"ddZd"ddZd"ddZdd Zd"ddZd#ddZd!ddZd!ddZd!ddZdd ZdS )$zE
Built-in datasets for demonstration, educational and test purposes.
    N)import_module>   cudfmodinpandaspolarspyarrow>   r   r   r   Fr   c                 C   s   t jtd|ddd}|r|t d|k}| r3|t t dt  t 	dgj
jdd}|s;|d	d
}|rN|tddddddddddd
}| S )a  
    Each row represents a country on a given year.

    https://www.gapminder.org/data/

    Parameters
    ----------
    datetimes: bool
        Whether or not 'year' column will converted to datetime type

    centroids: bool
        If True, ['centroid_lat', 'centroid_lon'] columns are added

    year: int | None
        If provided, the dataset will be filtered for that year

    pretty_names: bool
        If True, prettifies the column names

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 1704 rows and the following columns:
        `['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
        'iso_alpha', 'iso_num']`.

        If `datetimes` is True, the 'year' column will be a datetime column
        If `centroids` is True, two new columns are added: ['centroid_lat', 'centroid_lon']
        If `year` is an integer, the dataset will be filtered for that year
    	gapminderreturn_typeT
eager_onlyyearz-01-01z%Y-%m-%d)formatcentroid_latcentroid_lonCountry	ContinentYearzLife ExpectancyzGDP per Capita
PopulationzISO Alpha Country CodezISO Numeric Country CodezCentroid LatitudezCentroid Longitude)
country	continentr   lifeExp	gdpPercappop	iso_alphaiso_numr   r   )nwfrom_native_get_datasetfiltercolwith_columns
concat_strcastStringlitstrto_datetimedroprenamedict	to_native)	datetimes	centroidsr   pretty_namesr
   df r0   U/var/www/html/Persson_Maskin/env/lib/python3.10/site-packages/plotly/data/__init__.pyr      s>   (r   c                 C   s>   t jtd|ddd}| r|tddddd	d
dd}| S )a
  
    Each row represents a restaurant bill.

    https://vincentarelbundock.github.io/Rdatasets/doc/reshape2/tips.html

    Parameters
    ----------
    pretty_names: bool
        If True, prettifies the column names

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 244 rows and the following columns:
        `['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']`.
    tipsr	   Tr   z
Total BillTipzPayer GenderzSmokers at TablezDay of WeekMealz
Party Size)
total_billtipsexsmokerdaytimesize)r   r   r   r)   r*   r+   )r.   r
   r/   r0   r0   r1   r2   W   s   r2   c                 C      t d| dS )a  
    Each row represents a flower.

    https://en.wikipedia.org/wiki/Iris_flower_data_set

    Parameters
    ----------
    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 150 rows and the following columns:
        `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.
    irisr	   r   r	   r0   r0   r1   r=   |   s   r=   c                 C   r<   )a  
    Each row represents a level of wind intensity in a cardinal direction, and its frequency.

    Parameters
    ----------
    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 128 rows and the following columns:
        `['direction', 'strength', 'frequency']`.
    windr	   r>   r	   r0   r0   r1   r?      s   r?   c                 C   r<   )a  
    Each row represents voting results for an electoral district in the 2013 Montreal
    mayoral election.

    Parameters
    ----------
    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 58 rows and the following columns:
        `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'district_id']`.
    electionr	   r>   r	   r0   r0   r1   r@         r@   c                  C   s   ddl } ddl}ddl}|j|j|jtddd}| |d}||	 
d}W d   |S 1 s9w   Y  |S )a@  
    Each feature represents an electoral district in the 2013 Montreal mayoral election.

    Returns
    -------
        A GeoJSON-formatted `dict` with 58 polygon or multi-polygon features whose `id`
        is an electoral district numerical ID and whose `district` property is the ID and
        district name.
    r   Npackage_datadatasetszelection.geojson.gzrzutf-8)gzipjsonospathjoindirname__file__GzipFileloadsreaddecode)rE   rF   rG   rH   fresultr0   r0   r1   election_geojson   s   

rR   c                 C   r<   )a  
    Each row represents the availability of car-sharing services near the centroid of a zone
    in Montreal over a month-long period.

    Parameters
    ----------
    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe` with 249 rows and the following columns:
        `['centroid_lat', 'centroid_lon', 'car_hours', 'peak_hour']`.
    carsharer	   r>   r	   r0   r0   r1   rS      rA   rS   c                 C   s   | r|t vrd| d}t|tjtd|dddtdt }|r3|tdj	
 }| rB| d}d|j_|S | S )	ai  
    Each row in this wide dataset represents closing prices from 6 tech stocks in 2018/2019.

    Parameters
    ----------
    indexed: bool
        Whether or not the 'date' column is used as the index and the column index
        is named 'company'. Applicable only if `return_type='pandas'`

    datetimes: bool
        Whether or not the 'date' column will be of datetime type

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 100 rows and the following columns:
        `['date', 'GOOG', 'AAPL', 'AMZN', 'FB', 'NFLX', 'MSFT']`.
        If `indexed` is True, the 'date' column is used as the index and the column index
        is named 'company'
        If `datetimes` is True, the 'date' column will be a datetime column
    	Backend ' ' does not support setting indexstocksr	   Tr   datecompany)BACKENDS_WITH_INDEX_SUPPORTNotImplementedErrorr   r   r   r!   r    r#   r$   r&   r'   r+   	set_indexcolumnsname)indexedr,   r
   msgr/   r0   r0   r1   rV      s   rV   c                 C   sV   | r|t vrd| d}t|tjtd|ddd}| r'| }d|j_|S | S )a  
    Each row in this wide dataset represents the results of 100 simulated participants
    on three hypothetical experiments, along with their gender and control/treatment group.

    Parameters
    ----------
    indexed: bool
        If True, then the index is named "participant".
        Applicable only if `return_type='pandas'`

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 100 rows and the following columns:
        `['experiment_1', 'experiment_2', 'experiment_3', 'gender', 'group']`.
        If `indexed` is True, the data frame index is named "participant"
    rT   rU   
experimentr	   Tr   participant)rY   rZ   r   r   r   r+   indexr]   r^   r
   r_   r/   r0   r0   r1   r`     s   r`   c                 C   s\   | r|t vrd| d}t|tjtd|ddd}| r*| d}d|j_|S | S )	a  
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020.

    Parameters
    ----------
    indexed: bool
        Whether or not the 'nation' column is used as the index and the column index
        is named 'medal'. Applicable only if `return_type='pandas'`

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 3 rows and the following columns:
        `['nation', 'gold', 'silver', 'bronze']`.
        If `indexed` is True, the 'nation' column is used as the index and the column index
        is named 'medal'
    rT   rU   medalsr	   Tr   nationmedal)	rY   rZ   r   r   r   r+   r[   r\   r]   rc   r0   r0   r1   medals_wide1  s   rg   c                 C   s\   | r|t vrd| d}t|tjtd|dddjdgdd	d
}| r*t|d}| S )an  
    This dataset represents the medal table for Olympic Short Track Speed Skating for the
    top three nations as of 2020.

    Parameters
    ----------
    indexed: bool
        Whether or not the 'nation' column is used as the index.
        Applicable only if `return_type='pandas'`

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
        Dataframe with 9 rows and the following columns: `['nation', 'medal', 'count']`.
        If `indexed` is True, the 'nation' column is used as the index.
    rT   rU   rd   r	   Tr   re   countrf   )rb   
value_namevariable_name)rY   rZ   r   r   r   unpivotmaybe_set_indexr+   rc   r0   r0   r1   medals_longV  s   rm   c              
   C   s   t jt jt jtdd| d }|tvr#d| dt }t|z|dkr+d}n	|dkr2d	}n|}t|}W n tyM   d
| d| d}t|w z|	|W S  t
yp } zd|  d| }t
||jd}~ww )aI  
    Loads the dataset using the specified backend.

    Notice that the available backends are 'pandas', 'polars', 'pyarrow' and they all have
    a `read_csv` function (pyarrow has it via pyarrow.csv). Therefore we can dynamically
    load the library using `importlib.import_module` and then call
    `backend.read_csv(filepath)`.

    Parameters
    ----------
    d: str
        Name of the dataset to load.

    return_type: {'pandas', 'polars', 'pyarrow', 'modin', 'cudf'}
        Type of the resulting dataframe

    Returns
    -------
    Dataframe of `return_type` type
    rB   rC   z.csv.gzzUnsupported return_type. Found z, expected one of r   zpyarrow.csvr   zmodin.pandaszreturn_type=z, but z is not installedzUnable to read 'z' dataset due to: N)rG   rH   rI   rJ   rK   AVAILABLE_BACKENDSrZ   r   ModuleNotFoundErrorread_csv	Exceptionwith_traceback__traceback__)dr
   filepathr_   module_to_loadbackender0   r0   r1   r   {  s<   r   )FFNFr   )Fr   )r   )FFr   )__doc__rG   	importlibr   narwhals.stable.v1stablev1r   rn   rY   r   r2   r=   r?   r@   rR   rS   rV   r`   rg   rm   r   r0   r0   r0   r1   <module>   s.    

I
%




,
$
%%