import pandas as pd
# Cap DataFrame/Series display at five rows so the printed output stays short.
pd.options.display.max_rows = 5

# Load the wine-review dataset, using the CSV's first column as the row index.
reviews = pd.read_csv(
    "../input/wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)
# Number of reviews at each score: group the rows by `points`, then count the
# non-null `points` values inside every group.
reviews.groupby('points')['points'].count()
points
80 397
81 692
...
99 33
100 19
Name: points, Length: 21, dtype: int64
# Lowest price seen among the wines at each score.
reviews.groupby('points')['price'].min()
points
80 5.0
81 5.0
...
99 44.0
100 80.0
Name: price, Length: 21, dtype: float64
# Title of the first review that appears for each winery. `apply` hands the
# function one sub-DataFrame per winery.
reviews.groupby('winery').apply(lambda group: group['title'].iloc[0])
winery
1+1=3 1+1=3 NV Rosé Sparkling (Cava)
10 Knots 10 Knots 2010 Viognier (Paso Robles)
...
àMaurice àMaurice 2013 Fred Estate Syrah (Walla Walla V...
Štoka Štoka 2009 Izbrani Teran (Kras)
Length: 16757, dtype: object
# For every (country, province) pair, select the review row with the highest
# score. `idxmax` yields the index label of the first maximum, which `.loc`
# then uses to pull that row out of the group.
reviews.groupby(['country', 'province']).apply(
    lambda group: group.loc[group['points'].idxmax()]
)
|
country |
description |
designation |
points |
price |
province |
region_1 |
region_2 |
taster_name |
taster_twitter_handle |
title |
variety |
winery |
||
|
country |
province |
|||||||||||||
|
Argentina |
Mendoza Province |
Argentina |
If the color doesn't tell the full story, the ... |
Nicasia Vineyard |
97 |
120.0 |
Mendoza Province |
Mendoza |
NaN |
Michael Schachner |
@wineschach |
Bodega Catena Zapata 2006 Nicasia Vineyard Mal... |
Malbec |
Bodega Catena Zapata |
|
Other |
Argentina |
Take note, this could be the best wine Colomé ... |
Reserva |
95 |
90.0 |
Other |
Salta |
NaN |
Michael Schachner |
@wineschach |
Colomé 2010 Reserva Malbec (Salta) |
Malbec |
Colomé |
|
|
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
|
Uruguay |
San Jose |
Uruguay |
Baked, sweet, heavy aromas turn earthy with ti... |
El Preciado Gran Reserva |
87 |
50.0 |
San Jose |
NaN |
NaN |
Michael Schachner |
@wineschach |
Castillo Viejo 2005 El Preciado Gran Reserva R... |
Red Blend |
Castillo Viejo |
|
Uruguay |
Uruguay |
Cherry and berry aromas are ripe, healthy and ... |
Blend 002 Limited Edition |
91 |
22.0 |
Uruguay |
NaN |
NaN |
Michael Schachner |
@wineschach |
Narbona NV Blend 002 Limited Edition Tannat-Ca... |
Tannat-Cabernet Franc |
Narbona |
# Per-country price summary: number of rows, cheapest price, highest price.
# 'min' and 'max' are passed by name rather than as the Python builtins:
# handing builtins to `.agg` is deprecated (FutureWarning since pandas 2.1),
# and the named reducers keep the NaN-skipping groupby behaviour with the same
# column labels. `len` stays a callable so the first column is still labelled
# `len` and counts every row in the group, including rows with a missing price.
reviews.groupby(['country'])['price'].agg([len, 'min', 'max'])
|
len |
min |
max |
|
|
country |
|||
|
Argentina |
3800 |
4.0 |
230.0 |
|
Armenia |
2 |
14.0 |
15.0 |
|
... |
... |
... |
... |
|
Ukraine |
14 |
6.0 |
13.0 |
|
Uruguay |
109 |
10.0 |
130.0 |
# Count the reviews for each (country, province) pair. Grouping on two keys
# produces a result indexed by a two-level MultiIndex; wrapping `len` in a
# list makes the result a DataFrame with a single `len` column.
countries_reviewed = (
    reviews
    .groupby(['country', 'province'])['description']
    .agg([len])
)
countries_reviewed
|
len |
||
|
country |
province |
|
|
Argentina |
Mendoza Province |
3264 |
|
Other |
536 |
|
|
... |
... |
... |
|
Uruguay |
San Jose |
3 |
|
Uruguay |
24 |
# Grab the index of the grouped result and inspect its type to confirm that
# grouping by two keys produced a MultiIndex.
mi = countries_reviewed.index
type(mi)
pandas.core.indexes.multi.MultiIndex
# Preview the frame with the index levels (country, province) moved back into
# ordinary columns and replaced by a default integer index. This returns a new
# object; `countries_reviewed` itself is not modified here.
countries_reviewed.reset_index()
|
country |
province |
len |
|
|
0 |
Argentina |
Mendoza Province |
3264 |
|
1 |
Argentina |
Other |
536 |
|
... |
... |
... |
... |
|
423 |
Uruguay |
San Jose |
3 |
|
424 |
Uruguay |
Uruguay |
24 |
# Flatten the (country, province) index into regular columns for good, so the
# frame can be sorted by its values.
countries_reviewed = countries_reviewed.reset_index()
# Order rows by review count; sort_values sorts ascending by default, so the
# least-reviewed provinces come first.
countries_reviewed.sort_values(by='len')
|
country |
province |
len |
|
|
179 |
Greece |
Muscat of Kefallonian |
1 |
|
192 |
Greece |
Sterea Ellada |
1 |
|
... |
... |
... |
... |
|
415 |
US |
Washington |
8639 |
|
392 |
US |
California |
36247 |
# Same ordering reversed: most-reviewed provinces first.
countries_reviewed.sort_values(by='len', ascending=False)
|
country |
province |
len |
|
|
392 |
US |
California |
36247 |
|
415 |
US |
Washington |
8639 |
|
... |
... |
... |
... |
|
63 |
Chile |
Coelemu |
1 |
|
149 |
Greece |
Beotia |
1 |
# Order rows by their index labels instead of by a column's values.
countries_reviewed.sort_index()
|
country |
province |
len |
|
|
0 |
Argentina |
Mendoza Province |
3264 |
|
1 |
Argentina |
Other |
536 |
|
... |
... |
... |
... |
|
423 |
Uruguay |
San Jose |
3 |
|
424 |
Uruguay |
Uruguay |
24 |
# Sort on two keys: first by country, then by review count within each country.
countries_reviewed.sort_values(by=['country', 'len'])
|
country |
province |
len |
|
|
1 |
Argentina |
Other |
536 |
|
0 |
Argentina |
Mendoza Province |
3264 |
|
... |
... |
... |
... |
|
424 |
Uruguay |
Uruguay |
24 |
|
419 |
Uruguay |
Canelones |
43 |