import pandas as pd
# Limit how many rows pandas prints for a DataFrame/Series. The fully qualified
# key is required: the bare 'max_rows' pattern matches more than one option in
# recent pandas releases and raises OptionError.
pd.set_option('display.max_rows', 5)
import numpy as np
# Load the wine reviews CSV, using its first column as the row index,
# then echo the resulting DataFrame.
reviews = pd.read_csv(
    filepath_or_buffer="../input/wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)
reviews
|
country |
description |
designation |
points |
price |
province |
region_1 |
region_2 |
taster_name |
taster_twitter_handle |
title |
variety |
winery |
|
|
0 |
Italy |
Aromas include tropical fruit, broom, brimston... |
Vulkà Bianco |
87 |
NaN |
Sicily & Sardinia |
Etna |
NaN |
Kerin O’Keefe |
@kerinokeefe |
Nicosia 2013 Vulkà Bianco (Etna) |
White Blend |
Nicosia |
|
1 |
Portugal |
This is ripe and fruity, a wine that is smooth... |
Avidagos |
87 |
15.0 |
Douro |
NaN |
NaN |
Roger Voss |
@vossroger |
Quinta dos Avidagos 2011 Avidagos Red (Douro) |
Portuguese Red |
Quinta dos Avidagos |
|
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
|
129969 |
France |
A dry style of Pinot Gris, this is crisp with ... |
NaN |
90 |
32.0 |
Alsace |
Alsace |
NaN |
Roger Voss |
@vossroger |
Domaine Marcel Deiss 2012 Pinot Gris (Alsace) |
Pinot Gris |
Domaine Marcel Deiss |
|
129970 |
France |
Big, rich and off-dry, this is powered by inte... |
Lieu-dit Harth Cuvée Caroline |
90 |
21.0 |
Alsace |
Alsace |
NaN |
Roger Voss |
@vossroger |
Domaine Schoffit 2012 Lieu-dit Harth Cuvée Car... |
Gewürztraminer |
Domaine Schoffit |
# Summary statistics (count, mean, quartiles, max, ...) for the points column.
reviews["points"].describe()
count 129971.000000
mean 88.447138
...
75% 91.000000
max 100.000000
Name: points, Length: 8, dtype: float64
# For a text column, describe() reports count, unique, top and freq instead.
reviews["taster_name"].describe()
count 103727
unique 19
top Roger Voss
freq 25514
Name: taster_name, dtype: object
# Average score across all reviews.
reviews["points"].mean()
88.44713820775404
# Distinct values found in the taster_name column (missing values show up as nan).
reviews["taster_name"].unique()
array(['Kerin O’Keefe', 'Roger Voss', 'Paul Gregutt',
'Alexander Peartree', 'Michael Schachner', 'Anna Lee C. Iijima',
'Virginie Boone', 'Matt Kettmann', nan, 'Sean P. Sullivan',
'Jim Gordon', 'Joe Czerwinski', 'Anne Krebiehl\xa0MW',
'Lauren Buzzeo', 'Mike DeSimone', 'Jeff Jenssen',
'Susan Kostrzewa', 'Carrie Dykes', 'Fiona Adams',
'Christina Pickard'], dtype=object)
Um eine Liste der eindeutigen Werte zu sehen, die zusätzlich angibt, wie oft jeder Wert im Datensatz vorkommt, verwenden wir `value_counts()`:
# Number of reviews per taster, most frequent first.
reviews["taster_name"].value_counts()
Roger Voss 25514
Michael Schachner 15134
...
Fiona Adams 27
Christina Pickard 6
Name: taster_name, Length: 19, dtype: int64
# Centre the scores on zero: compute the overall mean once, then subtract it
# from every entry with Series.map (one Python call per value).
review_points_mean = reviews["points"].mean()
reviews["points"].map(lambda score: score - review_points_mean)
0 -1.447138
1 -1.447138
...
129969 1.552862
129970 1.552862
Name: points, Length: 129971, dtype: float64
def remean_points(row):
    """Return a copy of ``row`` whose ``points`` value is centred on the mean.

    Written as a row-wise callback for ``DataFrame.apply``.

    Args:
        row: A pandas Series holding one record; it must contain a
            ``points`` entry.

    Returns:
        A new Series equal to ``row`` except that ``points`` has the
        module-level ``review_points_mean`` subtracted from it.
    """
    # Work on a copy: pandas does not support callbacks that mutate the
    # object handed to them by apply().
    centered = row.copy()
    centered["points"] = row["points"] - review_points_mean
    return centered
# Run remean_points once per row (axis 1 is the 'columns' axis); the result is
# a new DataFrame.
reviews.apply(remean_points, axis=1)
|
country |
description |
designation |
points |
price |
province |
region_1 |
region_2 |
taster_name |
taster_twitter_handle |
title |
variety |
winery |
|
|
0 |
Italy |
Aromas include tropical fruit, broom, brimston... |
Vulkà Bianco |
-1.447138 |
NaN |
Sicily & Sardinia |
Etna |
NaN |
Kerin O’Keefe |
@kerinokeefe |
Nicosia 2013 Vulkà Bianco (Etna) |
White Blend |
Nicosia |
|
1 |
Portugal |
This is ripe and fruity, a wine that is smooth... |
Avidagos |
-1.447138 |
15.0 |
Douro |
NaN |
NaN |
Roger Voss |
@vossroger |
Quinta dos Avidagos 2011 Avidagos Red (Douro) |
Portuguese Red |
Quinta dos Avidagos |
|
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
|
129969 |
France |
A dry style of Pinot Gris, this is crisp with ... |
NaN |
1.552862 |
32.0 |
Alsace |
Alsace |
NaN |
Roger Voss |
@vossroger |
Domaine Marcel Deiss 2012 Pinot Gris (Alsace) |
Pinot Gris |
Domaine Marcel Deiss |
|
129970 |
France |
Big, rich and off-dry, this is powered by inte... |
Lieu-dit Harth Cuvée Caroline |
1.552862 |
21.0 |
Alsace |
Alsace |
NaN |
Roger Voss |
@vossroger |
Domaine Schoffit 2012 Lieu-dit Harth Cuvée Car... |
Gewürztraminer |
Domaine Schoffit |
# Show only the first row of reviews.
reviews.head(n=1)
|
country |
description |
designation |
points |
price |
province |
region_1 |
region_2 |
taster_name |
taster_twitter_handle |
title |
variety |
winery |
|
|
0 |
Italy |
Aromas include tropical fruit, broom, brimston... |
Vulkà Bianco |
87 |
NaN |
Sicily & Sardinia |
Etna |
NaN |
Kerin O’Keefe |
@kerinokeefe |
Nicosia 2013 Vulkà Bianco (Etna) |
White Blend |
Nicosia |
# Same centring as a single whole-column operation: the scalar mean is
# subtracted from every element of the points Series.
review_points_mean = reviews["points"].mean()
reviews["points"].sub(review_points_mean)
0 -1.447138
1 -1.447138
...
129969 1.552862
129970 1.552862
Name: points, Length: 129971, dtype: float64
# Element-wise string concatenation of two columns; a row with a missing
# value on either side comes out as NaN.
reviews["country"] + " - " + reviews["region_1"]
0 Italy - Etna
1 NaN
...
129969 France - Alsace
129970 France - Alsace
Length: 129971, dtype: object