x=0
import pandas as pd
# Read the diamonds data set from 'diamonds.csv' (path is relative to the
# current working directory) into a DataFrame.
df = pd.read_csv('diamonds.csv')
type(df)
pandas.core.frame.DataFrame
# df.head()
df.head(10)
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
5 | 6 | 0.24 | Very Good | J | VVS2 | 62.8 | 57.0 | 336 | 3.94 | 3.96 | 2.48 |
6 | 7 | 0.24 | Very Good | I | VVS1 | 62.3 | 57.0 | 336 | 3.95 | 3.98 | 2.47 |
7 | 8 | 0.26 | Very Good | H | SI1 | 61.9 | 55.0 | 337 | 4.07 | 4.11 | 2.53 |
8 | 9 | 0.22 | Fair | E | VS2 | 65.1 | 61.0 | 337 | 3.87 | 3.78 | 2.49 |
9 | 10 | 0.23 | Very Good | H | VS1 | 59.4 | 61.0 | 338 | 4.00 | 4.05 | 2.39 |
df.tail()
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
1106 | 1107 | 0.91 | Premium | I | SI2 | 62.0 | 59.0 | 2913 | 6.18 | 6.23 | 3.85 |
1107 | 1108 | 0.83 | Premium | E | SI1 | 62.2 | 59.0 | 2913 | 6.05 | 5.97 | 3.74 |
1108 | 1109 | 0.85 | Ideal | G | SI2 | 62.0 | 57.0 | 2913 | 6.10 | 6.02 | 3.76 |
1109 | 1110 | 0.80 | Very Good | F | SI1 | 63.5 | 55.0 | 2914 | 5.86 | 5.89 | 3.73 |
1110 | 1111 | 0.73 | Ideal | E | SI1 | 61.4 | 58.0 | 2914 | 5.76 | 5.80 | 3.55 |
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1111 entries, 0 to 1110
Data columns (total 11 columns):
# 1111 non-null int64
carat 1111 non-null float64
cut 1111 non-null object
color 1111 non-null object
clarity 1111 non-null object
depth 1111 non-null float64
table 1111 non-null float64
price 1111 non-null int64
x 1111 non-null float64
y 1111 non-null float64
z 1111 non-null float64
dtypes: float64(6), int64(2), object(3)
memory usage: 82.5+ KB
df.describe()
# | carat | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|
count | 1111.00000 | 1111.000000 | 1111.000000 | 1111.000000 | 1111.000000 | 1111.000000 | 1111.000000 | 1111.000000 |
mean | 556.00000 | 0.684086 | 61.735914 | 57.708731 | 2456.058506 | 5.588587 | 5.581683 | 3.447642 |
std | 320.86238 | 0.198297 | 1.731228 | 2.430179 | 864.254630 | 0.634599 | 0.623776 | 0.396959 |
min | 1.00000 | 0.200000 | 53.000000 | 52.000000 | 326.000000 | 3.790000 | 3.750000 | 2.270000 |
25% | 278.50000 | 0.700000 | 60.900000 | 56.000000 | 2777.000000 | 5.620000 | 5.610000 | 3.440000 |
50% | 556.00000 | 0.710000 | 61.800000 | 57.000000 | 2822.000000 | 5.760000 | 5.760000 | 3.550000 |
75% | 833.50000 | 0.790000 | 62.600000 | 59.000000 | 2862.500000 | 5.910000 | 5.910000 | 3.640000 |
max | 1111.00000 | 1.270000 | 69.500000 | 70.000000 | 2914.000000 | 7.120000 | 7.050000 | 4.330000 |
df['carat'].head()
0 0.23
1 0.21
2 0.23
3 0.29
4 0.31
Name: carat, dtype: float64
df[['carat', 'price']].head()
# df[ ['carat', 'price'] ].head()
carat | price | |
---|---|---|
0 | 0.23 | 326 |
1 | 0.21 | 326 |
2 | 0.23 | 327 |
3 | 0.29 | 334 |
4 | 0.31 | 335 |
df.iloc[0]
# 1
carat 0.23
cut Ideal
color E
clarity SI2
depth 61.5
table 55
price 326
x 3.95
y 3.98
z 2.43
Name: 0, dtype: object
# df.iloc[0]['carat']
df.iloc[0][['carat', 'price']]
carat 0.23
price 326
Name: 0, dtype: object
df.iloc[:10]
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
5 | 6 | 0.24 | Very Good | J | VVS2 | 62.8 | 57.0 | 336 | 3.94 | 3.96 | 2.48 |
6 | 7 | 0.24 | Very Good | I | VVS1 | 62.3 | 57.0 | 336 | 3.95 | 3.98 | 2.47 |
7 | 8 | 0.26 | Very Good | H | SI1 | 61.9 | 55.0 | 337 | 4.07 | 4.11 | 2.53 |
8 | 9 | 0.22 | Fair | E | VS2 | 65.1 | 61.0 | 337 | 3.87 | 3.78 | 2.49 |
9 | 10 | 0.23 | Very Good | H | VS1 | 59.4 | 61.0 | 338 | 4.00 | 4.05 | 2.39 |
df.iloc[ [0, 2, 1, 0] ]
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
row0 = df.iloc[0]
row0
# 1
carat 0.23
cut Ideal
color E
clarity SI2
depth 61.5
table 55
price 326
x 3.95
y 3.98
z 2.43
Name: 0, dtype: object
row0['price']
326
row0.iloc[1]
0.23
row0.loc['price']
326
# Build a second frame indexed by carat. set_index returns a new DataFrame;
# df itself keeps its default integer index. Carat values repeat (e.g. 0.23),
# so a label lookup on this index may match more than one row.
carats = df.set_index('carat')
carats.head()
# | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|
carat | ||||||||||
0.23 | 1 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
0.21 | 2 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
0.23 | 3 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
0.29 | 4 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
0.31 | 5 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
carats.loc[0.21]
# 2
cut Premium
color E
clarity SI1
depth 59.8
table 61
price 326
x 3.89
y 3.84
z 2.31
Name: 0.21, dtype: object
carats.info()
<class 'pandas.core.frame.DataFrame'>
Float64Index: 1111 entries, 0.23 to 0.73
Data columns (total 10 columns):
# 1111 non-null int64
cut 1111 non-null object
color 1111 non-null object
clarity 1111 non-null object
depth 1111 non-null float64
table 1111 non-null float64
price 1111 non-null int64
x 1111 non-null float64
y 1111 non-null float64
z 1111 non-null float64
dtypes: float64(5), int64(2), object(3)
memory usage: 114.5+ KB
carats.reset_index()
carat | # | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.23 | 1 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 0.21 | 2 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 0.23 | 3 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 0.29 | 4 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 0.31 | 5 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1106 | 0.91 | 1107 | Premium | I | SI2 | 62.0 | 59.0 | 2913 | 6.18 | 6.23 | 3.85 |
1107 | 0.83 | 1108 | Premium | E | SI1 | 62.2 | 59.0 | 2913 | 6.05 | 5.97 | 3.74 |
1108 | 0.85 | 1109 | Ideal | G | SI2 | 62.0 | 57.0 | 2913 | 6.10 | 6.02 | 3.76 |
1109 | 0.80 | 1110 | Very Good | F | SI1 | 63.5 | 55.0 | 2914 | 5.86 | 5.89 | 3.73 |
1110 | 0.73 | 1111 | Ideal | E | SI1 | 61.4 | 58.0 | 2914 | 5.76 | 5.80 | 3.55 |
1111 rows × 11 columns
df.head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
df.head() *2
# | carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2 | 0.46 | IdealIdeal | EE | SI2SI2 | 123.0 | 110.0 | 652 | 7.90 | 7.96 | 4.86 |
1 | 4 | 0.42 | PremiumPremium | EE | SI1SI1 | 119.6 | 122.0 | 652 | 7.78 | 7.68 | 4.62 |
2 | 6 | 0.46 | GoodGood | EE | VS1VS1 | 113.8 | 130.0 | 654 | 8.10 | 8.14 | 4.62 |
3 | 8 | 0.58 | PremiumPremium | II | VS2VS2 | 124.8 | 116.0 | 668 | 8.40 | 8.46 | 5.26 |
4 | 10 | 0.62 | GoodGood | JJ | SI2SI2 | 126.6 | 116.0 | 670 | 8.68 | 8.70 | 5.50 |
(df['carat'] + 1).head()
0 1.23
1 1.21
2 1.23
3 1.29
4 1.31
Name: carat, dtype: float64
# Element-wise comparison yields a boolean Series, stored as a new column:
# True where price is strictly above 1000.
df['expensive'] = df['price'] > 1000
df.head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 | False |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 | False |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 | False |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 | False |
df.tail()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1106 | 1107 | 0.91 | Premium | I | SI2 | 62.0 | 59.0 | 2913 | 6.18 | 6.23 | 3.85 | True |
1107 | 1108 | 0.83 | Premium | E | SI1 | 62.2 | 59.0 | 2913 | 6.05 | 5.97 | 3.74 | True |
1108 | 1109 | 0.85 | Ideal | G | SI2 | 62.0 | 57.0 | 2913 | 6.10 | 6.02 | 3.76 | True |
1109 | 1110 | 0.80 | Very Good | F | SI1 | 63.5 | 55.0 | 2914 | 5.86 | 5.89 | 3.73 | True |
1110 | 1111 | 0.73 | Ideal | E | SI1 | 61.4 | 58.0 | 2914 | 5.76 | 5.80 | 3.55 | True |
def is_large(row):
    """Label a row "big" or "small" from the product of its x, y and z entries.

    Args:
        row: A mapping-like row (for example a pandas Series passed by
            ``DataFrame.apply(..., axis=1)``) with numeric 'x', 'y' and 'z'
            entries.

    Returns:
        "big" when ``x * y * z`` is strictly greater than 20, else "small".
    """
    vol = row['x'] * row['y'] * row['z']
    # Strict comparison: a product of exactly 20 is labelled "small".
    return "big" if vol > 20 else "small"
# axis=1 passes each row (as a Series) to is_large; the returned labels are
# collected into a new 'volume_label' column.
df['volume_label'] = df.apply(is_large, axis=1)
df.head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False | big |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 | False | big |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 | False | big |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 | False | big |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 | False | big |
# replace returns a new Series; the result is not assigned, so the
# 'expensive' column in df keeps its boolean values.
df['expensive'].replace({True: 'yes!', False: 'no...'})
0 no...
1 no...
2 no...
3 no...
4 no...
...
1106 yes!
1107 yes!
1108 yes!
1109 yes!
1110 yes!
Name: expensive, Length: 1111, dtype: object
def depth_as_inch(row):
    """Return a copy of *row* with 'depth' replaced by a 'depth_inch' entry.

    ``Series.drop`` returns a new object rather than modifying the row in
    place, so its result has to be kept; the converted value is then stored
    on that copy. The row passed in is left untouched.

    Args:
        row: A pandas Series with a numeric 'depth' entry.

    Returns:
        A new Series without 'depth' and with an added 'depth_inch' entry
        equal to ``row['depth'] / 3``.
    """
    # NOTE(review): the divisor 3 is kept from the original code; confirm
    # that it is the intended conversion factor.
    inch = row['depth'] / 3
    converted = row.drop('depth')
    converted['depth_inch'] = inch
    return converted
# The frame returned by apply is only displayed, not assigned; df is unchanged.
df.apply(depth_as_inch, axis=1)
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False | big |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 | False | big |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 | False | big |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 | False | big |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 | False | big |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1106 | 1107 | 0.91 | Premium | I | SI2 | 62.0 | 59.0 | 2913 | 6.18 | 6.23 | 3.85 | True | big |
1107 | 1108 | 0.83 | Premium | E | SI1 | 62.2 | 59.0 | 2913 | 6.05 | 5.97 | 3.74 | True | big |
1108 | 1109 | 0.85 | Ideal | G | SI2 | 62.0 | 57.0 | 2913 | 6.10 | 6.02 | 3.76 | True | big |
1109 | 1110 | 0.80 | Very Good | F | SI1 | 63.5 | 55.0 | 2914 | 5.86 | 5.89 | 3.73 | True | big |
1110 | 1111 | 0.73 | Ideal | E | SI1 | 61.4 | 58.0 | 2914 | 5.76 | 5.80 | 3.55 | True | big |
1111 rows × 13 columns
df['carat'] + 1
0 1.23
1 1.21
2 1.23
3 1.29
4 1.31
...
1106 1.91
1107 1.83
1108 1.85
1109 1.80
1110 1.73
Name: carat, Length: 1111, dtype: float64
df
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False | big |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 | False | big |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 | False | big |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 | False | big |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 | False | big |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1106 | 1107 | 0.91 | Premium | I | SI2 | 62.0 | 59.0 | 2913 | 6.18 | 6.23 | 3.85 | True | big |
1107 | 1108 | 0.83 | Premium | E | SI1 | 62.2 | 59.0 | 2913 | 6.05 | 5.97 | 3.74 | True | big |
1108 | 1109 | 0.85 | Ideal | G | SI2 | 62.0 | 57.0 | 2913 | 6.10 | 6.02 | 3.76 | True | big |
1109 | 1110 | 0.80 | Very Good | F | SI1 | 63.5 | 55.0 | 2914 | 5.86 | 5.89 | 3.73 | True | big |
1110 | 1111 | 0.73 | Ideal | E | SI1 | 61.4 | 58.0 | 2914 | 5.76 | 5.80 | 3.55 | True | big |
1111 rows × 13 columns
# Restrict the aggregation to numeric (and boolean) columns: text columns such
# as 'color' and 'clarity' have no mean, and recent pandas releases raise a
# TypeError for them instead of silently dropping them.
df.groupby('cut').mean(numeric_only=True)
# | carat | depth | table | price | x | y | z | expensive | |
---|---|---|---|---|---|---|---|---|---|
cut | |||||||||
Fair | 606.666667 | 0.902424 | 63.457576 | 60.242424 | 2800.636364 | 6.105909 | 6.026667 | 3.847273 | 0.984848 |
Good | 507.020202 | 0.649394 | 62.322222 | 58.426263 | 2210.737374 | 5.421313 | 5.433535 | 3.381111 | 0.737374 |
Ideal | 586.541333 | 0.664773 | 61.695200 | 55.973067 | 2497.981333 | 5.559173 | 5.569333 | 3.432240 | 0.853333 |
Premium | 567.831804 | 0.690979 | 61.223853 | 58.776758 | 2464.987768 | 5.640826 | 5.597248 | 3.439205 | 0.840979 |
Very Good | 499.372951 | 0.659549 | 61.781148 | 57.968443 | 2385.991803 | 5.491721 | 5.519549 | 3.401516 | 0.811475 |
df.groupby('cut')[['price', 'carat']].mean()
price | carat | |
---|---|---|
cut | ||
Fair | 2800.636364 | 0.902424 |
Good | 2210.737374 | 0.649394 |
Ideal | 2497.981333 | 0.664773 |
Premium | 2464.987768 | 0.690979 |
Very Good | 2385.991803 | 0.659549 |
# Grouping by two keys gives a result indexed by (cut, clarity) pairs; only
# the price and carat columns are averaged.
clarity_cut = df.groupby(['cut', 'clarity'])[['price', 'carat']].mean()
clarity_cut.head()
price | carat | ||
---|---|---|---|
cut | clarity | ||
Fair | I1 | 2824.250000 | 1.100000 |
IF | 2861.000000 | 0.620000 | |
SI1 | 2855.562500 | 0.925625 | |
SI2 | 2835.172414 | 0.956207 | |
VS1 | 2853.250000 | 0.807500 |
clarity_cut['price'].sort_values(ascending = False)
cut clarity
Fair IF 2861.000000
SI1 2855.562500
VS1 2853.250000
Ideal I1 2846.428571
Good I1 2845.000000
Very Good IF 2841.750000
Premium IF 2837.000000
Fair SI2 2835.172414
Good VVS2 2827.333333
Fair I1 2824.250000
Ideal IF 2814.428571
Fair VVS1 2801.000000
Ideal VVS2 2725.631579
Very Good SI2 2665.965517
Premium VVS2 2664.076923
I1 2630.166667
Ideal SI1 2592.385246
VS2 2577.722222
Good VS2 2577.107143
Fair VS2 2576.000000
Premium VS1 2571.245283
VS2 2559.631579
Very Good VS2 2468.000000
SI1 2405.694444
Premium SI1 2374.200000
SI2 2360.282051
Ideal VVS1 2353.363636
SI2 2303.129032
Very Good VS1 2295.511111
I1 2268.750000
Ideal VS1 2261.923077
Good VS1 2152.277778
VVS1 2110.000000
SI2 2092.076923
Very Good VVS1 2061.333333
VVS2 1961.687500
Good SI1 1883.250000
Premium VVS1 1674.875000
Name: price, dtype: float64
df['cut'].value_counts()
Ideal 375
Premium 327
Very Good 244
Good 99
Fair 66
Name: cut, dtype: int64
df['cut'].unique()
array(['Ideal', 'Premium', 'Good', 'Very Good', 'Fair'], dtype=object)
df.head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False | big |
1 | 2 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 | False | big |
2 | 3 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 | False | big |
3 | 4 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 | False | big |
4 | 5 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 | False | big |
df.plot.scatter(x='x', y='y')
<matplotlib.axes._subplots.AxesSubplot at 0xc57e130>
df.plot.scatter(x='carat', y='price')
<matplotlib.axes._subplots.AxesSubplot at 0xc562cf0>
df.groupby('cut')['price'].mean().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0xc6b53d0>
# Convert 'cut' to an ordered categorical so that
# Fair < Good < Very Good < Premium < Ideal holds for sorting and comparisons,
# instead of the alphabetical order of the plain strings.
df['cut'] = df['cut'].astype(pd.CategoricalDtype(['Fair', 'Good', 'Very Good', 'Premium', 'Ideal'], ordered=True))
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1111 entries, 0 to 1110
Data columns (total 13 columns):
# 1111 non-null int64
carat 1111 non-null float64
cut 1111 non-null category
color 1111 non-null object
clarity 1111 non-null object
depth 1111 non-null float64
table 1111 non-null float64
price 1111 non-null int64
x 1111 non-null float64
y 1111 non-null float64
z 1111 non-null float64
expensive 1111 non-null bool
volume_label 1111 non-null object
dtypes: bool(1), category(1), float64(6), int64(2), object(3)
memory usage: 84.8+ KB
df.groupby('cut')['price'].mean().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0xc51cd70>
df.groupby('cut')['price'].mean().sort_values().plot.bar()
<matplotlib.axes._subplots.AxesSubplot at 0xdd60cd0>
mask = df['cut'] == "Ideal"
df[mask].head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 | False | big |
11 | 12 | 0.23 | Ideal | J | VS1 | 62.8 | 56.0 | 340 | 3.93 | 3.90 | 2.46 | False | big |
13 | 14 | 0.31 | Ideal | J | SI2 | 62.2 | 54.0 | 344 | 4.35 | 4.37 | 2.71 | False | big |
16 | 17 | 0.30 | Ideal | I | SI2 | 62.0 | 54.0 | 348 | 4.31 | 4.34 | 2.68 | False | big |
39 | 40 | 0.33 | Ideal | I | SI2 | 61.8 | 55.0 | 403 | 4.49 | 4.51 | 2.78 | False | big |
# Combine two conditions element-wise with &. Each comparison needs its own
# parentheses because & binds more tightly than ==.
mask = (df['cut'] == "Ideal") & (df['color'] == 'J')
df[mask].head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | 12 | 0.23 | Ideal | J | VS1 | 62.8 | 56.0 | 340 | 3.93 | 3.90 | 2.46 | False | big |
13 | 14 | 0.31 | Ideal | J | SI2 | 62.2 | 54.0 | 344 | 4.35 | 4.37 | 2.71 | False | big |
41 | 42 | 0.33 | Ideal | J | SI1 | 61.1 | 56.0 | 403 | 4.49 | 4.55 | 2.76 | False | big |
460 | 461 | 0.90 | Ideal | J | VS2 | 62.8 | 55.0 | 2817 | 6.20 | 6.16 | 3.88 | True | big |
681 | 682 | 0.75 | Ideal | J | SI1 | 61.5 | 56.0 | 2850 | 5.83 | 5.87 | 3.60 | True | big |
df[ (df['cut'] == "Ideal") & (df['color'] == 'J') ].head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | 12 | 0.23 | Ideal | J | VS1 | 62.8 | 56.0 | 340 | 3.93 | 3.90 | 2.46 | False | big |
13 | 14 | 0.31 | Ideal | J | SI2 | 62.2 | 54.0 | 344 | 4.35 | 4.37 | 2.71 | False | big |
41 | 42 | 0.33 | Ideal | J | SI1 | 61.1 | 56.0 | 403 | 4.49 | 4.55 | 2.76 | False | big |
460 | 461 | 0.90 | Ideal | J | VS2 | 62.8 | 55.0 | 2817 | 6.20 | 6.16 | 3.88 | True | big |
681 | 682 | 0.75 | Ideal | J | SI1 | 61.5 | 56.0 | 2850 | 5.83 | 5.87 | 3.60 | True | big |
df[df['price'] > 1000].head()
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
90 | 91 | 0.70 | Ideal | E | SI1 | 62.5 | 57.0 | 2757 | 5.70 | 5.72 | 3.57 | True | big |
91 | 92 | 0.86 | Fair | E | SI2 | 55.1 | 69.0 | 2757 | 6.45 | 6.33 | 3.52 | True | big |
92 | 93 | 0.70 | Ideal | G | VS2 | 61.6 | 56.0 | 2757 | 5.70 | 5.67 | 3.50 | True | big |
93 | 94 | 0.71 | Very Good | E | VS2 | 62.4 | 57.0 | 2759 | 5.68 | 5.73 | 3.56 | True | big |
94 | 95 | 0.78 | Very Good | G | SI2 | 63.8 | 56.0 | 2759 | 5.81 | 5.85 | 3.72 | True | big |
df.loc[[90, 91]]
# | carat | cut | color | clarity | depth | table | price | x | y | z | expensive | volume_label | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
90 | 91 | 0.70 | Ideal | E | SI1 | 62.5 | 57.0 | 2757 | 5.70 | 5.72 | 3.57 | True | big |
91 | 92 | 0.86 | Fair | E | SI2 | 55.1 | 69.0 | 2757 | 6.45 | 6.33 | 3.52 | True | big |