import pandas as pd
print (pd .__version__ )
db = pd .DataFrame ()
print (db )
Empty DataFrame
Columns: []
Index: []
data = ['Abhinav' ,'Singh' ,'Anushka' ]
df = pd .DataFrame (data )
print (df )
0
0 Abhinav
1 Singh
2 Anushka
data = [['Rahul' , 35 ], ['vasu' , 8 ]]
df = pd .DataFrame (data ,columns = ['Name' ,'Age' ])
df ['Age' ] = df ['Age' ].astype (float )
print (df )
Name Age
0 Rahul 35.0
1 vasu 8.0
data = {'Name' :['Abhinav' ,'Anushka' ,'Vishal' ],'Age' :[20 ,22 ,19 ]}
df = pd .DataFrame (data )
print (df )
Name Age
0 Abhinav 20
1 Anushka 22
2 Vishal 19
data = [{'a' :1 ,'b' :2 },{'a' :7 ,'b' :8 ,'c' :9 }]
df = pd .DataFrame (data )
print (df )
a b c
0 1 2 NaN
1 7 8 9.0
db = pd .read_json ('data.json' )
print (db )
Duration Pulse Maxpulse Calories
0 60 110 130 409
1 60 117 145 479
2 60 103 135 340
3 45 109 175 282
4 45 117 148 406
5 60 102 127 300
6 55 105 155 355
7 45 120 145 245
8 50 117 150 350
9 55 98 155 455
10 55 111 155 255
Tail Function & reading CSV
db = pd .read_csv ('nyc_weather.csv' )
print (db .tail (10 ))#To Get only 10 details
EST Temperature DewPoint Humidity Sea Level PressureIn \
21 1/22/2016 26 6 41 30.21
22 1/23/2016 26 21 78 29.77
23 1/24/2016 28 11 53 29.92
24 1/25/2016 34 18 54 30.25
25 1/26/2016 43 29 56 30.03
26 1/27/2016 41 22 45 30.03
27 1/28/2016 37 20 51 29.90
28 1/29/2016 36 21 50 29.58
29 1/30/2016 34 16 46 30.01
30 1/31/2016 46 28 52 29.90
VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events \
21 9 NaN 0.01 3 Snow
22 1 16.0 2.31 8 Fog-Snow
23 8 6.0 T 3 Snow
24 10 3.0 0 2 NaN
25 10 7.0 0 2 NaN
26 10 7.0 T 3 Rain
27 10 5.0 0 1 NaN
28 10 8.0 0 4 NaN
29 10 7.0 0 0 NaN
30 10 5.0 0 0 NaN
WindDirDegrees
21 34
22 42
23 327
24 286
25 244
26 311
27 234
28 298
29 257
30 241
db .columns
for _ in db .columns :
print (_ )
EST
Temperature
DewPoint
Humidity
Sea Level PressureIn
VisibilityMiles
WindSpeedMPH
PrecipitationIn
CloudCover
Events
WindDirDegrees
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 EST 31 non-null object
1 Temperature 31 non-null int64
2 DewPoint 31 non-null int64
3 Humidity 31 non-null int64
4 Sea Level PressureIn 31 non-null float64
5 VisibilityMiles 31 non-null int64
6 WindSpeedMPH 28 non-null float64
7 PrecipitationIn 31 non-null object
8 CloudCover 31 non-null int64
9 Events 9 non-null object
10 WindDirDegrees 31 non-null int64
dtypes: float64(2), int64(6), object(3)
memory usage: 2.8+ KB
print (db ['EST' ].tail (10 ))
print (type (db ['EST' ]))
21 1/22/2016
22 1/23/2016
23 1/24/2016
24 1/25/2016
25 1/26/2016
26 1/27/2016
27 1/28/2016
28 1/29/2016
29 1/30/2016
30 1/31/2016
Name: EST, dtype: object
<class 'pandas.core.series.Series'>
print (db ['Temperature' ].max ())
print (db [['Temperature' ,'EST' ,'Events' ]][db ['Temperature' ]== db ['Temperature' ].max ()])
Temperature EST Events
9 50 1/10/2016 Rain
Temperature
DewPoint
Humidity
Sea Level PressureIn
VisibilityMiles
WindSpeedMPH
CloudCover
WindDirDegrees
count
31.000000
31.000000
31.000000
31.000000
31.000000
28.000000
31.000000
31.000000
mean
34.677419
17.838710
51.677419
29.992903
9.193548
6.892857
3.129032
247.129032
std
7.639315
11.378626
11.634395
0.237237
1.939405
2.871821
2.629853
92.308086
min
20.000000
-3.000000
33.000000
29.520000
1.000000
2.000000
0.000000
34.000000
25%
29.000000
10.000000
44.500000
29.855000
9.000000
5.000000
1.000000
238.000000
50%
35.000000
18.000000
50.000000
30.010000
10.000000
6.500000
3.000000
281.000000
75%
39.500000
23.000000
55.000000
30.140000
10.000000
8.000000
4.500000
300.000000
max
50.000000
46.000000
78.000000
30.570000
10.000000
16.000000
8.000000
345.000000
EST
Temperature
DewPoint
Humidity
Sea Level PressureIn
VisibilityMiles
WindSpeedMPH
PrecipitationIn
CloudCover
Events
WindDirDegrees
0
1/1/2016
38
23
52
30.03
10
8.0
0
5
NaN
281
1
1/2/2016
36
18
46
30.02
10
7.0
0
3
NaN
275
2
1/3/2016
40
21
47
29.86
10
8.0
0
1
NaN
277
3
1/4/2016
25
9
44
30.05
10
9.0
0
3
NaN
345
4
1/5/2016
20
-3
41
30.57
10
5.0
0
0
NaN
333
ETL- Extract, Transfer and Load
Mobile App
PC, etc
db .to_csv ('abhinav.csv' ,index = False )
db = pd .read_csv ('weather_data_cities.csv' )
print (db )
day city temperature windspeed event
0 1/1/2017 new york 32 6 Rain
1 1/2/2017 new york 36 7 Sunny
2 1/3/2017 new york 28 12 Snow
3 1/4/2017 new york 33 7 Sunny
4 1/1/2017 mumbai 90 5 Sunny
5 1/2/2017 mumbai 85 12 Fog
6 1/3/2017 mumbai 87 15 Fog
7 1/4/2017 mumbai 92 5 Rain
8 1/1/2017 paris 45 20 Sunny
9 1/2/2017 paris 50 13 Cloudy
10 1/3/2017 paris 54 8 Cloudy
11 1/4/2017 paris 42 10 Cloudy
g = db .groupby ('city' )
for city ,city_dataframe in g :
print (city )
print ("data frame of city is " )
print (city_dataframe )
mumbai
data frame of city is
day city temperature windspeed event
4 1/1/2017 mumbai 90 5 Sunny
5 1/2/2017 mumbai 85 12 Fog
6 1/3/2017 mumbai 87 15 Fog
7 1/4/2017 mumbai 92 5 Rain
new york
data frame of city is
day city temperature windspeed event
0 1/1/2017 new york 32 6 Rain
1 1/2/2017 new york 36 7 Sunny
2 1/3/2017 new york 28 12 Snow
3 1/4/2017 new york 33 7 Sunny
paris
data frame of city is
day city temperature windspeed event
8 1/1/2017 paris 45 20 Sunny
9 1/2/2017 paris 50 13 Cloudy
10 1/3/2017 paris 54 8 Cloudy
11 1/4/2017 paris 42 10 Cloudy
getG = g .get_group ('mumbai' )
print (getG )
day city temperature windspeed event
4 1/1/2017 mumbai 90 5 Sunny
5 1/2/2017 mumbai 85 12 Fog
6 1/3/2017 mumbai 87 15 Fog
7 1/4/2017 mumbai 92 5 Rain
getG .to_csv ('mumbar weather.csv' ,index = False )
day
city
temperature
windspeed
event
0
1/1/2017
new york
32
6
Rain
1
1/2/2017
new york
36
7
Sunny
4
1/1/2017
mumbai
90
5
Sunny
5
1/2/2017
mumbai
85
12
Fog
8
1/1/2017
paris
45
20
Sunny
9
1/2/2017
paris
50
13
Cloudy
temperature
windspeed
count
mean
std
min
25%
50%
75%
max
count
mean
std
min
25%
50%
75%
max
city
mumbai
4.0
88.50
3.109126
85.0
86.50
88.5
90.50
92.0
4.0
9.25
5.057997
5.0
5.00
8.5
12.75
15.0
new york
4.0
32.25
3.304038
28.0
31.00
32.5
33.75
36.0
4.0
8.00
2.708013
6.0
6.75
7.0
8.25
12.0
paris
4.0
47.75
5.315073
42.0
44.25
47.5
51.00
54.0
4.0
12.75
5.251984
8.0
9.50
11.5
14.75
20.0
india_weather = pd .DataFrame ({'city' : ['hydrabad' , 'banglore' , 'pune' ], 'humidity' : [80 ,60 ,78 ],'temprature' : [33 ,28 ,40 ],})
print (india_weather )
city humidity temprature
0 hydrabad 80 33
1 banglore 60 28
2 pune 78 40
us_weather = pd .DataFrame ({'city' : ['newyork' , 'washington' , 'arizona' ], 'humidity' : [75 ,86 ,70 ],'temprature' : [30 ,36 ,34 ],})
print (us_weather )
city humidity temprature
0 newyork 75 30
1 washington 86 36
2 arizona 70 34
df = pd .concat ([india_weather ,us_weather ],ignore_index = True )
print (df )
city humidity temprature
0 hydrabad 80 33
1 banglore 60 28
2 pune 78 40
3 newyork 75 30
4 washington 86 36
5 arizona 70 34
temp_df = pd .DataFrame ({
'city' :['Hyderabad' , 'Banglore' ,'Pune' ],
'temperature' :[30 ,28 ,35 ]
})
humidity_df = pd .DataFrame ({
'city' :['Hyderabad' , 'Banglore' ,'Chennai' ],
'humidity' :[63 ,78 ,70 ]
})
print (temp_df )
print (humidity_df )
city temperature
0 Hyderabad 30
1 Banglore 28
2 Pune 35
city humidity
0 Hyderabad 63
1 Banglore 78
2 Chennai 70
df = pd .merge (humidity_df ,temp_df ,on = 'city' )
print (df )
city humidity temperature
0 Hyderabad 63 30
1 Banglore 78 28
df = pd .merge (humidity_df ,temp_df ,on = 'city' ,how = 'left' )
print (df )
city humidity temperature
0 Hyderabad 63 30.0
1 Banglore 78 28.0
2 Chennai 70 NaN
df = pd .merge (humidity_df ,temp_df ,on = 'city' ,how = 'right' )
print (df )
city humidity temperature
0 Hyderabad 63.0 30
1 Banglore 78.0 28
2 Pune NaN 35
s = pd .Series (list ('abcde' ),index = [49 ,48 ,47 ,0 ,1 ])
print (s )
print (s .loc [49 ])
print (s .iloc [0 ])
49 a
48 b
47 c
0 d
1 e
dtype: object
a
a