Skip to content

Latest commit

 

History

History

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 

README.md

Pandas

import pandas as pd
print(pd.__version__)
2.1.0
db=pd.DataFrame()
print(db)
Empty DataFrame
Columns: []
Index: []
data=['Abhinav','Singh','Anushka']
df=pd.DataFrame(data)
print(df)
         0
0  Abhinav
1    Singh
2  Anushka
data=[['Rahul', 35], ['vasu' , 8]]
df =pd.DataFrame(data,columns=['Name','Age'])
df['Age'] = df['Age'].astype(float)
print(df)
    Name   Age
0  Rahul  35.0
1   vasu   8.0
data={'Name':['Abhinav','Anushka','Vishal'],'Age':[20,22,19]}
df=pd.DataFrame(data)
print(df)
      Name  Age
0  Abhinav   20
1  Anushka   22
2   Vishal   19
data=[{'a':1,'b':2},{'a':7,'b':8,'c':9}]
df =pd.DataFrame(data)
print(df)
   a  b    c
0  1  2  NaN
1  7  8  9.0
db=pd.read_json('data.json')
print(db)
    Duration  Pulse  Maxpulse  Calories
0         60    110       130       409
1         60    117       145       479
2         60    103       135       340
3         45    109       175       282
4         45    117       148       406
5         60    102       127       300
6         55    105       155       355
7         45    120       145       245
8         50    117       150       350
9         55     98       155       455
10        55    111       155       255

Tail Function & reading CSV

db=pd.read_csv('nyc_weather.csv')
print(db.tail(10))#To Get only 10 details
          EST  Temperature  DewPoint  Humidity  Sea Level PressureIn  \
21  1/22/2016           26         6        41                 30.21   
22  1/23/2016           26        21        78                 29.77   
23  1/24/2016           28        11        53                 29.92   
24  1/25/2016           34        18        54                 30.25   
25  1/26/2016           43        29        56                 30.03   
26  1/27/2016           41        22        45                 30.03   
27  1/28/2016           37        20        51                 29.90   
28  1/29/2016           36        21        50                 29.58   
29  1/30/2016           34        16        46                 30.01   
30  1/31/2016           46        28        52                 29.90   

    VisibilityMiles  WindSpeedMPH PrecipitationIn  CloudCover    Events  \
21                9           NaN            0.01           3      Snow   
22                1          16.0            2.31           8  Fog-Snow   
23                8           6.0               T           3      Snow   
24               10           3.0               0           2       NaN   
25               10           7.0               0           2       NaN   
26               10           7.0               T           3      Rain   
27               10           5.0               0           1       NaN   
28               10           8.0               0           4       NaN   
29               10           7.0               0           0       NaN   
30               10           5.0               0           0       NaN   

    WindDirDegrees  
21              34  
22              42  
23             327  
24             286  
25             244  
26             311  
27             234  
28             298  
29             257  
30             241  
db.columns
for _ in db.columns:
    print(_)
EST
Temperature
DewPoint
Humidity
Sea Level PressureIn
VisibilityMiles
WindSpeedMPH
PrecipitationIn
CloudCover
Events
WindDirDegrees
db.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   EST                   31 non-null     object 
 1   Temperature           31 non-null     int64  
 2   DewPoint              31 non-null     int64  
 3   Humidity              31 non-null     int64  
 4   Sea Level PressureIn  31 non-null     float64
 5   VisibilityMiles       31 non-null     int64  
 6   WindSpeedMPH          28 non-null     float64
 7   PrecipitationIn       31 non-null     object 
 8   CloudCover            31 non-null     int64  
 9   Events                9 non-null      object 
 10  WindDirDegrees        31 non-null     int64  
dtypes: float64(2), int64(6), object(3)
memory usage: 2.8+ KB
print(db['EST'].tail(10))
print(type(db['EST']))
21    1/22/2016
22    1/23/2016
23    1/24/2016
24    1/25/2016
25    1/26/2016
26    1/27/2016
27    1/28/2016
28    1/29/2016
29    1/30/2016
30    1/31/2016
Name: EST, dtype: object
<class 'pandas.core.series.Series'>
print(db['Temperature'].max())
50
print(db[['Temperature','EST','Events']][db['Temperature']==db['Temperature'].max()])
   Temperature        EST Events
9           50  1/10/2016   Rain
print(db.shape)
(31, 11)

Describe Function

db.describe()
Temperature DewPoint Humidity Sea Level PressureIn VisibilityMiles WindSpeedMPH CloudCover WindDirDegrees
count 31.000000 31.000000 31.000000 31.000000 31.000000 28.000000 31.000000 31.000000
mean 34.677419 17.838710 51.677419 29.992903 9.193548 6.892857 3.129032 247.129032
std 7.639315 11.378626 11.634395 0.237237 1.939405 2.871821 2.629853 92.308086
min 20.000000 -3.000000 33.000000 29.520000 1.000000 2.000000 0.000000 34.000000
25% 29.000000 10.000000 44.500000 29.855000 9.000000 5.000000 1.000000 238.000000
50% 35.000000 18.000000 50.000000 30.010000 10.000000 6.500000 3.000000 281.000000
75% 39.500000 23.000000 55.000000 30.140000 10.000000 8.000000 4.500000 300.000000
max 50.000000 46.000000 78.000000 30.570000 10.000000 16.000000 8.000000 345.000000
db.head()
EST Temperature DewPoint Humidity Sea Level PressureIn VisibilityMiles WindSpeedMPH PrecipitationIn CloudCover Events WindDirDegrees
0 1/1/2016 38 23 52 30.03 10 8.0 0 5 NaN 281
1 1/2/2016 36 18 46 30.02 10 7.0 0 3 NaN 275
2 1/3/2016 40 21 47 29.86 10 8.0 0 1 NaN 277
3 1/4/2016 25 9 44 30.05 10 9.0 0 3 NaN 345
4 1/5/2016 20 -3 41 30.57 10 5.0 0 0 NaN 333

11-09-23

ETL- Extract, Transfer and Load

PDA

  1. Mobile App
  2. PC, etc
  • Data warehouses
db.to_csv('abhinav.csv',index=False)

Pandas Group By

db=pd.read_csv('weather_data_cities.csv')
print(db)
         day      city  temperature  windspeed   event
0   1/1/2017  new york           32          6    Rain
1   1/2/2017  new york           36          7   Sunny
2   1/3/2017  new york           28         12    Snow
3   1/4/2017  new york           33          7   Sunny
4   1/1/2017    mumbai           90          5   Sunny
5   1/2/2017    mumbai           85         12     Fog
6   1/3/2017    mumbai           87         15     Fog
7   1/4/2017    mumbai           92          5    Rain
8   1/1/2017     paris           45         20   Sunny
9   1/2/2017     paris           50         13  Cloudy
10  1/3/2017     paris           54          8  Cloudy
11  1/4/2017     paris           42         10  Cloudy
g=db.groupby('city')
for city,city_dataframe in g:
    print(city)
    print("data frame of city is ")
    print(city_dataframe)
mumbai
data frame of city is 
        day    city  temperature  windspeed  event
4  1/1/2017  mumbai           90          5  Sunny
5  1/2/2017  mumbai           85         12    Fog
6  1/3/2017  mumbai           87         15    Fog
7  1/4/2017  mumbai           92          5   Rain
new york
data frame of city is 
        day      city  temperature  windspeed  event
0  1/1/2017  new york           32          6   Rain
1  1/2/2017  new york           36          7  Sunny
2  1/3/2017  new york           28         12   Snow
3  1/4/2017  new york           33          7  Sunny
paris
data frame of city is 
         day   city  temperature  windspeed   event
8   1/1/2017  paris           45         20   Sunny
9   1/2/2017  paris           50         13  Cloudy
10  1/3/2017  paris           54          8  Cloudy
11  1/4/2017  paris           42         10  Cloudy
getG=g.get_group('mumbai')
print(getG)
        day    city  temperature  windspeed  event
4  1/1/2017  mumbai           90          5  Sunny
5  1/2/2017  mumbai           85         12    Fog
6  1/3/2017  mumbai           87         15    Fog
7  1/4/2017  mumbai           92          5   Rain
getG.to_csv('mumbar weather.csv',index=False)
g.head(2)
day city temperature windspeed event
0 1/1/2017 new york 32 6 Rain
1 1/2/2017 new york 36 7 Sunny
4 1/1/2017 mumbai 90 5 Sunny
5 1/2/2017 mumbai 85 12 Fog
8 1/1/2017 paris 45 20 Sunny
9 1/2/2017 paris 50 13 Cloudy
g.describe()
temperature windspeed
count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max
city
mumbai 4.0 88.50 3.109126 85.0 86.50 88.5 90.50 92.0 4.0 9.25 5.057997 5.0 5.00 8.5 12.75 15.0
new york 4.0 32.25 3.304038 28.0 31.00 32.5 33.75 36.0 4.0 8.00 2.708013 6.0 6.75 7.0 8.25 12.0
paris 4.0 47.75 5.315073 42.0 44.25 47.5 51.00 54.0 4.0 12.75 5.251984 8.0 9.50 11.5 14.75 20.0

Concatenate Data frames

india_weather=pd.DataFrame({'city': ['hydrabad', 'banglore', 'pune'], 'humidity': [80,60,78],'temprature': [33,28,40],})

print(india_weather)
       city  humidity  temprature
0  hydrabad        80          33
1  banglore        60          28
2      pune        78          40
us_weather=pd.DataFrame({'city': ['newyork', 'washington', 'arizona'], 'humidity': [75,86,70],'temprature': [30,36,34],})

print(us_weather)
         city  humidity  temprature
0     newyork        75          30
1  washington        86          36
2     arizona        70          34
df=pd.concat([india_weather,us_weather],ignore_index=True)
print(df)
         city  humidity  temprature
0    hydrabad        80          33
1    banglore        60          28
2        pune        78          40
3     newyork        75          30
4  washington        86          36
5     arizona        70          34
temp_df=pd.DataFrame({
    'city':['Hyderabad', 'Banglore','Pune'],
    'temperature':[30,28,35]
    
})
humidity_df=pd.DataFrame({
    'city':['Hyderabad', 'Banglore','Chennai'],
    'humidity':[63,78,70]
    
})
print(temp_df)
print(humidity_df)
        city  temperature
0  Hyderabad           30
1   Banglore           28
2       Pune           35
        city  humidity
0  Hyderabad        63
1   Banglore        78
2    Chennai        70

Natural Join

df=pd.merge(humidity_df,temp_df,on='city')
print(df)
        city  humidity  temperature
0  Hyderabad        63           30
1   Banglore        78           28

left outer join

df=pd.merge(humidity_df,temp_df,on='city',how='left')
print(df)
        city  humidity  temperature
0  Hyderabad        63         30.0
1   Banglore        78         28.0
2    Chennai        70          NaN

Right Outer Join

df=pd.merge(humidity_df,temp_df,on='city',how='right')
print(df)
        city  humidity  temperature
0  Hyderabad      63.0           30
1   Banglore      78.0           28
2       Pune       NaN           35

use of loc and iloc

s=pd.Series(list('abcde'),index=[49,48,47,0,1])
print(s)
print(s.loc[49])
print(s.iloc[0])
49    a
48    b
47    c
0     d
1     e
dtype: object
a
a