# pythoncode/pan.py

import numpy as np
import pandas as pd
from matplotlib import *

# .........................Series.......................#

# Wrap a NumPy vector in a Series, labelling its four entries 1 through 4.
x1 = np.arange(1, 5)
s = pd.Series(data=x1, index=list(range(1, 5)))
print(s)

# .......................DataFrame......................#

# A one-dimensional array becomes a single-column frame; no labels are
# passed, so rows and the column get pandas' default integer labels.
x2 = np.arange(1, 7)
s = pd.DataFrame(data=x2)
print(s)

# Build the frame from plain nested lists, one inner list per row.
# Wrapping these mixed str/int rows in np.array() would force a single
# string dtype on every cell, leaving 'Age' as text instead of integers.
x3 = [['Alex', 10], ['Nishit', 21], ['Aman', 22]]
s = pd.DataFrame(x3, columns=['Name', 'Age'])
print(s)

# Dict of equal-length lists: each key becomes a column, and the explicit
# index supplies the row labels.
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
rank_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data, index=rank_labels)
print(df)

# List of dicts: each dict is one row; a key missing from a row shows as NaN.
data = [
    {'a': 1, 'b': 2},
    {'a': 3, 'b': 4, 'c': 5},
]
df = pd.DataFrame(data)
print(df)

# Dict of Series: rows are aligned on the union of the Series indexes, so
# the shorter Series has NaN at label 'd'.
d = {
    'one': pd.Series([1, 2, 3], index=list('abc')),
    'two': pd.Series([1, 2, 3, 4], index=list('abcd')),
}
df = pd.DataFrame(d)
print(df)

# ....Adding New column......#

# Start from two columns of unequal length; label 4 has no entry in 'two'.
data = {
    'one': pd.Series([1, 2, 3, 4], index=[1, 2, 3, 4]),
    'two': pd.Series([1, 2, 3], index=[1, 2, 3]),
}
df = pd.DataFrame(data)
print(df)

# Assigning a Series under a new key adds a column aligned on the row
# labels; labels 3 and 4 have no matching entry and become NaN.
third_column = pd.Series([1, 2], index=[1, 2])
df['three'] = third_column
print(df)

# ......Deleting a column......#

# Three columns of different lengths, aligned on labels 1..4.
data = {
    'one': pd.Series([1, 2, 3, 4], index=[1, 2, 3, 4]),
    'two': pd.Series([1, 2, 3], index=[1, 2, 3]),
    'three': pd.Series([1, 1], index=[1, 2]),
}
df = pd.DataFrame(data)
print(df)

# The del statement removes a column in place.
del df['one']
print(df)

# pop() also removes the column in place; the returned column is discarded.
df.pop('two')
print(df)

# ......Selecting a particular Row............#

# Same three-column frame as before, indexed by the labels 1..4.
data = {
    'one': pd.Series([1, 2, 3, 4], index=[1, 2, 3, 4]),
    'two': pd.Series([1, 2, 3], index=[1, 2, 3]),
    'three': pd.Series([1, 1], index=[1, 2]),
}
df = pd.DataFrame(data)

# .loc looks a row up by its label (here the label 2).
row_with_label_2 = df.loc[2]
print(row_with_label_2)

# A bare slice on the frame selects a range of rows.
row_slice = df[1:4]
print(row_slice)

# .........Addition of Row.................#

# Two frames with the same columns, to be stacked vertically.
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])

# DataFrame.append() was removed in pandas 2.0. pd.concat() stacks the rows
# the same way and likewise keeps each frame's own row labels (0, 1, 0, 1).
df = pd.concat([df, df2])
print(df.head())

# ........Deleting a Row..................#

# Two frames with the same columns, stacked so that row labels repeat.
df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['a', 'b'])

# DataFrame.append() was removed in pandas 2.0; pd.concat() is the
# replacement and preserves the duplicated labels 0, 1, 0, 1.
df = pd.concat([df, df2])

# Drop rows with label 0 -- both rows carrying that label are removed.
df = df.drop(0)

print(df)

# ..........................Functions.....................................#


# Seven people described by name, age and rating.
d = {
    'Name': pd.Series(['Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack']),
    'Age': pd.Series([25, 26, 25, 23, 30, 29, 23]),
    'Rating': pd.Series([4.23, 3.24, 3.98, 2.56, 3.20, 4.6, 3.8]),
}
df = pd.DataFrame(d)

# .T swaps rows and columns.
print("The transpose of the data series is:")
transposed = df.T
print(transposed)

# shape is (rows, columns), size is the total cell count, and values is the
# underlying array of cell contents.
for attribute in (df.shape, df.size, df.values):
    print(attribute)

# .........................Statistics.......................................#

# Twelve people described by name, age and rating. The same frame feeds
# both summaries below, so it is built once.
d = {
    'Name': pd.Series([
        'Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith', 'Jack',
        'Lee', 'David', 'Gasper', 'Betina', 'Andres',
    ]),
    'Age': pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46]),
    'Rating': pd.Series([
        4.23, 3.24, 3.98, 2.56, 3.20, 4.6,
        3.8, 3.78, 2.98, 4.80, 4.10, 3.65,
    ]),
}
df = pd.DataFrame(d)

# Column-wise totals.
column_totals = df.sum()
print(column_totals)

# Summary statistics; include='all' covers the non-numeric column as well.
summary = df.describe(include='all')
print(summary)

# .......................Sorting..........................................#

# sort_index() orders a frame by its labels. With no arguments it sorts the
# row labels (axis=0) in ascending order; ascending=False reverses that.
row_labels = [1, 4, 6, 2, 3, 5, 9, 8, 0, 7]
unsorted_df = pd.DataFrame(
    np.random.randn(10, 2),
    index=row_labels,
    columns=['col2', 'col1'],
)

sorted_df = unsorted_df.sort_index()
print(sorted_df)
sorted_df = unsorted_df.sort_index(ascending=False)
print(sorted_df)

# Choosing the column axis sorts the column labels instead of the rows.
unsorted_df = pd.DataFrame(
    np.random.randn(10, 2),
    index=row_labels,
    columns=['col2', 'col1'],
)
sorted_df = unsorted_df.sort_index(axis='columns')
print(sorted_df)

# sort_values() orders rows by a column's contents; kind selects the
# sorting algorithm.
unsorted_df = pd.DataFrame({'col1': [2, 1, 1, 1], 'col2': [1, 3, 2, 4]})
sorted_df = unsorted_df.sort_values(by='col1', kind='mergesort')

# The sorted result is not printed here; to inspect it: print(sorted_df)

# ...........................SLICING...............................#

# An 8x4 frame of random values with letter row labels.
df = pd.DataFrame(np.random.randn(8, 4),
                  index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'], columns=['A', 'B', 'C', 'D'])
# Select all rows for multiple columns by passing a list of column labels.
print(df.loc[:, ['A', 'C']])
# Select specific rows and columns by passing a list of labels for each axis.
print(df.loc[['a', 'b', 'f', 'h'], ['A', 'C']])

df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
# Select every row of column 'A'. The old .ix indexer was removed in
# pandas 1.0; .loc is the label-based replacement.
print(df.loc[:, 'A'])

# ............................statistics......................#

# pct_change() reports the fractional change from the previous element.
s = pd.Series([1, 2, 3, 4, 5, 4])
series_change = s.pct_change()
print(series_change)

# On a frame, pct_change() is computed column by column.
df = pd.DataFrame(np.random.randn(5, 2))
frame_change = df.pct_change()
print(frame_change)

# Ten consecutive days of random data in four columns.
values = np.random.randn(10, 4)
dates = pd.date_range('1/1/2000', periods=10)
df = pd.DataFrame(values, index=dates, columns=list('ABCD'))

# Mean over a sliding window of three rows.
rolling_mean = df.rolling(window=3).mean()
print(rolling_mean)

# Mean over all rows seen so far, once at least three are available.
expanding_mean = df.expanding(min_periods=3).mean()
print(expanding_mean)

# ........................MISSING DATA............................................#

# Reindexing with a label the frame does not have ('b') adds an all-NaN row
# for it; 'e' is dropped because it is not in the new label list.
df = pd.DataFrame(np.random.randn(3, 3), index=['a', 'c', 'e'],
                  columns=['one', 'two', 'three'])

df = df.reindex(['a', 'b', 'c'])

print(df)
print("NaN replaced with '0':")
print(df.fillna(0))

# Labels 'b', 'd' and 'g' are absent from the original frame, so reindexing
# introduces three all-NaN rows.
df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])

df = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])

print(df)
# fillna(method='pad') / fillna(method='bfill') are deprecated since
# pandas 2.1; ffill() and bfill() are the direct equivalents.
print(df.ffill())   # carry the previous row's values forward
print(df.bfill())   # pull the next row's values backward
# Drop every row that contains a NaN.
print(df.dropna())
# Drop every column that contains a NaN.
print(df.dropna(axis=1))

# .........................Grouping...............................................#

# One row per team and season.
# NOTE(review): 'kings' (lowercase) is a distinct key from 'Kings', so it
# forms its own group below -- confirm whether that is intended.
ipl_data = {'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
                     'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
            'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
            'Year': [2014, 2015, 2014, 2015, 2014, 2015, 2016, 2017, 2016, 2014, 2015, 2017],
            'Points': [876, 789, 863, 673, 741, 812, 756, 788, 694, 701, 804, 690]}
df = pd.DataFrame(ipl_data)

grouped = df.groupby('Year')

# Iterating a GroupBy yields (group key, sub-frame) pairs.
for name, group in grouped:
    print(name)
    print(group)

# Fetch the rows of a single group by its key.
print(grouped.get_group(2014))

grouped = df.groupby('Team')
# Aggregations are named by string: passing np.sum / np.mean / np.std is
# deprecated since pandas 2.1. The result columns are still labelled
# 'sum', 'mean' and 'std'.
points_summary = grouped['Points'].agg(['sum', 'mean', 'std'])
print(points_summary)

# ...............................Reading a Csv File............................#

# Load the CSV file into a DataFrame and show it. The relative path is
# resolved against the current working directory.
csv_path = "dat.csv"
data = pd.read_csv(csv_path)
print(data)