In [1]: import pandas as pd
In [2]: import numpy as np
In [14]: df = pd.DataFrame({'Year':[2010,2010,2011,2011,2012,2012],'Class':['2A','2B','2C','2A','2B','2C']})
In [16]: df
Out[16]:
Class Year
0 2A 2010
1 2B 2010
2 2C 2011
3 2A 2011
4 2B 2012
5 2C 2012
In [19]: df['Score']=np.random.randint(low=40,high=100,size=6)
In [20]: df
Out[20]:
Class Year Score
0 2A 2010 73
1 2B 2010 87
2 2C 2011 97
3 2A 2011 41
4 2B 2012 86
5 2C 2012 81
In [23]: df.set_index(['Class','Year'],inplace=True)
In [24]: df
Out[24]:
Score
Class Year
2A 2010 73
2B 2010 87
2C 2011 97
2A 2011 41
2B 2012 86
2C 2012 81
In [25]: df.transpose()
Out[25]:
Class 2A 2B 2C 2A 2B 2C
Year 2010 2010 2011 2011 2012 2012
Score 73 87 97 41 86 81
In [26]: df
Out[26]:
Score
Class Year
2A 2010 73
2B 2010 87
2C 2011 97
2A 2011 41
2B 2012 86
2C 2012 81
In [27]: df.swaplevel()
Out[27]:
Score
Year Class
2010 2A 73
2B 87
2011 2C 97
2A 41
2012 2B 86
2C 81
In [28]: df.swaplevel().transpose()
Out[28]:
Year 2010 2011 2012
Class 2A 2B 2C 2A 2B 2C
Score 73 87 97 41 86 81
In [29]: df.swapaxes(0,1)
Out[29]:
Class 2A 2B 2C 2A 2B 2C
Year 2010 2010 2011 2011 2012 2012
Score 73 87 97 41 86 81
In [37]: df
Out[37]:
Score
Class Year
2A 2010 73
2B 2010 87
2C 2011 97
2A 2011 41
2B 2012 86
2C 2012 81
In [33]: def mark_grade(score):
...: if score > 90:
...: grade='A'
...: elif score > 75:
...: grade='B'
...: elif score > 60:
...: grade='C'
...: else:
...: grade='F'
...: return grade
...:
...:
In [39]: df['Grade']=df['Score'].apply(mark_grade)
In [40]: df
Out[40]:
Score Grade
Class Year
2A 2010 73 C
2B 2010 87 B
2C 2011 97 A
2A 2011 41 F
2B 2012 86 B
2C 2012 81 B
In [41]: df.stack()
Out[41]:
Class Year
2A 2010 Score 73
Grade C
2B 2010 Score 87
Grade B
2C 2011 Score 97
Grade A
2A 2011 Score 41
Grade F
2B 2012 Score 86
Grade B
2C 2012 Score 81
Grade B
dtype: object
In [44]: df.stack().to_frame()
Out[44]:
0
Class Year
2A 2010 Score 73
Grade C
2B 2010 Score 87
Grade B
2C 2011 Score 97
Grade A
2A 2011 Score 41
Grade F
2B 2012 Score 86
Grade B
2C 2012 Score 81
Grade B
In [58]: s = df.stack().to_frame()
In [59]: s
Out[59]:
0
Class Year
2A 2010 Score 73
Grade C
2B 2010 Score 87
Grade B
2C 2011 Score 97
Grade A
2A 2011 Score 41
Grade F
2B 2012 Score 86
Grade B
2C 2012 Score 81
Grade B
In [67]: s.index.names=['Class', 'Year','Item']
In [68]: s
Out[68]:
0
Class Year Item
2A 2010 Score 73
Grade C
2B 2010 Score 87
Grade B
2C 2011 Score 97
Grade A
2A 2011 Score 41
Grade F
2B 2012 Score 86
Grade B
2C 2012 Score 81
Grade B
In [69]: s.columns=['Value']
In [70]: s
Out[70]:
Value
Class Year Item
2A 2010 Score 73
Grade C
2B 2010 Score 87
Grade B
2C 2011 Score 97
Grade A
2A 2011 Score 41
Grade F
2B 2012 Score 86
Grade B
2C 2012 Score 81
Grade B
In [71]: s.unstack()
Out[71]:
Value
Item Score Grade
Class Year
2A 2010 73 C
2011 41 F
2B 2010 87 B
2012 86 B
2C 2011 97 A
2012 81 B
In [72]: s.unstack().unstack()
Out[72]:
Value
Item Score Grade
Year 2010 2011 2012 2010 2011 2012
Class
2A 73 41 None C F None
2B 87 None 86 B None B
2C None 97 81 None A B
In [74]: s.to_csv('class_stack.csv')
In [75]: ! cat class_stack.csv
Class,Year,Item,Value
2A,2010,Score,73
2A,2010,Grade,C
2B,2010,Score,87
2B,2010,Grade,B
2C,2011,Score,97
2C,2011,Grade,A
2A,2011,Score,41
2A,2011,Grade,F
2B,2012,Score,86
2B,2012,Grade,B
2C,2012,Score,81
2C,2012,Grade,B
In [83]: s.unstack(0)
Out[83]:
Value
Class 2A 2B 2C
Year Item
2010 Score 73 87 None
Grade C B None
2011 Score 41 None 97
Grade F None A
2012 Score None 86 81
Grade None B B
In [84]: s.unstack(1)
Out[84]:
Value
Year 2010 2011 2012
Class Item
2A Score 73 41 None
Grade C F None
2B Score 87 None 86
Grade B None B
2C Score None 97 81
Grade None A B
In [85]: s.unstack(-1)
Out[85]:
Value
Item Score Grade
Class Year
2A 2010 73 C
2011 41 F
2B 2010 87 B
2012 86 B
2C 2011 97 A
2012 81 B
In [86]: s.unstack('Item')
Out[86]:
Value
Item Score Grade
Class Year
2A 2010 73 C
2011 41 F
2B 2010 87 B
2012 86 B
2C 2011 97 A
2012 81 B
In [87]: s.unstack('Year')
Out[87]:
Value
Year 2010 2011 2012
Class Item
2A Score 73 41 None
Grade C F None
2B Score 87 None 86
Grade B None B
2C Score None 97 81
Grade None A B
In [98]: s.unstack(level=-2,fill_value='Missing')
Out[98]:
Value
Year 2010 2011 2012
Class Item
2A Score 73 41 Missing
Grade C F Missing
2B Score 87 Missing 86
Grade B Missing B
2C Score Missing 97 81
Grade Missing A B
In [100]: s.unstack(level=['Year','Class'],fill_value='Missing')
Out[100]:
Value
Year 2010 2011 2012
Class 2A 2B 2C 2A 2B 2C
Item
Score 73 87 97 41 86 81
Grade C B A F B B
In [101]: s.unstack(level=['Year','Class'],fill_value='Missing').to_csv('class2.csv')
In [102]: ! cat class2.csv
,Value,Value,Value,Value,Value,Value
Year,2010,2010,2011,2011,2012,2012
Class,2A,2B,2C,2A,2B,2C
Item,,,,,,
Score,73,87,97,41,86,81
Grade,C,B,A,F,B,B
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment