Sunday, April 18, 2021

Using Pandas and Mathplotlib with Oracle database

 Install the following packages if not yet

  • pip install pandas
  • pip install SQLAlchemy
  • pip install cx_Oracle
  • pip install matplotlib
import pandas as pd
import cx_Oracle
import sqlalchemy
from sqlalchemy.exc import SQLAlchemyError
try:
   engine = sqlalchemy.create_engine("oracle+cx_oracle://donghua:password@ol8.oci.net/?service_name=pdb1", arraysize=1000)
   employees_sql = """SELECT * FROM hr.employees"""; 
   df_employees= pd.read_sql(employees_sql, engine)
   departments_sql = """SELECT * FROM hr.departments"""
   df_departments = pd.read_sql(departments_sql, engine) 
except SQLAlchemyError as e:
   print(e)div>
Transform the pandas data frame

df_emp_deprt = df_employees.merge(df_departments)
df_emp_deprt.head(5)
df_emp = df_emp_deprt[['employee_id','job_id','salary','department_name']]
df_salary_by_job_id = df_emp[['job_id','salary']].groupby(['job_id'],as_index=False).mean().rename(columns={'salary':'avg_salary'}
df_salary_by_job_id.head(5)
df_salary_by_job_id_sorted = df_salary_by_job_id.sort_values(['avg_salary'], ascending=False)
df_salary_by_job_id_sorted



















ax = df_salary_by_job_id_sorted.plot.bar(x='job_id', y='avg_salary', rot=90, title="Average Salary Per Title")



import matplotlib.pyplot as plt
fig, axs = plt.subplots(figsize=(10, 7)) 
df_salary_by_job_id_sorted.plot.bar(x='job_id', y='avg_salary', ylim=[0,20000],\
     ax=axs, rot=90, legend=True,title="Average Salary Per Title")
axs.set_ylabel('Average Salary')
axs.set_xlabel('Job ID')
#plt.tight_layout()
#plt.show()
# Add series data on top of the bar
for i, v in enumerate(df_salary_by_job_id_sorted['avg_salary']):
    axs.text(i, v+300, '{:.0f}'.format(v),ha='center', va='bottom')
fig.savefig("/tmp/df_salary_by_job_id_sorted.png",bbox_inches='tight')     



No comments:

Post a Comment