Install the following packages if not yet
- pip install pandas
- pip install SQLAlchemy
- pip install cx_Oracle
- pip install matplotlib
import pandas as pd
import cx_Oracle
import sqlalchemy
from sqlalchemy.exc import SQLAlchemyError
try:
engine = sqlalchemy.create_engine("oracle+cx_oracle://donghua:password@ol8.oci.net/?service_name=pdb1", arraysize=1000)
employees_sql = """SELECT * FROM hr.employees""";
df_employees= pd.read_sql(employees_sql, engine)
departments_sql = """SELECT * FROM hr.departments"""
df_departments = pd.read_sql(departments_sql, engine)
except SQLAlchemyError as e:
print(e)
div>
Transform the pandas data framedf_emp_deprt = df_employees.merge(df_departments)
df_emp_deprt.head(5)
df_emp = df_emp_deprt[['employee_id','job_id','salary','department_name']]
df_salary_by_job_id = df_emp[['job_id','salary']].groupby(['job_id'],as_index=False).mean().rename(columns={'salary':'avg_salary'}
df_salary_by_job_id.head(5)
df_salary_by_job_id_sorted = df_salary_by_job_id.sort_values(['avg_salary'], ascending=False)
df_salary_by_job_id_sorted
ax = df_salary_by_job_id_sorted.plot.bar(x='job_id', y='avg_salary', rot=90, title="Average Salary Per Title")
import matplotlib.pyplot as plt
fig, axs = plt.subplots(figsize=(10, 7))
df_salary_by_job_id_sorted.plot.bar(x='job_id', y='avg_salary', ylim=[0,20000],\
ax=axs, rot=90, legend=True,title="Average Salary Per Title")
axs.set_ylabel('Average Salary')
axs.set_xlabel('Job ID')
#plt.tight_layout()
#plt.show()
# Add series data on top of the bar
for i, v in enumerate(df_salary_by_job_id_sorted['avg_salary']):
axs.text(i, v+300, '{:.0f}'.format(v),ha='center', va='bottom')
fig.savefig("/tmp/df_salary_by_job_id_sorted.png",bbox_inches='tight')
No comments:
Post a Comment