Data Engineer: Hierarchy queries using recursive function in Python

There is no direct function to find out the hierarchy levels in python similar to Oracle connect by clause. We have created recursive function to find the hierarchy queries.

#import the pandas packages
# to read the data from employees
import pandas as pd
df = pd.read_csv(r'Downloads\table.csv')
print(df.head(10))

#declare the varaibels
#write recursive functions
final_list=[]
dfs=[]
def get_parent_child_dtls(manager_id,level,sub_manager_list=[]):
    emp_list2=[]
    if level == 1:
        print(level)
        df1=df.query('MANAGER_ID == @manager_id')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']]
        #print(df.query('MANAGER_ID == @manager_id')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
        final_df = pd.DataFrame(df1, columns =['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID'])
        #print(final_df)
        dfs.append(final_df)
        emp_list=[]
        emp_list = list(df1['EMPLOYEE_ID'])
        #print(emp_list)
        level = level + 1
        get_parent_child_dtls(manager_id,level,emp_list)
    elif level > 1:
        #print(level)
        level = level + 1
        employee_ids=''
        for emp in sub_manager_list:
            sub_manager=str(emp)
            #print(df.query('MANAGER_ID == @sub_manager')['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
            #return_df=return_df.append(df.query('MANAGER_ID == @sub_manager')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
            #add_managers_df(final_df,df.query('MANAGER_ID == @sub_manager')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
            #print(df.query('MANAGER_ID == @sub_manager')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
            dfs.append(df.query('MANAGER_ID == @sub_manager')[['EMPLOYEE_ID','FIRST_NAME','MANAGER_ID']])
            employee_ids=list(df.query('MANAGER_ID == @sub_manager')['EMPLOYEE_ID'].values)
            for child_emp in employee_ids:
                emp_list2.append(child_emp)
                print(child_emp)
        print('length---' + str(len(emp_list2)))
        print(emp_list2)
        if len(emp_list2)>0:
            get_parent_child_dtls(manager_id,level,emp_list2)
        else:
             result = pd.concat(dfs)
             #print(result)
             return result

#calling the functions
get_parent_child_dtls('100',1)
result = pd.concat(dfs,ignore_index=True)
print(result)

Data Engineer

Thursday, August 19, 2021

Hierarchy queries using recursive function in Python

No comments:

Post a Comment

PySpark- End-to-End DataFrame With Apache Spark--2

Search This Blog