I am trying hard to identify Python pandas error in code

A code i am trying to develop to give forcast, taking input from various source.

the reason i have pasted the code for easier understand.

i am getting below error. May someone please please help me if i missed something. what i should change to fix the issue. *i am sure cognos_cols is a list

Code:

cognos_cols = 'DataType,Combi,Month,Year,Qty_Val'.split(',')

df_cis = pd.read_csv(cis_file, sep=None, encoding = "UTF-16", 
                        header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5])
df_volume =  pd.read_csv(volume_file, sep=None, encoding = "UTF-16", 
                        header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5])

df_orders = pd.read_excel(fs_orders_file, header=0)
df_orders.PromDock_InvoiceDate = pd.to_datetime(df_orders.PromDock_InvoiceDate).dt.date

df_bom = pd.read_excel(fs_bom_file, header=0)
df_bom = df_bom.drop(axis=0, index=0) # drop the first row
df_bom = df_bom.drop(columns='IsBR')

df_actuals = pd.read_excel(actuals_file, 'Actuals', skiprows=4, header=0)
df_actuals.By = pd.to_datetime(df_actuals.By).dt.date

df_brazil = pd.read_excel(brazil_file, header=0)
df_brazil.By = pd.to_datetime(df_brazil.By).dt.date

# F1, F2, F3, ...
df_f1 = pd.read_excel(f1_file, 'F1', skiprows=4, header=0)
df_f2 = pd.read_excel(f2_file, 'F2', skiprows=4, header=0)
df_f3 = pd.read_excel(f3_file, 'F3', skiprows=4, header=0)
df_plan19 = pd.read_excel(plan_file, skiprows=4, header=0)

#... Lookups DataFrame ...
#.......................
df_ryo_pop = pd.read_excel(lookups_xls, 'RYOPOP', header=0)
# df_divadj = pd.read_excel(lookups_xls, '2018DivAdjust', header=0)
df_am = pd.read_excel(lookups_xls, 'AM_names', header=0).drop_duplicates('AM Code')
df_custgrp = pd.read_excel(lookups_xls, 'Customer_names', 
                           header=0).drop_duplicates('CustName')

# df_region = pd.read_excel(lookups_xls, 'CountryRef', header=0).drop_duplicates('Country')
df_prodref = pd.read_excel(lookups_xls, 'ProductRef', header=0, skiprows=4)
df_prodref = df_prodref.rename(columns = {'S1': 'Seg1', 
                                            'S2': 'Seg2', 'S3': 'Seg3', 'S4': 'Seg4' })
df_gamechangers = pd.read_excel(lookups_xls, 'GameChangers', header=0, skiprows=4, usecols=[1,2])

df_exceptions = pd.read_excel(lookups_xls, 'Master', skiprows=4, header=0, usecols = 'Y').drop_duplicates('Logic')
df_mfgsitechange = pd.read_excel(lookups_xls, 'MfgSite', header=4)

df_machine = pd.read_excel(lookups_xls, sheet_name='MachineRef', header=0, skiprows=4, 
                           converters={'MachineID': str})
df_machinemap = pd.read_excel(lookups_xls, sheet_name='MachineMap', header=0, skiprows=4, 
                              converters={'MachineID': str}, usecols=[0,1,2,3,4,5])

df_sites = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="O:P").dropna()
df_holidays = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="Q:R")

df_periods = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="AA:AC")
df_periods = df_periods.dropna()

# Generate EssBy from Year_Period
df_periods['EssBy'] = [get_last_day(get_date(x, y)) for x, y in df_periods.Year_Period.str.split('-').tolist()]
df_periods.iloc[:, 1:4] = df_periods.iloc[:, 1:4].apply(pd.to_datetime, errors='coerce') # convert to datetime
df_periods.iloc[:, 1:4] = df_periods.iloc[:, 1:4].apply(lambda x: x.dt.date) # convert to datetime.date

# Get the start date as the minimum of Year_Period
start_date = pd.to_datetime(df_periods.Start.min()).date()

years = [start_date.year, start_date.year+1, start_date.year+2, start_date.year+3] #isin requires array

# Generate dictionary for mapping periods to Year_Period based on Essentra Calendar
df_essdict = df_periods.set_index('Year_Period')['EssBy'].to_dict()

def get_essby(df, start_date=start_date):
    '''Args: 
         df as pd dataframe, <start_date> defaulted from minum of df_periods.Start date
       Output:
         df with EssBy column'''
    
    df.By = pd.to_datetime(df.By).dt.date

    # Select only from the minimum start period if given
    if start_date != None:
        df = df.loc[df.By >= start_date, :].reset_index(drop=True)

    df['EssBy'] = [k for x in df.By 
                           for i, j, k in zip(df_periods.Start, df_periods.End, df_periods.EssBy) 
                           if i <= x <= j]
    return df

# %%time
#*********************************************************************
#                            Cognos Cleanup     
#_____________________________________________________________________

#...   Get the Forecast ...
#..........................

# Filter Forecast
forecastfilter = (((df_cis.DataType == 'Actual / Forecast') | (df_cis.DataType == 'Value GBP')) &
                (df_cis.Year.isin(years)) & 
                (df_cis.Month.isin(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])) & 
                (df_cis.Combi != 'TOTAL') & 
                (df_cis.Qty_Val != 0))

df0_forecast = df_cis[forecastfilter]

# Pivot the values to get Forecast, Month and Year Values
df1_forecast = df0_forecast.pivot_table(index = ['Combi', 'Month', 'Year'],
                           columns = 'DataType',
                           values = 'Qty_Val',
                           aggfunc = 'sum')

# Reset the index and show the values
df1_forecast = df1_forecast.reset_index().rename_axis(None).rename_axis(None, axis=1)

# Add new / Rename columns in line with SnOP Base Model
df1_forecast['DataType'] = 'Forecast'
df1_forecast = df1_forecast.rename(columns = {'Value GBP': 'GBPValue',
                      'Actual / Forecast': 'Qty'})

# Convert Months to MonthNum to get the last date of the month
df1_forecast['MonthNum'] = df1_forecast.Month.apply(lambda x: dict((v, k) for k, v in enumerate(calendar.month_abbr))[x])
df1_forecast['By'] =  pd.to_datetime(df1_forecast.Year*10000 +
                                    df1_forecast.MonthNum*100+1, format = "%Y%m%d") + pd.tseries.offsets.MonthEnd(1)

#...   Get the Plan   ...
#........................

# Filter Budget (Plan)
planfilter = (((df_cis.DataType == 'Budget') | (df_cis.DataType == 'Budget Value GBP')) &
                (df_cis.Year.isin(years)) & 
                (df_cis.Month.isin(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 
                                     'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])) & 
                (df_cis.Combi != 'TOTAL') & 
                (df_cis.Qty_Val != 0))

df_plan = df_cis[planfilter]

# Pivot to get sum of Plan, Month and Year Values
df1_plan = df_plan.pivot_table(index = ['Combi', 'Month', 'Year'],
                           columns = 'DataType',
                           values = 'Qty_Val',
                           aggfunc = 'sum').reset_index().rename_axis(None).rename_axis(None, axis=1)

# Set up columns with correct names
df1_plan['DataType'] = 'Plan'
df1_plan = df1_plan.rename(columns = {'Budget':'Qty', 'Budget Value GBP':'GBPValue' })

# Multiply the budget value with 1,000
df1_plan.GBPValue = df1_plan.GBPValue * 1000

# Convert Months to MonthNum
df1_plan['MonthNum'] = df1_plan.Month.apply(lambda x: dict((v, k) for k, v in enumerate(calendar.month_abbr))[x])

The Error is:

enter image description here

I am trying to find out whether it is a problem with code or Input Data, Please share your thoughts.

Many Thanks

Answer

Base on this part of the code where the variable names appear, and your error, are you sure that cognos_cols is a list?

cognos_cols = 'DataType,Combi,Month,Year,Qty_Val'.split(',')

df_cis = pd.read_csv(cis_file, sep=None, encoding = "UTF-16", 
                        header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5])
df_volume =  pd.read_csv(volume_file, sep=None, encoding = "UTF-16", 
                        header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5])
df_ryo_pop = pd.read_excel(lookups_xls, 'RYOPOP', header=0)
df_am = pd.read_excel(lookups_xls, 'AM_names', header=0).drop_duplicates('AM Code')

Leave a Reply

Your email address will not be published. Required fields are marked *