A code i am trying to develop to give forcast, taking input from various source.
the reason i have pasted the code for easier understand.
i am getting below error. May someone please please help me if i missed something. what i should change to fix the issue. *i am sure cognos_cols is a list
Code:
cognos_cols = 'DataType,Combi,Month,Year,Qty_Val'.split(',') df_cis = pd.read_csv(cis_file, sep=None, encoding = "UTF-16", header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5]) df_volume = pd.read_csv(volume_file, sep=None, encoding = "UTF-16", header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5]) df_orders = pd.read_excel(fs_orders_file, header=0) df_orders.PromDock_InvoiceDate = pd.to_datetime(df_orders.PromDock_InvoiceDate).dt.date df_bom = pd.read_excel(fs_bom_file, header=0) df_bom = df_bom.drop(axis=0, index=0) # drop the first row df_bom = df_bom.drop(columns='IsBR') df_actuals = pd.read_excel(actuals_file, 'Actuals', skiprows=4, header=0) df_actuals.By = pd.to_datetime(df_actuals.By).dt.date df_brazil = pd.read_excel(brazil_file, header=0) df_brazil.By = pd.to_datetime(df_brazil.By).dt.date # F1, F2, F3, ... df_f1 = pd.read_excel(f1_file, 'F1', skiprows=4, header=0) df_f2 = pd.read_excel(f2_file, 'F2', skiprows=4, header=0) df_f3 = pd.read_excel(f3_file, 'F3', skiprows=4, header=0) df_plan19 = pd.read_excel(plan_file, skiprows=4, header=0) #... Lookups DataFrame ... #....................... df_ryo_pop = pd.read_excel(lookups_xls, 'RYOPOP', header=0) # df_divadj = pd.read_excel(lookups_xls, '2018DivAdjust', header=0) df_am = pd.read_excel(lookups_xls, 'AM_names', header=0).drop_duplicates('AM Code') df_custgrp = pd.read_excel(lookups_xls, 'Customer_names', header=0).drop_duplicates('CustName') # df_region = pd.read_excel(lookups_xls, 'CountryRef', header=0).drop_duplicates('Country') df_prodref = pd.read_excel(lookups_xls, 'ProductRef', header=0, skiprows=4) df_prodref = df_prodref.rename(columns = {'S1': 'Seg1', 'S2': 'Seg2', 'S3': 'Seg3', 'S4': 'Seg4' }) df_gamechangers = pd.read_excel(lookups_xls, 'GameChangers', header=0, skiprows=4, usecols=[1,2]) df_exceptions = pd.read_excel(lookups_xls, 'Master', skiprows=4, header=0, usecols = 'Y').drop_duplicates('Logic') df_mfgsitechange = pd.read_excel(lookups_xls, 'MfgSite', header=4) df_machine = pd.read_excel(lookups_xls, sheet_name='MachineRef', header=0, skiprows=4, converters={'MachineID': str}) df_machinemap = pd.read_excel(lookups_xls, sheet_name='MachineMap', header=0, skiprows=4, converters={'MachineID': str}, usecols=[0,1,2,3,4,5]) df_sites = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="O:P").dropna() df_holidays = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="Q:R") df_periods = pd.read_excel(lookups_xls, sheet_name='Master', header=0, skiprows=4, usecols="AA:AC") df_periods = df_periods.dropna() # Generate EssBy from Year_Period df_periods['EssBy'] = [get_last_day(get_date(x, y)) for x, y in df_periods.Year_Period.str.split('-').tolist()] df_periods.iloc[:, 1:4] = df_periods.iloc[:, 1:4].apply(pd.to_datetime, errors='coerce') # convert to datetime df_periods.iloc[:, 1:4] = df_periods.iloc[:, 1:4].apply(lambda x: x.dt.date) # convert to datetime.date # Get the start date as the minimum of Year_Period start_date = pd.to_datetime(df_periods.Start.min()).date() years = [start_date.year, start_date.year+1, start_date.year+2, start_date.year+3] #isin requires array # Generate dictionary for mapping periods to Year_Period based on Essentra Calendar df_essdict = df_periods.set_index('Year_Period')['EssBy'].to_dict() def get_essby(df, start_date=start_date): '''Args: df as pd dataframe, <start_date> defaulted from minum of df_periods.Start date Output: df with EssBy column''' df.By = pd.to_datetime(df.By).dt.date # Select only from the minimum start period if given if start_date != None: df = df.loc[df.By >= start_date, :].reset_index(drop=True) df['EssBy'] = [k for x in df.By for i, j, k in zip(df_periods.Start, df_periods.End, df_periods.EssBy) if i <= x <= j] return df # %%time #********************************************************************* # Cognos Cleanup #_____________________________________________________________________ #... Get the Forecast ... #.......................... # Filter Forecast forecastfilter = (((df_cis.DataType == 'Actual / Forecast') | (df_cis.DataType == 'Value GBP')) & (df_cis.Year.isin(years)) & (df_cis.Month.isin(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])) & (df_cis.Combi != 'TOTAL') & (df_cis.Qty_Val != 0)) df0_forecast = df_cis[forecastfilter] # Pivot the values to get Forecast, Month and Year Values df1_forecast = df0_forecast.pivot_table(index = ['Combi', 'Month', 'Year'], columns = 'DataType', values = 'Qty_Val', aggfunc = 'sum') # Reset the index and show the values df1_forecast = df1_forecast.reset_index().rename_axis(None).rename_axis(None, axis=1) # Add new / Rename columns in line with SnOP Base Model df1_forecast['DataType'] = 'Forecast' df1_forecast = df1_forecast.rename(columns = {'Value GBP': 'GBPValue', 'Actual / Forecast': 'Qty'}) # Convert Months to MonthNum to get the last date of the month df1_forecast['MonthNum'] = df1_forecast.Month.apply(lambda x: dict((v, k) for k, v in enumerate(calendar.month_abbr))[x]) df1_forecast['By'] = pd.to_datetime(df1_forecast.Year*10000 + df1_forecast.MonthNum*100+1, format = "%Y%m%d") + pd.tseries.offsets.MonthEnd(1) #... Get the Plan ... #........................ # Filter Budget (Plan) planfilter = (((df_cis.DataType == 'Budget') | (df_cis.DataType == 'Budget Value GBP')) & (df_cis.Year.isin(years)) & (df_cis.Month.isin(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])) & (df_cis.Combi != 'TOTAL') & (df_cis.Qty_Val != 0)) df_plan = df_cis[planfilter] # Pivot to get sum of Plan, Month and Year Values df1_plan = df_plan.pivot_table(index = ['Combi', 'Month', 'Year'], columns = 'DataType', values = 'Qty_Val', aggfunc = 'sum').reset_index().rename_axis(None).rename_axis(None, axis=1) # Set up columns with correct names df1_plan['DataType'] = 'Plan' df1_plan = df1_plan.rename(columns = {'Budget':'Qty', 'Budget Value GBP':'GBPValue' }) # Multiply the budget value with 1,000 df1_plan.GBPValue = df1_plan.GBPValue * 1000 # Convert Months to MonthNum df1_plan['MonthNum'] = df1_plan.Month.apply(lambda x: dict((v, k) for k, v in enumerate(calendar.month_abbr))[x])
The Error is:
I am trying to find out whether it is a problem with code or Input Data, Please share your thoughts.
Many Thanks
Answer
Base on this part of the code where the variable names appear, and your error, are you sure that cognos_cols
is a list?
cognos_cols = 'DataType,Combi,Month,Year,Qty_Val'.split(',') df_cis = pd.read_csv(cis_file, sep=None, encoding = "UTF-16", header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5]) df_volume = pd.read_csv(volume_file, sep=None, encoding = "UTF-16", header=None, engine='python', names = cognos_cols, usecols=[0, 1, 3, 4, 5]) df_ryo_pop = pd.read_excel(lookups_xls, 'RYOPOP', header=0) df_am = pd.read_excel(lookups_xls, 'AM_names', header=0).drop_duplicates('AM Code')