How to add data fields in a tuple with calculations involved in python

The snippet below works with the old data format; however, I am trying to read an updated datasource.txt that has an additional data field. I tried using a regex but can't seem to get it working.

# Accumulate per-team totals from datasource.txt, re-order the teams and
# print one formatted line per team.
data = {}
with open('datasource.txt') as f:
    for line in f:
        # Whitespace-split the line into tokens.
        parts = line.split()
        # NOTE(review): this is where the updated format breaks. A 5-token line
        # is unpacked into four names (ValueError: too many values), and the
        # fallback branch concatenates a list with the *string* parts[-1]
        # rather than [parts[-1]] (TypeError).
        team, a, b, c = parts if len(parts) == 5 else parts[:-1] + ['($0)'] + parts[-1]
        # Add this line's values element-wise to the team's running totals,
        # which start at (0, 0, 0): a as int, b as float with commas removed,
        # c as float after dropping the leading '($' and trailing ')'.
        data[team] = tuple(map(sum, zip((int(a), float(b.replace(',', '')), float(c[2:-1].replace(',', ''))), data.get(team, (0, 0, 0)))))

# Sort the (a, b, c, team) tuples ascending, reverse them, and rebuild the dict
# so that it iterates from the largest a downwards (relies on dicts keeping
# insertion order).
data = {t: (a, b, c) for a, b, c, t in reversed(sorted((a, b, c, t) for t, (a, b, c) in data.items()))}

# One line per team: name padded to 8 columns, a padded to 4, b and c with
# thousands separators, c wrapped in '($...)'.
for team, (a, b, c) in data.items():
    print(f'{team:8} {a:4} {b:,} (${c:,})')

datasource.txt

alpha 1 54,00.01 ABC DSW2S
bravo 3 500,000.00 ACDEF
charlie 1 27,722.29 ($250.45) DGAS-CAS
charlie 10 252,336,733.383 ($492.06) DGAS-CAS
delta 2 11 ($10) SWSDSASS-CCSSW
echo 5 143,299.00 ($101) ACS34S1
echo 8 145,300 ($125.01) ACS34S1
falcon 3 0.1234 DSS2SFS3
falcon 5 9.19 DSS2SFS3
lima 6 45.00181 ($38.9) FGF5GGD-DDD
romeo 12 980 ASDS SSSS SDSD

Expected Output:

echo       13 288,599.0 ($226.01)            ACS34S1
romeo      12 980.0 ($0.0)                   ASDS SSSS SDSD    
charlie    11 252,364,455.67299998 ($742.51) DGAS-CAS
falcon      8 9.3134 ($0.0)                  DSS2SFS3   
lima        6 45.00181 ($38.9)               FGF5GGD-DDD
bravo       3 500,000.0 ($0.0)               ACDEF    
delta       2 11.0 ($10.0)                   SWSDSASS-CCSSW
alpha       1 54,000.01 ($0.0)               ABC DSW2S

Answer

You can do that with pandas.

  • First, I did some preprocessing to get the data into a stable format: converting values to int/float, adding a default 0 where the ($...) amount is missing, joining the trailing label words into one column, etc.
  • Then used pandas to groupby and sum up the values.
import pandas as pd

HEADERS = ['col1', 'col2', 'col3', 'col4', 'col5']


def parse_line(line):
    """Turn one raw text line into ``[team, count, amount, fee, label]``.

    The line is whitespace-split. The first three tokens are the team name,
    an integer count and an amount that may contain thousands separators.
    An optional fourth token of the form ``($123.45)`` is the fee; when it is
    absent the fee defaults to ``0.0``. Every remaining token is joined with
    single spaces to form the label (empty string if there is none).

    Returns ``None`` for a blank line so callers can skip it.
    Raises ``ValueError`` if a non-blank line has fewer than three tokens or
    a numeric field cannot be converted.
    """
    parts = line.split()
    if not parts:
        # Blank (or whitespace-only) line, e.g. a trailing newline in the file.
        return None

    team, count, amount, *rest = parts

    if rest and rest[0].startswith('($'):
        # Drop the '($' / ')' wrapper and any thousands separators.
        fee = float(rest[0].strip('($)').replace(',', ''))
        label_words = rest[1:]
    else:
        fee = 0.0
        label_words = rest

    return [
        team,
        int(count),
        float(amount.replace(',', '')),
        fee,
        ' '.join(label_words),
    ]


def summarize(lines):
    """Aggregate raw lines into one row per (team, label).

    ``lines`` is any iterable of strings (an open text file works). Blank
    lines are ignored. Counts, amounts and fees are summed per group, rows are
    ordered by the summed count (``col2``) in descending order, and the fee
    column (``col4``) is rendered as the string ``'($<value>)'``.

    Returns a ``pandas.DataFrame`` with the columns listed in ``HEADERS``.
    """
    rows = [row for row in map(parse_line, lines) if row is not None]
    df = pd.DataFrame(rows, columns=HEADERS)
    df = df.groupby(['col1', 'col5']).sum().reset_index()
    df = df.sort_values('col2', ascending=False)
    df['col4'] = '($' + df['col4'].astype(str) + ')'
    # groupby moved col5 next to col1; restore the original column order.
    return df[HEADERS]


if __name__ == '__main__':
    # Read the data file named in the question.
    with open('datasource.txt') as f:
        df = summarize(f)
    print(df)

      col1  col2          col3       col4            col5
4     echo    13  2.885990e+05  ($226.01)         ACS34S1
7    romeo    12  9.800000e+02     ($0.0)  ASDS SSSS SDSD
2  charlie    11  2.523645e+08  ($742.51)        DGAS-CAS
5   falcon     8  9.313400e+00     ($0.0)        DSS2SFS3
6     lima     6  4.500181e+01    ($38.9)     FGF5GGD-DDD
1    bravo     3  5.000000e+05     ($0.0)           ACDEF
3    delta     2  1.100000e+01    ($10.0)  SWSDSASS-CCSSW
0    alpha     1  5.400010e+03     ($0.0)       ABC DSW2S