Python CSV Normalizer (normwin.py)
From SnOwy - Ed's Wiki Notebook
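Description: Reads a comma-separated text file in which every field is a number and every line has the same number of fields, then writes a copy to the output file with each column linearly rescaled (min-max) so that the column minimum maps to 0.2 and the column maximum maps to 0.8.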
import sys
import math
#if len(sys.argv) > 1 and sys.argv[1] == "-r": rank = True
#else: rank = False
# Every column is rescaled linearly so that its minimum maps to _RANGE_LOW and
# its maximum maps to _RANGE_LOW + _RANGE_SPAN (i.e. the interval [0.2, 0.8]).
_RANGE_LOW = 0.2
_RANGE_SPAN = 0.6


def _read_rows(handle):
    """Parse comma-separated numeric rows from an iterable of text lines.

    Every line must have the same number of fields as the first line and
    every field must be accepted by ``float()``. On the first violation the
    process exits through ``sys.exit`` with an error message that names the
    0-based index of the offending line.

    Returns a list of rows, each row being a list of floats.
    """
    rows = []
    cardinality = None
    for iline, line in enumerate(handle):
        fields = line.split(',')
        if cardinality is None:
            cardinality = len(fields)
        elif cardinality != len(fields):
            sys.exit("Error: Line " + str(iline) + " has " + str(len(fields)) +
                     " elements, expected " + str(cardinality) + ".")
        try:
            # float() tolerates surrounding whitespace, so the trailing
            # newline on the last field does not need to be stripped.
            rows.append([float(field) for field in fields])
        except ValueError:
            sys.exit("Error: Line " + str(iline) + " contains non-number.")
    return rows


def _column_bounds(rows):
    """Return ``(min_vals, max_vals)``, the per-column minima and maxima.

    A NaN bound is replaced by the next value seen in that column, so NaN
    entries only survive as a bound when the whole column is NaN.
    """
    if not rows:
        return [], []
    min_vals = list(rows[0])
    max_vals = list(rows[0])
    for values in rows:
        for i, val in enumerate(values):
            if math.isnan(min_vals[i]) or val < min_vals[i]:
                min_vals[i] = val
            if math.isnan(max_vals[i]) or val > max_vals[i]:
                max_vals[i] = val
    return min_vals, max_vals


def _normalize_rows(rows):
    """Return a new list of rows with every column rescaled into [0.2, 0.8].

    A column whose minimum equals its maximum has no spread to rescale; its
    (non-NaN) entries are mapped to the midpoint of the target range instead
    of dividing by zero.
    """
    min_vals, max_vals = _column_bounds(rows)
    midpoint = _RANGE_LOW + 0.5 * _RANGE_SPAN
    normalized = []
    for values in rows:
        scaled = []
        for miv, mav, val in zip(min_vals, max_vals, values):
            spread = mav - miv
            if spread == 0:
                # Constant column: keep NaN entries as NaN, centre the rest.
                scaled.append(val if math.isnan(val) else midpoint)
            else:
                scaled.append(_RANGE_LOW + ((val - miv) / spread) * _RANGE_SPAN)
        normalized.append(scaled)
    return normalized


def _write_rows(handle, rows):
    """Write each row to ``handle`` as one comma-separated line."""
    for values in rows:
        handle.write(",".join(str(val) for val in values) + "\n")


def main(argv=None):
    """Normalize the CSV file ``argv[1]`` and write the result to ``argv[2]``.

    ``argv`` defaults to ``sys.argv``. The input is read and validated in
    full before the output file is opened, so a malformed input does not
    leave a truncated output file behind. Exits through ``sys.exit`` with a
    message when too few arguments are given or the input is malformed.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("Usage: <input text file> <output text file>")
    with open(argv[1], "r") as inhandle:
        rows = _read_rows(inhandle)
    with open(argv[2], "w") as outhandle:
        _write_rows(outhandle, _normalize_rows(rows))


if __name__ == "__main__":
    main()