Python CSV Normalizer (normwin.py)

From SnOwy - Ed's Wiki Notebook

Jump to: navigation, search

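Description: Reads a comma-separated text file of numbers and linearly rescales each column so that its minimum maps to 0.2 and its maximum maps to 0.8, then writes the rescaled rows to the output file as comma-separated text. Usage: normwin.py <input text file> <output text file>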

import sys
import math

#if len(sys.argv) > 1 and sys.argv[1] == "-r": rank = True
#else: rank = False

# Each column is mapped linearly onto [RANGE_LOW, RANGE_LOW + RANGE_WIDTH],
# i.e. the column minimum becomes 0.2 and the column maximum becomes 0.8.
RANGE_LOW = 0.2
RANGE_WIDTH = 0.6


class InputError(Exception):
    """Raised when the input text is not a rectangular table of numbers."""


def parse_rows(handle):
    """Parse an iterable of comma-separated text lines into rows of floats.

    handle -- any iterable yielding lines (an open text file, a list of str).

    Returns a list of rows, each a list of floats. Raises InputError when a
    line has a different number of fields than the first line, or when a
    field cannot be converted with float(). Line numbers in the messages are
    zero-based, exactly as the script has always reported them.
    """
    rows = []
    cardinality = None
    for iline, line in enumerate(handle):
        fields = line.split(',')

        if cardinality is None:
            # The first line fixes the expected column count.
            cardinality = len(fields)
        elif cardinality != len(fields):
            raise InputError(
                "Error: Line " + str(iline) + " has " + str(len(fields)) +
                " elements, expected " + str(cardinality) + ".")

        try:
            # float() tolerates surrounding whitespace, including the
            # trailing newline left on the last field.
            rows.append([float(field) for field in fields])
        except ValueError:
            raise InputError(
                "Error: Line " + str(iline) + " contains non-number.")
    return rows


def column_ranges(rows):
    """Return (min_vals, max_vals), the per-column extremes of rows.

    NaN entries are skipped: a NaN seed taken from the first row is replaced
    by the first later value, and a NaN value never displaces a real number
    (both comparisons against NaN are false). A column that is entirely NaN
    keeps NaN for both extremes. Empty input yields two empty lists.
    """
    if not rows:
        return [], []

    min_vals = list(rows[0])
    max_vals = list(rows[0])
    for values in rows:
        for i, val in enumerate(values):
            if math.isnan(min_vals[i]) or val < min_vals[i]:
                min_vals[i] = val
            if math.isnan(max_vals[i]) or val > max_vals[i]:
                max_vals[i] = val
    return min_vals, max_vals


def scale_value(val, lo, hi):
    """Map val from [lo, hi] linearly onto [0.2, 0.8].

    A constant column (hi == lo) has no spread to scale by; the original
    formula divided by zero there. Such values are placed at the midpoint of
    the target range instead, while NaN values stay NaN.
    """
    if hi == lo:
        if math.isnan(val):
            return val
        return RANGE_LOW + RANGE_WIDTH / 2
    return RANGE_LOW + ((val - lo) / (hi - lo)) * RANGE_WIDTH


def normalize_rows(rows, min_vals, max_vals):
    """Return new rows with every value rescaled by its column's extremes."""
    return [
        [scale_value(val, lo, hi)
         for val, lo, hi in zip(row, min_vals, max_vals)]
        for row in rows
    ]


def format_row(row):
    """Render one row as comma-separated text without a line terminator."""
    return ",".join(str(val) for val in row)


def main(argv=None):
    """Normalize the CSV named by argv[1] and write the result to argv[2].

    argv -- argument vector; defaults to sys.argv.

    Exits with a usage message when fewer than two file arguments are given
    and with the parser's message when the input is malformed. The output
    file is opened only after the input has been read and validated, so a
    bad input no longer truncates an existing output file, and both files
    are closed deterministically.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        sys.exit("Usage: <input text file> <output text file>")

    with open(argv[1], "r") as inhandle:
        try:
            rows = parse_rows(inhandle)
        except InputError as err:
            sys.exit(str(err))

    min_vals, max_vals = column_ranges(rows)
    normalized = normalize_rows(rows, min_vals, max_vals)

    # file.write() works on both Python 2 and Python 3, unlike the
    # "print >>handle" statement.
    with open(argv[2], "w") as outhandle:
        for row in normalized:
            outhandle.write(format_row(row) + "\n")


if __name__ == "__main__":
    main()
Personal tools
Namespaces
Variants
Actions
Navigation
Toolbox