From 703ab513c01e86558fa5d0a60fe56069e1a88176 Mon Sep 17 00:00:00 2001 From: arcan1s Date: Wed, 10 Dec 2014 06:13:42 +0300 Subject: [PATCH] add dynamic-hb.py --- dynamic-hb/dynamic-hb.py | 109 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 dynamic-hb/dynamic-hb.py diff --git a/dynamic-hb/dynamic-hb.py b/dynamic-hb/dynamic-hb.py new file mode 100644 index 0000000..ceac73a --- /dev/null +++ b/dynamic-hb/dynamic-hb.py @@ -0,0 +1,109 @@ +#!/usr/bin/python + +import argparse +import os + + +def calculate(splited, system): + """function to calculate results""" + matrix = [] + for i in range(system['mols']): + matrix.append(0) + for file in splited: + for aggl in file: + matrix[len(aggl)-1] += 1 + results = {} + while matrix[-1] == 0: + matrix.pop(-1) + for i, item in enumerate(matrix, start = 1): + results[i] = item + summ = 0 + for i in results: + if i == 1: continue + results[i] = i * results[i] / (system['steps'] * system['mols']) + summ += results[i] + results[1] = 1.0 - summ + return results + + +def parse_file(splited): + """function to parse input file""" + for file in splited: + for agl in file: + for i, molecule in enumerate(agl): + agl[i] = [int(molecule.split('=')[0]), [int(conn) for conn in molecule.split('=')[1].split(',') if conn]] + return splited + + +def print_result(output, matrix): + """function to print result to file""" + with open(output, 'w') as out: + out.write("| n | p |\n-----------------\n") + for i in matrix: + out.write(" %7i %7.5f \n" % (i, matrix[i])) + out.write("-----------------\n") + + +def read_file(input_file): + """function to read file from statgen""" + file = open(input_file).read() + if not file.startswith("statgen"): + print("It is not a statgen file") + exit(2) + prepare = file.split("\n") + system = {'steps': 0, 'mols': 0} + while not prepare[-1].startswith("SUMMARY STATISTIC"): + statistic = prepare.pop(-1) + if not statistic.startswith(" 1"): continue + values = [float(item) for item in statistic.split()] + system['steps'] = int(round(values[1] / values[2], 0)) + system['mols'] = int(round(values[2] / values[3], 0)) + prepare.pop(-1) + splited = '\n'.join(prepare).split("FILE")[1:] + for i, item in enumerate(splited): + text = item.split("AGL")[1:] + for j, agl in enumerate(text): + text[j] = [single for single in agl.split("\n")[1:] if single] + text[-1].pop(-1) + splited[i] = text + return splited, system + + +def select_aggl(splited, step): + """function to select agglomerates""" + for i, file in enumerate(splited): + first = (i - step) if (i - step) > 0 else 0 + last = (i + step) if (i + step) < (len(splited) - 1) else (len(splited) - 1) + for j, aggl in enumerate(file): + for k, mol in enumerate(aggl): + table = [] + for other_file in splited[first:last]: + for aggl_other in other_file: + if not mol[0] in [other_single[0] for other_single in aggl_other]: continue + table.extend(mols for other_single in aggl_other for mols in other_single[1] if other_single[0] == mol[0]) + for l, conn in enumerate(mol[1]): + if table.count(conn) == last - first: continue + splited[i][j][k][1].pop(l) + if len(mol[1]) == 0: + splited[i][j].pop(k) + if len(aggl) == 0: + splited[i].pop(j) + return splited + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description = "Sort agglomerates using dynamic criteria") + parser.add_argument('input', help = "file from statgen") + parser.add_argument('-s', '--step', type = int, help = "step to check agglomerates", + default = 1) + parser.add_argument('-o', '--output', help = "output file", default = "dhb-output.dat") + args = parser.parse_args() + + + if not os.path.isfile(args.input): + print("Could not find file %s" % args.input) + exit(1) + splited, system = read_file(args.input) + splited = parse_file(splited) + matrix = calculate(select_aggl(splited, args.step), system) + print_result(args.output, matrix)