script that compare effectiveness of two different algorithms that calculate the genus of an embedded graph
This commit is contained in:
parent
ca555c5709
commit
be6a802338
364
compare_algorithms.py
Normal file
364
compare_algorithms.py
Normal file
@ -0,0 +1,364 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
# from collections import defaultdict
|
||||||
|
import ntpath
|
||||||
|
# import time
|
||||||
|
from timeit import default_timer
|
||||||
|
from pycallgraph import PyCallGraph
|
||||||
|
from pycallgraph.output import GraphvizOutput
|
||||||
|
|
||||||
|
|
||||||
|
def time_usage(func):
|
||||||
|
def wrapper(*args, **kwargs):
|
||||||
|
best_times = []
|
||||||
|
for _ in range(3):
|
||||||
|
beg_ts = default_timer()
|
||||||
|
retval = func(*args, **kwargs)
|
||||||
|
end_ts = default_timer()
|
||||||
|
best_times.append(end_ts - beg_ts)
|
||||||
|
best_times.sort()
|
||||||
|
best_times = best_times[:3:]
|
||||||
|
print(func.__name__, end=" ")
|
||||||
|
print("time:", sum(best_times)/3)
|
||||||
|
return retval
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
try:
|
||||||
|
profile
|
||||||
|
except NameError:
|
||||||
|
profile = lambda x: x
|
||||||
|
|
||||||
|
# import cProfile
|
||||||
|
# from memory_profiler import profile
|
||||||
|
|
||||||
|
|
||||||
|
class MySettings(object):
|
||||||
|
def __init__(self):
|
||||||
|
self.new_version = True
|
||||||
|
self.old_version = True
|
||||||
|
# self.old_version = False
|
||||||
|
# self.new_version = False
|
||||||
|
# self.dir_from = os.path.join(os.getcwd(), "../mapsTEST/")
|
||||||
|
self.dir_from = os.path.join(os.getcwd(), "../mapsSMOG/")
|
||||||
|
# self.dir_from = os.path.join(os.getcwd(), "../mapsCHIMERA/")
|
||||||
|
# self.dir_to = os.path.join(os.getcwd(), "../test/")
|
||||||
|
self.dir_to = os.path.join(os.getcwd(), "../dataGENUS/")
|
||||||
|
self.overwrite = True
|
||||||
|
# self.overwrite = False
|
||||||
|
self.save_ext = ".genus"
|
||||||
|
self.read_ext = ".smog"
|
||||||
|
# self.read_ext = ".test"
|
||||||
|
# self.read_ext = ".chimera"
|
||||||
|
# self.dir_rap = os.path.join(os.getcwd(), "./outGENUS/")
|
||||||
|
# self.log_rap = os.path.join(self.dir_rap, "log.out")
|
||||||
|
# self.log_gen = os.path.join(self.dir_rap, "genus.out")
|
||||||
|
# self.misMaps = os.path.join(self.dir_rap, "missingMaps.txt")
|
||||||
|
|
||||||
|
|
||||||
|
class MyProtein(object):
|
||||||
|
def __init__(self, read_path, save_path, old_version=False):
|
||||||
|
self.save_path = save_path
|
||||||
|
self.genus_change = []
|
||||||
|
# self.contacts_dict = dict(int)
|
||||||
|
# self.contacts_dict = defaultdict(int)
|
||||||
|
self.contacts_dict = {}
|
||||||
|
self.old_version = old_version
|
||||||
|
try:
|
||||||
|
# original_map = np.loadtxt(read_path, int)
|
||||||
|
original_map = pd.read_csv(read_path, skip_blank_lines=True,
|
||||||
|
header=None, delim_whitespace=True)
|
||||||
|
original_map = np.array(original_map)
|
||||||
|
except ValueError:
|
||||||
|
# print("ValueError while reading the file %s.\n" % (read_path))
|
||||||
|
self.original_map = np.array([])
|
||||||
|
return None
|
||||||
|
|
||||||
|
original_map.sort()
|
||||||
|
idx = np.lexsort(original_map.T) # T means transpose
|
||||||
|
original_map = np.take(original_map, idx, axis=0)
|
||||||
|
self.maxelement = np.amax(original_map)
|
||||||
|
row_mask = np.append(True, np.diff(original_map, axis=0).any(axis=1))
|
||||||
|
original_map = original_map[row_mask]
|
||||||
|
# skip contacts between neighbour vertices
|
||||||
|
row_mask = (original_map[:, 1] - original_map[:, 0] > 1)
|
||||||
|
self.original_map = original_map[row_mask]
|
||||||
|
self.modified_map = np.copy(self.original_map)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# @profile
|
||||||
|
def solve_bifurcations(self, modified_map, axis=0):
|
||||||
|
if axis:
|
||||||
|
frequency = np.unique(modified_map, return_counts=True)
|
||||||
|
else:
|
||||||
|
frequency = np.unique(modified_map[:, 0], return_counts=True)
|
||||||
|
frequency = np.column_stack((frequency[0], frequency[1]))
|
||||||
|
frequency = frequency[frequency[:, 1] > 1]
|
||||||
|
for row in frequency[::-1]:
|
||||||
|
to_add = row[1] - 1
|
||||||
|
np.putmask(modified_map, modified_map > row[0],
|
||||||
|
modified_map + to_add)
|
||||||
|
if axis:
|
||||||
|
duplicate = np.where(modified_map[:, 0] == row[0])
|
||||||
|
if duplicate[0]:
|
||||||
|
modified_map[duplicate[0][0]][0] += to_add
|
||||||
|
to_add -= 1
|
||||||
|
|
||||||
|
duplicate = np.where(modified_map[:, axis] == row[0])
|
||||||
|
for dup in duplicate[0]:
|
||||||
|
modified_map[dup][axis] += to_add
|
||||||
|
to_add -= 1
|
||||||
|
if not axis:
|
||||||
|
self.solve_bifurcations(modified_map, 1)
|
||||||
|
|
||||||
|
def simplify_map(self):
|
||||||
|
self.solve_bifurcations(self.modified_map)
|
||||||
|
self.modified_map = self.remove_unused(self.modified_map)
|
||||||
|
ind = np.lexsort((self.modified_map[:, 0], self.modified_map[:, 1]))
|
||||||
|
self.original_map = self.original_map[ind]
|
||||||
|
self.modified_map = self.modified_map[ind]
|
||||||
|
|
||||||
|
def remove_unused(self, in_map):
|
||||||
|
ind = np.argsort(np.argsort(in_map.flatten()))
|
||||||
|
m_map = np.array(range(in_map.size))[ind]
|
||||||
|
m_map.shape = (-1, 2)
|
||||||
|
return m_map
|
||||||
|
|
||||||
|
@profile
|
||||||
|
# @time_usage
|
||||||
|
def old_genus_function(self):
|
||||||
|
genus_change = [] # genus value along a chain
|
||||||
|
chain_positions = []
|
||||||
|
length = 1
|
||||||
|
genus = 0
|
||||||
|
for num in range(self.maxelement):
|
||||||
|
row_mask = (self.original_map[:, 1] <= num + 1)
|
||||||
|
partial_map = self.modified_map[row_mask]
|
||||||
|
if partial_map.shape[0] > length:
|
||||||
|
partial_map = self.remove_unused(partial_map)
|
||||||
|
genus = self.old_get_genus(partial_map)
|
||||||
|
length = partial_map.shape[0]
|
||||||
|
genus_change.append(genus)
|
||||||
|
chain_positions.append(num + 1)
|
||||||
|
return np.column_stack((chain_positions, genus_change))
|
||||||
|
|
||||||
|
# ######################## GENUS ALONG CHAIN ##############
|
||||||
|
@profile
|
||||||
|
def old_get_genus(self, partial_map):
|
||||||
|
remaining_chain = np.ones(4 * partial_map.shape[0], dtype=np.int)
|
||||||
|
remaining_chain.shape = (-1, 2, 2)
|
||||||
|
boundary_components = 0
|
||||||
|
p_max = np.amax(partial_map)
|
||||||
|
while remaining_chain.any():
|
||||||
|
boundary_components += 1
|
||||||
|
x, y, z = np.unravel_index(np.argmax(remaining_chain == 1),
|
||||||
|
remaining_chain.shape)
|
||||||
|
old_x, old_y = x, y
|
||||||
|
while True:
|
||||||
|
remaining_chain[x][y][z] = 0
|
||||||
|
remaining_chain[x][1-y][1-z] = 0
|
||||||
|
value = partial_map[x][1-y]
|
||||||
|
value += 2*z - 1
|
||||||
|
value %= (p_max + 1)
|
||||||
|
x, y = np.unravel_index(np.argmax(partial_map == value),
|
||||||
|
partial_map.shape)
|
||||||
|
if x == old_x and y == old_y:
|
||||||
|
break
|
||||||
|
|
||||||
|
genus = int((1 + partial_map.shape[0] - boundary_components)/2)
|
||||||
|
return genus
|
||||||
|
|
||||||
|
# @time_usage
|
||||||
|
@profile
|
||||||
|
def get_genus_function(self):
|
||||||
|
genus_change = [] # genus value along a chain
|
||||||
|
chain_positions = []
|
||||||
|
genus = 0 # current genus value
|
||||||
|
old_position = 0 # needed for vertices with bifurcations
|
||||||
|
# get_genus_change() is recalled for each row in modified_map.
|
||||||
|
# If a result is positive, the current genus value increases by 1.
|
||||||
|
# The current position in chain is taken from original_map.
|
||||||
|
for row_id in range(self.modified_map.shape[0]):
|
||||||
|
new_position = self.original_map[row_id][1]
|
||||||
|
for i in range(new_position - old_position):
|
||||||
|
chain_positions.append(old_position + i + 1)
|
||||||
|
genus_change.append(genus)
|
||||||
|
genus += self.get_genus_change(row_id)
|
||||||
|
genus_change[new_position-1] = genus
|
||||||
|
old_position = new_position
|
||||||
|
for i in range(self.maxelement - old_position):
|
||||||
|
chain_positions.append(old_position + i + 1)
|
||||||
|
genus_change.append(genus)
|
||||||
|
# Genus function is calculated and two lists are saved into a table:
|
||||||
|
return np.column_stack((chain_positions, genus_change))
|
||||||
|
|
||||||
|
|
||||||
|
# ####### GENUS CHANGE FOR AN EDGE ######################
|
||||||
|
@profile
|
||||||
|
def get_genus_change(self, row_id):
|
||||||
|
# updating dictionary
|
||||||
|
last_end = self.modified_map[row_id][1] # end of added edge
|
||||||
|
last_start = self.modified_map[row_id][0] # start of added edge
|
||||||
|
self.contacts_dict[last_end] = last_start
|
||||||
|
self.contacts_dict[last_start] = last_end
|
||||||
|
# Each step in the following loop consists of two phases:
|
||||||
|
# "footsteps" along vertices without "extra-edges" and
|
||||||
|
# a "jump" along an edge if such one is found in the dictionary
|
||||||
|
current = last_start + 1
|
||||||
|
# start by "footstep" from the vertex last_start
|
||||||
|
path_begin = last_end # used for memoization
|
||||||
|
empty_path = False
|
||||||
|
# used for memoization - there is no extra
|
||||||
|
while current != last_start:
|
||||||
|
if current not in self.contacts_dict:
|
||||||
|
# a vertex without extra edge
|
||||||
|
if not empty_path:
|
||||||
|
# used for memoization - memoization should be finalized
|
||||||
|
self.contacts_dict[path_begin] = current - 1
|
||||||
|
# the dictionary is updated - in next recalls of
|
||||||
|
# the function new "shortcut"
|
||||||
|
# instead of full walk will be used
|
||||||
|
empty_path = True
|
||||||
|
current += 1 # "footstep"
|
||||||
|
continue
|
||||||
|
if current == last_end:
|
||||||
|
# STOP - the new edge has two boundaries
|
||||||
|
return 0 # genus doesn't change
|
||||||
|
if empty_path: # used for memoization
|
||||||
|
# Set "shortcut" begin (path_begin) if
|
||||||
|
# the last one was just erased.
|
||||||
|
path_begin = current
|
||||||
|
empty_path = False
|
||||||
|
current = self.contacts_dict[current] + 1
|
||||||
|
# a "jump" (along edge from dictionary)
|
||||||
|
# and a "footstep" (+1)
|
||||||
|
return 1
|
||||||
|
# the new edge has only one boundary - genus increases by one
|
||||||
|
|
||||||
|
#############################################################
|
||||||
|
|
||||||
|
# @profile
|
||||||
|
def save(self):
|
||||||
|
np.savetxt(self.save_path, self.genus_change, fmt='%i')
|
||||||
|
|
||||||
|
# @profile
|
||||||
|
def calculate(self):
|
||||||
|
if self.original_map.shape[0] < 2:
|
||||||
|
lst = np.column_stack((list(range(1, self.maxelement + 1)),
|
||||||
|
self.maxelement * [0]))
|
||||||
|
self.genus_change = lst
|
||||||
|
else:
|
||||||
|
self.simplify_map()
|
||||||
|
# print("**************** Genus change ***********")
|
||||||
|
# print(self.save_path)
|
||||||
|
# print(self.genus_change)
|
||||||
|
if self.old_version:
|
||||||
|
self.genus_change = self.old_genus_function()
|
||||||
|
else:
|
||||||
|
self.genus_change = self.get_genus_function()
|
||||||
|
# if self.genus_change.shape[0]:
|
||||||
|
# print(
|
||||||
|
# self.genus_change[self.genus_change.shape[0] - 1][1], "\t",
|
||||||
|
# self.genus_change[self.genus_change.shape[0] - 1][0], "\t",
|
||||||
|
# ntpath.basename(self.save_path)
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# print(self.genus_change, "\t", ntpath.basename(self.save_path))
|
||||||
|
|
||||||
|
|
||||||
|
@time_usage
|
||||||
|
def new_version(read_path, save_path):
|
||||||
|
with PyCallGraph(output=GraphvizOutput(), ):
|
||||||
|
my_protein = MyProtein(read_path, save_path)
|
||||||
|
if not my_protein.original_map.any():
|
||||||
|
# if my_protein.genus_change:
|
||||||
|
# print("Genus zero")
|
||||||
|
# else:
|
||||||
|
# # print(my_protein.genus_change, "\t",
|
||||||
|
# # ntpath.basename(my_protein.save_path))
|
||||||
|
# print("Error: %s. Empty map." % read_path)
|
||||||
|
return None
|
||||||
|
my_protein.calculate()
|
||||||
|
# my_protein.save()
|
||||||
|
return my_protein.genus_change
|
||||||
|
|
||||||
|
|
||||||
|
@time_usage
|
||||||
|
def old_version(read_path, save_path):
|
||||||
|
my_protein = MyProtein(read_path, save_path, old_version=True)
|
||||||
|
if not my_protein.original_map.any():
|
||||||
|
# print(old_protein.genus_change, "\t",
|
||||||
|
# ntpath.basename(old_protein.save_path))
|
||||||
|
# print("Error: %s. Empty map." % read_path)
|
||||||
|
return None
|
||||||
|
my_protein.calculate()
|
||||||
|
# my_protein.save()
|
||||||
|
return my_protein.genus_change
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
|
||||||
|
settings = MySettings()
|
||||||
|
|
||||||
|
if not os.path.isdir(settings.dir_from):
|
||||||
|
print ("""Error: {}: no such directory. Please change settings in {}"""
|
||||||
|
.format(settings.dir_from, __file__))
|
||||||
|
sys.exit()
|
||||||
|
|
||||||
|
os.makedirs(settings.dir_to, exist_ok=True)
|
||||||
|
# os.makedirs(settings.dir_rap, exist_ok=True)
|
||||||
|
if len(argv) != 2:
|
||||||
|
my_list = []
|
||||||
|
# print ("Usage: $ %s 'chain_list.txt'" % __file__)
|
||||||
|
# exit()
|
||||||
|
settings.dir_from = os.path.join(os.getcwd(), "../mapsTEST/")
|
||||||
|
settings.dir_to = os.path.join(os.getcwd(), "../test/")
|
||||||
|
settings.read_ext = ".test"
|
||||||
|
all_files = list(filter(lambda x: x.endswith(settings.read_ext),
|
||||||
|
os.listdir(settings.dir_from)))
|
||||||
|
for f in all_files[:5]:
|
||||||
|
# print(f)
|
||||||
|
idn = ntpath.basename(f).split(".")[0]
|
||||||
|
my_list.append(idn)
|
||||||
|
else:
|
||||||
|
chain_list = argv[1]
|
||||||
|
if not os.path.isfile(chain_list):
|
||||||
|
print ("Error: {}: no such file.".format(chain_list))
|
||||||
|
sys.exit()
|
||||||
|
with open(chain_list, "r") as f:
|
||||||
|
my_list = f.read().splitlines()
|
||||||
|
# misMaps = ""
|
||||||
|
for my_id in my_list:
|
||||||
|
# pdb_id = line[:4].upper()
|
||||||
|
# chain = line[4:].strip()
|
||||||
|
# my_id = pdb_id + chain
|
||||||
|
save_path = os.path.join(settings.dir_to, my_id + settings.save_ext)
|
||||||
|
if os.path.isfile(save_path) and not settings.overwrite:
|
||||||
|
print("File ", save_path, " already exists.")
|
||||||
|
continue
|
||||||
|
read_path = os.path.join(settings.dir_from, my_id + settings.read_ext)
|
||||||
|
if not os.path.isfile(read_path):
|
||||||
|
# print("Error: %s. No such file." % read_path)
|
||||||
|
# misMaps += my_id + "\n" # to be changed
|
||||||
|
continue
|
||||||
|
print(read_path)
|
||||||
|
print("Chain: " + my_id)
|
||||||
|
if settings.new_version:
|
||||||
|
genus_change = new_version(read_path, save_path)
|
||||||
|
if genus_change is None:
|
||||||
|
continue
|
||||||
|
if settings.old_version:
|
||||||
|
old_genus_change = old_version(read_path, save_path)
|
||||||
|
if settings.new_version:
|
||||||
|
if np.array_equal(old_genus_change, genus_change):
|
||||||
|
print("OK")
|
||||||
|
print(genus_change[-1])
|
||||||
|
else:
|
||||||
|
print("\n!!!\n!!!Results are different!!!\n!!!")
|
||||||
|
# with open(settings.misMaps, 'w') as f:
|
||||||
|
# f.write(misMaps)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main(sys.argv)
|
Loading…
Reference in New Issue
Block a user