class Match:
    '''A matched span between a document and one of its sources.

    Attributes
    start = Start of the match in the document text
    end = End of the match in the document text
    density = Sequence of per-position distance values (None until assigned)
    s_start = Start of the match in the source text (None until assigned)
    s_end = End of the match in the source text (None until assigned)
    source = Source of the match (None until assigned; never set by this class)
    text1 = Excerpt of the document text, filled in by find_text
    text2 = Excerpt of the source text, filled in by find_text
    '''
    density = None
    s_start = None
    s_end = None
    source = None
    text1 = None
    text2 = None

    def __init__(self, start, end):
        '''Store the document-side bounds of the match.'''
        self.start = start
        self.end = end

    def mean(self):
        '''Return the arithmetic mean of density.

        When density is unset or empty a warning is printed and None is
        returned instead.
        '''
        if not self.density:
            print("Warning: No density specified, Skipping...")
            return None
        return sum(self.density) / len(self.density)

    @staticmethod
    def _excerpt(text, start, end, extra):
        '''Slice text[start:end] widened by extra items on both sides.

        The lower bound is clamped at 0: a negative slice start would be
        interpreted as an offset from the end of the text and silently
        return the wrong excerpt for matches near the beginning.
        Bounds are assumed to be non-negative offsets.
        '''
        try:
            lower = max(start - extra, 0)
            upper = end + extra
        except TypeError:
            # Bounds (or extra) are not numbers, e.g. s_start/s_end still
            # None: fall back to the unpadded slice.
            return text[start:end]
        return text[lower:upper]

    def find_text(self, text1, text2, extra = 5):
        '''Fill text1/text2 with the matched excerpts plus surrounding context.

        text1 = Document text, sliced with start/end
        text2 = Source text, sliced with s_start/s_end
        extra = Number of extra items to include on each side of the match
        '''
        self.text1 = self._excerpt(text1, self.start, self.end, extra)
        self.text2 = self._excerpt(text2, self.s_start, self.s_end, extra)

    def print(self):
        '''Print the document excerpt followed by the source excerpt.'''
        print(f"Document:\n {self.text1}\n\nSource:\n{self.text2}\n\n\n\n")

def match_elements(matches):
    '''Flatten (match, source, text) triples into plain lists.

    Every item of matches must unpack into exactly three values: an object
    exposing dist, start and end attributes, a source value and a text value.
    Returns one [dist, text, source, start, end] list per item, in input order.
    '''
    return [
        [found.dist, text, source, found.start, found.end]
        for found, source, text in matches
    ]

def source_sort(matches, leng):
    '''Organize matches by source.

    matches = Iterable of indexable items whose element 2 is used as a
              bucket index
    leng = Number of buckets to create
    Returns a list of leng lists; each match is appended, in input order,
    to the bucket selected by its element 2.
    '''
    buckets = []
    for _ in range(leng):
        buckets.append([])
    for entry in matches:
        bucket = buckets[entry[2]]
        bucket.append(entry)
    return buckets

def check_merges(matches):
    '''Label the matches of every source with a running group number.

    For each source, the first match gets group 0. The group number goes up
    by one whenever a match's element 3 is greater than element 4 of the
    match right before it (start/end offsets, assuming the layout produced
    by match_elements). The result holds one list of (match, group) tuples
    per source, or None where an IndexError occurred (e.g. an empty source).
    '''
    labelled_sources = []
    for source in matches:
        try:
            previous = source[0]
            group = 0
            labelled = [(previous, group)]
            for current in source[1:]:
                # A match starting after the previous one ended opens a new group
                if current[3] > previous[4]:
                    group += 1
                labelled.append((current, group))
                previous = current
        except IndexError:
            labelled = None
        labelled_sources.append(labelled)
    return labelled_sources

def print_matches(matches, searchurls, ex = None):
    '''Print the number of matches found for each source.

    matches = One entry per source; entries without a length (e.g. None)
              are skipped silently
    searchurls = Indexable with the URL of each source, by source position
    ex = Optional container of source positions that must not be printed
    Returns the list of source positions that were actually printed.
    '''
    skipped = [] if ex is None else ex
    exclude = []
    for count, source in enumerate(matches):
        try:
            if count not in skipped:
                print(len(source),"match (es) from", searchurls[count])
                exclude.append(count)
        except Exception:
            # Best effort: a source that cannot be reported (no length,
            # no URL, unprintable) is skipped, while KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            continue
    return exclude

def separate_matches(matches):
    '''Separates matches based on last classification

    matches = One entry per source; each entry is a sequence of
              (match, group) items whose last item carries the highest
              group number
    Returns, per source, a list of groups (group number -> list of matches),
    or None when the source could not be processed (e.g. it is None or empty).
    '''
    output = []
    for source in matches:
        try:
            # The last item's group number tells how many groups are needed
            groups = [[] for _ in range(source[-1][-1] + 1)]
            for entry in source:
                groups[entry[1]].append(entry[0])
        except Exception:
            # Best effort: unusable sources become None, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            groups = None
        output.append(groups)
    return output


def join_matches(matches):
    '''Joins each array of the same match, putting the texts together and averaging its distance

    matches = One entry per source; each entry is a sequence of groups, and
              each group is a non-empty sequence of matches laid out as
              [distance, text, source, start, end] with text being a list
    Returns, per source, a list of [mean distance, joined text, start of the
    first match, end of the last match], or None when the source could not
    be processed (e.g. it is None).
    '''
    output = []
    for source in matches:
        try:
            joined = []
            for group in source:
                # First text without its last item, then the last item of
                # every text in the group (the slice copies, inputs stay intact)
                tex = group[0][1][:-1]
                tex.extend(member[1][-1] for member in group)
                total = sum(member[0] for member in group)
                joined.append([total / len(group), tex, group[0][3], group[-1][4]])
        except Exception:
            # Best effort: unusable sources become None, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            joined = None
        output.append(joined)
    return output

def de_preprocess(matches, dic1, dic2, dist):
    '''Takes the preprocessed match and makes it normal, and takes both dictionaries

    matches = Sequence of clusters; each cluster is a non-empty sequence of
              pairs (assumed to be (document index, source index) in
              preprocessed coordinates - TODO confirm against the caller)
    dic1 = Mapping from preprocessed document index to original position
    dic2 = Mapping from preprocessed source index to original position
    dist = dist[i][j] is the distance of element j of cluster i
    Returns one (start, end, mstart, mend, dens) tuple per cluster, where
    dens covers start..end inclusive and holds None at unknown positions.
    '''
    output = []
    for i, cluster in enumerate(matches):
        # Map every element through its own document index; the previous
        # code looked up the loop counter instead of the element.
        positions = [dic1[pair[0]] for pair in cluster]
        start = positions[0]
        end = positions[-1]
        mstart = dic2[cluster[0][1]]
        mend = dic2[cluster[-1][1]]
        dens = [None] * (1 + end - start)  # Where None means not yet known
        for j, position in enumerate(positions):
            dens[position - start] = dist[i][j]
        output.append((start, end, mstart, mend, dens))
    return output

def bilinear(dens):
    '''Fill the unknown entries of each density row by linear interpolation.

    dens = Sequence of mutable rows; None marks an unknown entry
    Every run of None values lying between two known entries is replaced by
    rounded, evenly stepped values between those neighbours. Leading and
    trailing None values are left untouched. Rows are modified in place and
    dens itself is returned.
    '''
    for row in dens:
        known = [index for index, value in enumerate(row) if value is not None]
        for left, right in zip(known, known[1:]):
            step = (row[right] - row[left]) / (right - left)
            value = row[left]
            # Empty range when the two known entries are adjacent
            for position in range(left + 1, right):
                value += step
                row[position] = round(value)
    return dens

def shingle_final(input, dens):
    '''Wrap raw match bounds into Match objects.

    input = Iterable of indexable items laid out as
            (start, end, source start, source end, ...)
    dens = Value stored as the density of every created Match
    Returns the list of created Match objects, in input order.
    '''
    def to_match(bounds):
        found = Match(bounds[0], bounds[1])
        found.s_start, found.s_end = bounds[2], bounds[3]
        # NOTE(review): every Match receives the same dens object - confirm
        # that a per-match density was not intended here.
        found.density = dens
        return found

    return [to_match(bounds) for bounds in input]