#!/usr/bin/python
from sys import argv, exit
from subprocess import Popen, PIPE
from collections import defaultdict
from os import path, remove

# argv[0]: script itself
# argv[1]: input.txt
# argv[2]: C|G|A|D
# argv[3]: output.txt
# argv[4]: binary_path

def _print_usage_and_devices(binary_path):
    """Print the usage banner, then the device list reported by the binary.

    The binary is started without arguments and every stdout line from
    "Available device list:" onwards is echoed.
    """
    print("")
    print("Copyright (c) 2015 Jeongbin Park and Sangsu Bae")
    print("")
    print("Usage: cas-offinder-bulge {input_file} {C|G|A|D} {output_file} {binary_path}")
    print("(C: using CPUs, G: using GPUs, A: using accelerators, D: dry-run)")
    print("")
    print("Example input file (DNA bulge 2, RNA bulge 1):")
    print("/var/chromosomes/human_hg19")
    print("NNNNNNNNNNNNNNNNNNNNNRG 2 1")
    print("GGCCGACCTGTCGCTGACGCNNN 5")
    print("CGCCAGCGTCAGCGACAGGTNNN 5")
    print("ACGGCGCCAGCGTCAGCGACNNN 5")
    print("GTCGCTGACGCTGGCGCCGTNNN 5")
    print("")
    # universal_newlines=True makes the pipes yield text, so the comparison
    # against the str marker below also works on Python 3 (bytes otherwise).
    # communicate() drains both pipes and reaps the child, which avoids a
    # stall if the binary writes a lot to the (otherwise unread) stderr pipe.
    p = Popen([binary_path], stderr=PIPE, stdout=PIPE, universal_newlines=True)
    out, _ = p.communicate()
    flag = False
    for line in out.splitlines():
        line = line.strip()
        if line == "Available device list:":
            flag = True
        if flag:
            print(line)


def _build_bulge_queries(targets, bulge_dna, bulge_rna, isreversed):
    """Expand (target, mismatch) pairs into bulge-containing query sequences.

    Returns a tuple ``(bg_tgts, rnabulge_dic, len_pam)``:

    * ``bg_tgts`` maps each query sequence to the set of mismatch limits (kept
      as the strings read from the input file) of the targets that produced it.
    * ``rnabulge_dic`` maps each RNA-bulge query to a list of
      ``(position, mismatch_limit, removed_bases)`` tuples, which is what is
      needed later to re-insert the removed bases into the reported sequence.
    * ``len_pam`` is the number of ``N`` characters stripped from the last
      target processed (``None`` if ``targets`` is empty).
    """
    bg_tgts = defaultdict(set)
    rnabulge_dic = defaultdict(list)
    len_pam = None
    for raw_target, mismatch in targets:
        # The N run that stands in for the PAM sits at the start of the target
        # when the pattern is reversed, at the end otherwise.
        target = raw_target.lstrip('N') if isreversed else raw_target.rstrip('N')
        len_pam = len(raw_target) - len(target)
        pam = 'N' * len_pam

        def frame(body, pad, pam=pam):
            # Every query is padded with N on the side opposite to the PAM so
            # that all queries end up with the same length.
            if isreversed:
                return pam + body + 'N' * pad
            return 'N' * pad + body + pam

        # Query without any bulge.
        bg_tgts[frame(target, bulge_dna)].add(mismatch)

        # DNA bulges: insert a run of N inside the target.
        for bulge_size in range(1, bulge_dna + 1):
            for i in range(1, len(target)):
                body = target[:i] + 'N' * bulge_size + target[i:]
                bg_tgts[frame(body, bulge_dna - bulge_size)].add(mismatch)

        # RNA bulges: drop bases from the target and remember what was dropped.
        for bulge_size in range(1, bulge_rna + 1):
            for i in range(1, len(target) - bulge_size):
                body = target[:i] + target[i + bulge_size:]
                bg_tgt = frame(body, bulge_dna + bulge_size)
                bg_tgts[bg_tgt].add(mismatch)
                rnabulge_dic[bg_tgt].append((i, int(mismatch), target[i:i + bulge_size]))
    return bg_tgts, rnabulge_dic, len_pam


def cas_offinder_bulge(argv1, argv2, argv3, argv4):
    """Run Cas-OFFinder with DNA/RNA bulge support and post-process its output.

    Every target of the input file is expanded into a set of bulge-containing
    query sequences, which are written to ``<input stem>_bulge.txt`` next to
    the input file. The Cas-OFFinder binary is run on that file (raw output in
    ``<input stem>_bulgeout.txt``) and its result is rewritten to ``argv3`` as
    a tab-separated table with bulge type and bulge size columns. The temporary
    query file is removed after a successful run; in dry-run mode it is kept.

    Args:
        argv1: Path of the input file. The first line is copied verbatim to the
            query file, the second line is ``pattern dna_bulge rna_bulge`` and
            every following non-blank line is ``target mismatches``.
        argv2: ``'C'``, ``'G'`` or ``'A'`` (forwarded to the binary) or ``'D'``
            for a dry run that only writes the query file and prints the
            generated queries. Any other value prints the usage text and the
            device list of the binary and returns.
        argv3: Path of the final output file.
        argv4: Path of the Cas-OFFinder binary.

    Raises:
        ValueError: If the input file contains no target lines.
        SystemExit: With the binary's return code if it exits non-zero.
    """
    if argv2 not in ('C', 'G', 'A', 'D'):
        _print_usage_and_devices(argv4)
        return

    fnhead = path.join(path.dirname(argv1), path.splitext(path.basename(argv1))[0])
    with open(argv1) as f:
        chrom_path = f.readline()
        pattern, bulge_dna, bulge_rna = f.readline().strip().split()
        bulge_dna, bulge_rna = int(bulge_dna), int(bulge_rna)
        # Blank lines (e.g. a trailing newline) are not targets; skip them
        # instead of failing on the two-value unpack later.
        targets = [line.split() for line in f if line.strip()]

    if not targets:
        raise ValueError("No target sequences found in input file: %s" % argv1)

    # Walk inwards from both ends of the pattern: the first position where
    # exactly one side is N decides the orientation. The pattern counts as
    # reversed when the non-N (PAM) characters are at its start.
    isreversed = False
    for i in range(len(pattern) // 2):
        left_is_n = pattern[i] == 'N'
        right_is_n = pattern[len(pattern) - i - 1] == 'N'
        if left_is_n != right_is_n:
            isreversed = not left_is_n
            break

    bg_tgts, rnabulge_dic, len_pam = _build_bulge_queries(
        targets, bulge_dna, bulge_rna, isreversed)

    # NOTE(review): the `-len_pam` slices here and below assume len_pam > 0,
    # i.e. that every target carries an N run for the PAM; with len_pam == 0
    # they would select the wrong span. Confirm that inputs guarantee this.
    if isreversed:
        seq_pam = pattern[:len_pam]
    else:
        seq_pam = pattern[-len_pam:]

    bulge_fn = fnhead + '_bulge.txt'
    with open(bulge_fn, 'w') as f:
        f.write(chrom_path)
        if isreversed:
            f.write(pattern + bulge_dna * 'N' + '\n')
        else:
            f.write(bulge_dna * 'N' + pattern + '\n')
        for tgt, mismatches in bg_tgts.items():
            # The limits are stored as strings; compare them numerically so
            # that e.g. '10' wins over '9'.
            f.write(tgt + ' ' + max(mismatches, key=int) + '\n')
    print("Created temporary file (%s)." % bulge_fn)
    if argv2 == 'D':
        print(bg_tgts.items())
        print(rnabulge_dic.items())
        return

    outfn = fnhead + '_bulgeout.txt'
    print("Running Cas-OFFinder (output file: %s)..." % outfn)
    p = Popen([argv4, bulge_fn, argv2, outfn])
    ret = p.wait()
    if ret != 0:
        print("Cas-OFFinder process was interrupted!")
        exit(ret)

    print("Processing output file...")
    row = '{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\n'
    with open(outfn) as fi, open(argv3, 'w') as fo:
        fo.write('#Bulge type\tcrRNA\tDNA\tChromosome\tPosition\tDirection\tMismatches\tBulge Size\n')
        for line in fi:
            if not line.strip():
                continue
            # Columns as used below: 0 query, 1 chromosome, 2 position,
            # 3 matched DNA, 4 direction, 5 mismatch count.
            entries = line.strip().split('\t')
            query, found = entries[0], entries[3]

            # ncnt: length of the N padding on the side opposite to the PAM.
            if isreversed:
                ncnt = len(query) - len(query.rstrip('N'))
                if ncnt == 0:
                    # `x[:-0]` would be empty; a negative count turns the
                    # `[:-ncnt]` slices below into "keep everything".
                    ncnt = -len(query)
            else:
                ncnt = len(query) - len(query.lstrip('N'))

            # The reported position is moved past the padding when the padding
            # is on the leading side of the hit for the given strand.
            shift = ((not isreversed and entries[4] == "+")
                     or (isreversed and ncnt > 0 and entries[4] == "-"))
            position = int(entries[2]) + (ncnt if shift else 0)
            mismatches = int(entries[5])

            if query in rnabulge_dic:
                for pos, query_mismatch, seq in rnabulge_dic[query]:
                    # Re-insert the removed bases into the query and mark the
                    # same span with '-' in the matched DNA.
                    if isreversed:
                        cut = len_pam + pos
                        tgt = (seq_pam + query[len_pam:cut] + seq + query[cut:-ncnt],
                               found[:cut] + '-' * len(seq) + found[cut:-ncnt])
                    else:
                        cut = ncnt + pos
                        tgt = (query[ncnt:cut] + seq + query[cut:-len_pam] + seq_pam,
                               found[ncnt:cut] + '-' * len(seq) + found[cut:])
                    # Several targets can share one query; only report hits
                    # within the mismatch limit of the originating target.
                    if query_mismatch >= mismatches:
                        fo.write(row.format('RNA', tgt[0], tgt[1], entries[1],
                                            position, entries[4], mismatches, len(seq)))
            else:
                if isreversed:
                    guide = query[:-ncnt][len_pam:]
                else:
                    guide = query[ncnt:][:-len_pam]
                # Length of the first run of N inside the guide = DNA bulge size.
                bulge = 0
                for c in guide:
                    if c == 'N':
                        bulge += 1
                    elif bulge != 0:
                        break
                if isreversed:
                    tgt = (seq_pam + guide.replace('N', '-'), found[:-ncnt])
                else:
                    tgt = (guide.replace('N', '-') + seq_pam, found[ncnt:])
                fo.write(row.format('X' if bulge == 0 else 'DNA', tgt[0], tgt[1], entries[1],
                                    position, entries[4], mismatches, bulge))

    remove(bulge_fn)
    print("Done!")

# if __name__ == '__main__':
#     main()
