#!/usr/bin/env python # file pick_oligos.py # Copyright (C) 2005 Regents of the University of Colorado # Contact: # James Smagala # smagala@colorado.edu # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """Takes csv oligos from find_oligos, scores them, then picks the best Revision History 050615 James Smagala: File creation 050824 JS: score_oligos combined with this file, use optparse """ from sys import exit from optparse import OptionParser def score(values): """Generate a score for an oligo pair""" flag = int(values[7]) E1 = float(values[5]) E2 = float(values[6]) length = float(values[3]) T1 = float(values[17]) T2 = float(values[21]) # start of 'bad' entropy values e_threshold = 0.1 # penalties for bad situations flag_multiplier = 10 e_multiplier = 10 e_penalty = 15 worst_case_penalty = 20 # S is the total score for this oligo pair S = flag*flag_multiplier + 10*(E1+E2) + 1/length + 1/T1 + 1/T2 if E1 > e_threshold: S += e_penalty if E2 > e_threshold: S += e_penalty if E1 > e_threshold and E2 > e_threshold and flag: S += worst_case_penalty return S def pick_and_sort(node): """Pick the lowest entropy non-overlapping oligos and return them sorted.""" # DSU node = [(record[13],record) for record in node] node.sort() keep = [] while node: keep.append(node[0][1]) # if the start or end fall in the range of the kept oligo, discard node = filter(lambda x:int(x[1][1]) > int(keep[-1][2]) or\ int(x[1][2]) < int(keep[-1][1]),node) return keep def print_node(node,outfile): """Write out the sorted values for a node.""" for record in node: print >>outfile, "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" %\ tuple(record) def parseCommandLine(): """Get and check command line options and arguments.""" usage = "usage: %prog SCORE_FILE PICK_FILE INFILE" version = "%prog 0.1" description = "SCORE and PICK oligos in INFILE" parser = OptionParser(usage=usage,version=version,description=description) options, args = parser.parse_args() # Make sure we've got a single input file try: infile = ''.join(args[2].split('"')) pick_file = ''.join(args[1].split('"')) score_file = ''.join(args[0].split('"')) except IndexError: parser.error("Missing file") return infile, score_file, pick_file def main(): infile, score_file, pick_file = parseCommandLine() # Attempt to open files for IO try: infile = open(infile,'r') outfile = open(score_file,'w') except IOError,err: print err exit(1) # Score the oligos and write out the file print >>outfile, "name,start,end,length,,entropy1,entropy2,flag,,# seq exist,# seq total,# cons regs,,score,,5' seq, 3' seq,,5' match tm,5' self tm,5' %gc,,3' match tm, 3' self tm,3' %gc" for line in infile: # values are comma separated line = ''.join(line.split()) values = line.split(',') # ignore the first line, we already printed a new header if values[0] == 'name': continue # leave the blank lines in the file if values[0] == '': print >>outfile, ',' continue print >>outfile, "%s,%s,%s,%s,,%s,%s,%s,,%s,%s,%s,,%s,,%s,%s,,%s,%s,%s,,%s,%s,%s" %\ (values[0],values[1],values[2],values[3],values[5],values[6],values[7],\ values[9],values[10],values[25],score(values),values[14],values[15],\ values[17],values[18],values[19],values[21],values[22],values[23]) outfile.close() infile.close() # Read the file back in and pick oligos try: infile = open(score_file,'r') outfile = open(pick_file,'w') except IOError,err: print err exit(1) print >>outfile, "name,start,end,length,,entropy1,entropy2,flag,,# seq exist,# seq total,# cons regs,,score,,5' seq, 3' seq,,5' match tm,5' self tm,5' %gc,,3' match tm, 3' self tm,3' %gc" node = [] for line in infile: # values are comma separated line = ''.join(line.split()) values = line.split(',') # ignore the first line, we already printed a new header if values[0] == 'name': continue # leave the blank lines in the file if values[0] == '': node = pick_and_sort(node) print_node(node,outfile) node = [] print >>outfile, ',' continue node.append(values) # did the file end before we wrote out the last node? if node: node = pick_and_sort(node) print_node(node,outfile) infile.close() outfile.close() if __name__ == "__main__": main()