|
|||||||||||
Gene Compare Program

This is a tiny REXX program that takes the names of three GEN files and lists all the genes in the third that aren't the same as the corresponding gene in either of the first two. The obvious use for this is to take the genes of a father and a mother norn, and compare them to the genes of a child of those parents; the genes that show up as different are the ones in which a spontaneous mutation has occurred. The program will also tell you if it finds that any of the individuals has more than one of any gene, 'cause that's interesting.

To use the program, put it into a file, get yourself a REXX interpreter, find the monikers of the two parents and the child that you're interested in (see the section on file formats for some help in that area), and then call the program on the relevant monikers. So for instance, to see how Bill (moniker 6FKV) differs from his parents Molly (8GOQ) and Ringo (9GPO), you would call the REXX interpreter on this script with the obvious arguments; something like:

gcomp genetics\8goq.gen genetics\9gpo.gen genetics\6fkv.gen

Here's the REXX code itself. Error checking and comments and help are minimal; the idea here is to show you the algorithm, not to give you a finished program!
/* Given two parent genome files and a kid genome file, report every  */
/* gene in the kid that matches neither parent's gene with the same   */
/* identity, and report any genome holding the same gene identity     */
/* more than once.                                                    */
/*                                                                    */
/* Arguments: parent1-file parent2-file kid-file                      */
/*                                                                    */
/* File layout, as far as this script relies on it: a run of genes,   */
/* each introduced by the 4-byte tag "gene", closed by the 4-byte tag */
/* "gend".  The first three bytes of a gene's body are used as its    */
/* identity.                                                          */
/*                                                                    */
/* Still a hack: gene boundaries are found by searching for the tags, */
/* so a gene whose body happens to contain the bytes "gene" or "gend" */
/* will be split in the wrong place.                                  */
parse arg p1 p2 kid .

/* Without three file names there is nothing sensible to do */
if kid == "" then do
  say "Usage: gcomp parent1.gen parent2.gen child.gen"
  exit
end

/* Print out a little heading */
say " "
say "--" kid "--"

/* gene.slot.tag holds the body of gene <tag> in genome <slot>:       */
/* slot 1 and 2 are the parents, slot 3 is the kid.  The null string  */
/* means "not seen yet".                                              */
gene. = ""

/* Parents first, so the kid can be compared gene by gene as it is    */
/* read; this keeps the messages in file order.                       */
call ScanGenome p1, 1
call ScanGenome p2, 2
call ScanGenome kid, 3

/* and we're done... */
exit

/* ScanGenome file, slot                                              */
/* Reads every gene of <file> into gene.slot.*, announcing repeated   */
/* gene identities.  For slot 3 (the kid) it also announces each gene */
/* that differs from both parents' versions.  Ends the whole program  */
/* if the file does not follow the expected tag layout.               */
ScanGenome: procedure expose gene.
  parse arg file, slot

  /* Rewind to the first byte (a zero-length read only positions the  */
  /* stream), then read everything, however long the file is.  The    */
  /* rewind matters when the same file is named more than once.       */
  call charin file, 1, 0
  data = charin(file, , chars(file))

  do forever
    /* Strict comparisons throughout: this is binary data, and the    */
    /* normal operators would ignore blanks and compare numerically.  */
    if left(data, 4) == "gend" then leave
    if left(data, 4) \== "gene" then do
      say "Missing 'gene' in file" file
      exit
    end

    /* Remove the "gene" tag */
    data = substr(data, 5)

    /* The body runs up to whichever real tag comes first.  Looking   */
    /* for the full tags (not just "gen") avoids false stops on       */
    /* bodies that merely contain the bytes "gen".                    */
    nextGene = pos("gene", data)
    nextEnd = pos("gend", data)
    select
      when nextGene = 0 then cut = nextEnd
      when nextEnd = 0 then cut = nextGene
      otherwise cut = min(nextGene, nextEnd)
    end
    if cut = 0 then do
      say "Missing 'gen' in file" file
      exit
    end

    /* Get the contents of the current gene */
    body = left(data, cut - 1)

    /* Gene identity: the first three bytes, in hex */
    tag = c2x(substr(body, 1, 1))"_"c2x(substr(body, 2, 1))"_"c2x(substr(body, 3, 1))

    /* Brag if we've already seen this one! */
    if gene.slot.tag \== "" then say file "has multiple" tag "genes."

    /* If the kid's gene is novel, announce the fact */
    if slot = 3 then do
      if body \== gene.1.tag & body \== gene.2.tag then do
        say "Kid differs from both parents in the" tag "gene."
      end
    end

    /* Stick it in the array */
    gene.slot.tag = body

    /* Advance to the next "gene" or "gend" */
    data = substr(data, cut)
  end
  return
|
|||||||||||