eval 'exec /usr/dist/exe/perl -S $0 ${1+"$@"}'
  if 0;

#genbank2mif:	Ken Shirriff
#Usage: genbank2mif < genbankfile > file.mif
#This generates a MIF file for FrameMaker that shows the structure of
#the sequence.

$proteinonly = 1;	# Only display CDS records
$textabove = 1;		# Print text above the box

$xscale = 1500.;	# How many bases per inch

$yp = 0;		# Starting y position
$yheight = 0.1;
$yspc = 0.15;		# Spacing between lines

$color[1] = "Red";
$color[2] = "Green";
$color[0] = "Blue";

&ginit();

while (<>) {
	chop;
	if (/^SOURCE.*/) {
		&parsesource();
	}
	if (/^FEATURES.*/) {
		&parsefeatures();
	}
	if (/^ORIGIN.*/) {
		&parseorigin();
		last;
	}
}

# seq = 0 ... length($seq)-1

$len = length($seq);
print STDERR "length =$len\n";
&findorf();
# &checkcg(); # experimental function
&gend();

# ------------

#Draw the ORF's by looking for stop codons.
# findorf(
sub findorf {

	&ggroup();
	$yp += 0.5;
	$yheight = 0.2;
	$yspc = 0.3;
	$yspc = 0.05;
	&gtext(0./$xscale, $yp-$yheight/2, "0");
	&gtext($len/$xscale,$yp-$yheight/2,$len);
	&gbox(1./$xscale,$yp,$len/$xscale,$yp+$yheight,$color[1%3]);
	&findseq(1,3,"taa",$yp,$yp+$yheight);
	&findseq(1,3,"tag",$yp,$yp+$yheight);
	&findseq(1,3,"tga",$yp,$yp+$yheight);
	&gendgroup();
	$yp += $yheight + $yspc;
	&ggroup();
	&gbox(1./$xscale,$yp,$len/$xscale,$yp+$yheight,$color[2%3]);
	&findseq(2,3,"taa",$yp,$yp+$yheight);
	&findseq(2,3,"tag",$yp,$yp+$yheight);
	&findseq(2,3,"tga",$yp,$yp+$yheight);
	&gendgroup();
	$yp += $yheight + $yspc;
	&ggroup();
	&gbox(1./$xscale,$yp,$len/$xscale,$yp+$yheight,$color[3%3]);
	&findseq(3,3,"taa",$yp,$yp+$yheight);
	&findseq(3,3,"tag",$yp,$yp+$yheight);
	&findseq(3,3,"tga",$yp,$yp+$yheight);
	&gendgroup();
}


# Print the SOURCE record at the top of the page.
sub parsesource {

	($junk,$txt) = split(' ',$_,2);
	&gtext(0,$yp,$txt);
	$yp += 3*$yspc;
}

# Parse the FEATURES entries, printing a box for note, gene, product
sub parsefeatures {
	local($y, $tag, $pos, $note);
	while (<>) {
		if (/^\w/) {
			if ($pos) {
				&printfeature($text,$pos);
			}
			if (/^ORIGIN.*/) {
				&parseorigin();
			}
			last;
		}
		if ( /^ {9}/ ) {
			# ttype is a priority field; the highest
			# priority gets printed.
			if ( /note/ && $ttype<3) {
				($text) = $_ =~ /^ *.note="([^"]*)"?\n?/;
				$ttype = 3;
			} elsif ( /gene/  && $ttype < 6) {
				($text) = $_ =~ /^ *.gene="([^"]*)"?\n?/;
				$ttype = 6;
			} elsif ( /product/  && $ttype < 8) {
				($text) = $_ =~ /^ *.product="([^"]*)"?\n?/;
				$ttype = 8;
			} else {
			}
		} else {
			if ($pos) {
				&printfeature($text,$pos);
			}
			$pos = 0;
			($text, $pos1) = split(' ');
			if ($proteinonly) {
				if ($text =~ /CDS/) {
				} else {
					next;
				}
			}
			#Skip all these fields
			next if $text =~ /exon/;
			next if $text =~ /prim_transcript/;
			next if $text =~ /intron/;
			next if $text =~ /virion/;
			next if $text =~ /provirus/;
			next if $text =~ /mRNA/;
			$pos = $pos1;
			$ttype = 1;
		}
	}
}


# Print a box for the feature.  Parse the 123..456 field.
sub printfeature {
	local($txt, $pos) = @_;
	local($offset, $stride, $tst, $y0,$y1) = @_;
	if (($a,$b) = $pos =~ /^<?(\d+)\.\.>?(\d+)$/) {
		&ggroup();
		&gbox($a/$xscale, $yp, $b/$xscale, $yp+$yheight, $color[$a%3]);
		if ($textabove) {
			&gtext($a/$xscale+0.03, $yp+$2.1*yheight, $txt);
		} else {
			&gtext($b/$xscale+0.03, $yp+$yheight, $txt);
		}
		&gendgroup($gc);
		$yp += $yheight+$yspc;
	} elsif (($a) = $pos =~ /^<?(\d+)$/) {
		&ggroup();
		&gbox($a/$xscale, $yp, $a/$xscale, $yp+$yheight, $color[$a%3]);
		if ($textabove) {
			&gtext($a/$xscale+0.03, $yp+$2.1*yheight, $txt);
		} else {
			&gtext($a/$xscale+0.03, $yp+$yheight, $txt);
		}
		&gendgroup($gc);
		$yp += $yheight+$yspc;
	} elsif (($a,$b,$c,$d) = $pos =~ /^join.(\d+)\.\.(\d+),(\d+)\.\.(\d+).$/) {
		&ggroup();
		&gbox($a/$xscale, $yp, $b/$xscale, $yp+$yheight, $color[$a%3]);
		&gline($b/$xscale, $yp+$yheight/2., $c/$xscale, $yp+$yheight/2.);
		&gbox($c/$xscale, $yp, $d/$xscale, $yp+$yheight, $color[$a%3]);
		if ($textabove) {
			&gtext($a/$xscale+0.03, $yp+$2.1*yheight, $txt);
		} else {
			&gtext($d/$xscale+0.03, $yp+$yheight, $txt);
		}
		&gendgroup($gc);
		$yp += $yheight+$yspc;

	} else {
		print STDERR "$pos $txt ?????\n";
	}
}

# Collect all the sequence data in the ORIGIN record into a big string
# in $seq
sub parseorigin {
	$seq = "x";
	while (<>) {
		chop;
		if ($_ eq "//" ) {
			last;
		}
		s/\s\d+\w//;
		s/\s//g;
		$c = substr($_,0,1);
		substr($seq,-1,0) = $_;
	}
	$seq = substr($seq,1);
	chop $seq;
}

# ------------

# Look for occurrences of $tst in the sequence, starting at $offset and
# moving through at $y.  Draw lines from y0 to y1 whereever found.
# This is used, for instance, to find stop codons.
sub findseq {
	local($offset, $stride, $tst, $y0,$y1) = @_;
	local($slen);
	$slen = length($tst);
	for ($i=$offset-1; $i<$len; $i += $stride) {
		if (substr($seq,$i,$slen) eq $tst) {
			&gline($i/$xscale,$y0,$i/$xscale,$y1);
		}
	}
}

sub checkcg {
	$yp += 1;
	&gbox(1./$xscale,$yp,$len/$xscale,$yp+$yheight);
	&findseq(1,1,"cg",$yp, $yp+$yheight);
	$oldtotal = 0;
	$oldx = 0;
	$total = 0;
	$histw = 100;

	for ($i=0;$i<$len;$i++) {
		$bases{substr($seq,$i,1)}++;
	}
	$ct = $bases{"c"};
	$gt = $bases{"g"};
	$at = $bases{"a"};
	$tt = $bases{"t"};

	$bscale = 4; #inches per fraction
	$exp = $ct*$gt/$len/$len;
	&gline(0/$xscale, $yp-$exp*$bscale, $len/$xscale,$yp-$exp*$bscale);

	$p = int($exp*100);
	&gtext($len/$xscale,$yp-$exp*$bscale,$p . "%");

	for ($i=0;$i<$len;$i++) {
	if (0) {
		$cnt++;
		if (substr($seq,$i,2) eq "cg") {
			$total++;
		}
		if ($i-$histw>0 && substr($seq,$i-$histw,2) eq "cg") {
			$total--;
		}

		if (1 || $cnt==$histw || $i==$len-1) {
			$cnt = $histw;
			&gline($oldx/$xscale, $yp-$oldtotal*$bscale/$cnt, ($i-$histw/2.)/$xscale,$yp-$total*$bscale/$cnt);
			$oldtotal = $total;
			$oldx = $i-$histw/2.;
			$cnt = 0;
		}
	} else {
		$cnt++;
		if (substr($seq,$i,2) eq "cg") {
			$total++;
		}

		if ($cnt==$histw) {
			&gline($oldx/$xscale, $yp-$oldtotal*$bscale/$cnt, ($i-$histw/2.)/$xscale,$yp-$total*$bscale/$cnt);
			$oldtotal = $total;
			$oldx = $i-$histw/2.;
			$cnt = 0;
			$total = 0;
		}
	}
	}
}

# ------------
# Graphics functions.  These functions generate MIF code.

# ginit
sub ginit {
	print "<MIFFile 1.0> #Generated by genbank2mif\n";
}

# gend
sub gend {
	print "# End of MIFFile\n";
}

# gline(x0,y0,x1,y1)
sub gline {
	local($x0,$y0,$x1,$y1) = @_;
	print "<PolyLine\n";
	if ($group > 0) {
		print "  <GroupID $group>\n";
	}
	print "  <Point $x0 $y0>\n";
	print "  <Point $x1 $y1>\n";
	print "> #end of PolyLine\n";
}

# gbox(x0,y0,x1,y1,fill)
sub gbox {
	local($x0,$y0,$x1,$y1,$fill) = @_;
	print "<Rectangle\n";
	if ($group > 0) {
		print "  <GroupID $group>\n";
	}
	if ($fill) {
		print "  <ObColor $fill>\n";
		print "  <DashedPattern <DashedStyle Solid>>\n";
	} else {
		print "  <ObColor Black>\n";
	}
	$w = $x1-$x0;
	$h = $y1-$y0;
	print "  <ShapeRect $x0 $y0 $w $h>\n";
	print "  <BRect $x0 $y0 $w $h>\n";
	print "> #end of Rectangle\n";
}

# ggroup: Group together all elements until gendgroup
sub ggroup {
	$gc++;
	$group = $gc;
}

# gendgroup
sub gendgroup {
	# Should pop group
	print "<Group\n";
	print "  <ID $group>\n";
	if (0) {
		print "  <GroupID $group>\n";
	}
	print "> #end of Group\n";
	$group = 0;
}

#gtext (x,y,text)
sub gtext {
	local($x,$y,$text) = @_;
	print "<TextLine\n";
	if ($group > 0) {
		print "  <GroupID $group>\n";
	}
	print "  <TLOrigin $x $y>\n";
	print "  <TLAlignment Left>\n";
	print "  <String `$text'>\n";
	print "> #end of TextLine\n";
}
