sub MAIN (
Str :$fasta! where *.IO.f,
Int :$number=100,
Str :$out-dir=$*PROGRAM.IO.dirname
) {
mkdir $out-dir unless $out-dir.IO.d;
my $ext = $fasta.IO.extension;
my $basename = $fasta.IO.basename;
$basename ~~ s/\.$ext$//;
sub writer (@seqs) {
state $fnum = 0;
my $fname = $*SPEC.catfile(
$out-dir, sprintf('%s.%04d.%s', $basename, ++$fnum, $ext)
);
printf "Writing %s to %s\n", @seqs.elems, $fname.IO…
sub MAIN (:$out-dir="", *@fastq) {
if ($out-dir.chars > 0 && ! $out-dir.IO.d) {
mkdir $out-dir;
}
my $i = 0;
for @fastq -> $fastq {
(my $basename = $fastq.IO.basename) ~~ s/\.\w*?$//;
my $out-file = $*SPEC.catfile(
$out-dir || $fastq.IO.dirname, $basename ~ '.fa');
printf "%3d: %s -> %s\n",
++$i, $fastq.IO.basename, $out-file;
my $out-fh = open $out-file, :w;
for $fastq.IO.lines -> $header, $seq, $brea…
I have some FASTA files with headers like this:
>gi|83274083|ref|AC_000032.1| Mus musculus strain mixed chromosome 10, alternate assembly Mm_Celera, whole genome shotgun sequence
I wanted to extract just the second field of each header (the fields are separated by `|`), so here's a Perl 6 script to do that:
#!/usr/bin/env perl6
use File::Temp;
sub MAIN (*@files) {
my $i = 0;
for @files -> $file {
my ($tmpfile, $tmpfh) = tempfile();
printf "%3d: %s\n", ++$i, $file.IO.basename;
for $file.IO.lines -> $line {
if $line.substr…