Pod::Plexus meta-yak, editor folding

Pod::Plexus development has led me down a slippery slope peppered with boulders and broken glass I thought I could avoid forever.

My inner typographer wants to lay out documentation "just so", which means putting all the POD together, usually at the end of the file. Pod::Plexus and Pod::Weaver move POD around, making this semitypographical exercise a big time waster. All the careful consideration of placing "SEE ALSO" before (or after) "BUGS" matters naught.

Suddenly interleaving POD and code doesn't sound so bad. It doesn't change the finished product. I can also get rid of some of my pre-method comments, since the documentation will do double duty. Smaller distributions and less work for me? What's not to like?

Well, it's a pity that interleaved code and POD is butt ugly and distracting. By breaking up the continuity and context between methods, it's actually interfering with my ability to hold the design in my head. And this is code I'm in the process of writing. Maybe I'll evolve out of my modalities, but it's a learning hurdle for now.

Which leads me to my other nemesis, source folding. If you don't know, it's where groups of lines in a source file are squashed ("folded") together into a single line. As much as I hate the concept, the ability to hide POD or code might make interleaving them barely tolerable. It's ironic because I hate source folding about as much as I hate interleaving code and POD. Necessity seems to be the mother of strange bedfellows. Maybe two wrongs can make a right.

So I spent the past couple nights hacking POD/code folding into my .vimrc. Now two keystrokes will hide all the POD so I can read the code, and two other keystrokes will swap that around. The results are ugly and preliminary, but I'm kind of happy about them. I thought I'd share. This is me sharing. You have been shared with.

I plan to clean this up and add it to Pod::Plexus' CPAN distribution after the module is ready to be released. I chose to shave this meta-yak now because I think it'll help me write the remaining code faster. I'll let you know how that works out for me.

Meanwhile, if you know Vim scripting, you're probably better off than I am. Patches to this are welcome. For example, I don't know how I'm going to add Moose syntax to the mix.

" Folding!  Ogods, the FOLDING!

" Options for the stock perl.vim syntax file.  They are set before
" "syntax on" so they are already in place when the syntax file loads.
let perl_include_pod     = 1
let perl_extended_vars   = 1
let perl_sync_dist       = 1000
let perl_fold            = 1
let perl_nofold_packages = 1

syntax on

" Same color scheme everywhere; only the GUI has a toolbar to hide.
colorscheme rocco
if has("gui_running")
  set guioptions-=T
endif

" Highlight whitespace where it doesn't belong.  Goes after "syntax on".
highlight ExtraWhitespace ctermbg=red guibg=red

" The augroup plus "autocmd!" keeps these from piling up when the vimrc
" is sourced more than once.
augroup ExtraWhitespaceHL
  autocmd!
  " Re-apply both the terminal and the GUI color after a colorscheme
  " change; re-applying only guibg leaves the terminal without it.
  autocmd ColorScheme * highlight ExtraWhitespace ctermbg=red guibg=red
  autocmd BufEnter    * match ExtraWhitespace /\s\+$/
  " While typing, don't flag the whitespace right before the cursor.
  autocmd InsertEnter * match ExtraWhitespace /\s\+\%#\@<!$/
  autocmd InsertLeave * match ExtraWhitespace /\s\+$/
augroup END

" Extend the stock perl.vim highlighting without hacking a custom one.
" MyPerlAdd()
"   Adds local syntax items on top of whatever Perl syntax is loaded:
"   tab highlighting inside POD, extra TODO-style keywords, the
"   Pod::Plexus commands (=public, =private, =attribute, =method) as POD
"   commands / sync points / foldable perlPOD regions, and "[% ... %]"
"   template markers.  Takes no arguments and returns nothing useful.
"   "function!" lets the vimrc be sourced again without E122.
function! MyPerlAdd()
  syn match perlPodTabs "\t" contained
  syn cluster Pod add=perlPodTabs
  " syn match podVerbatimLine "^\s.*$" contains=perlPodTabs,@NoSpell
  syn keyword perlTodo TODO TBD FIXME XXX FUTURE NOTE NB contained
  syn match perlTodo "-><-" contained
  hi link perlPodTabs ExtraWhitespace

  " Extend POD regions to include Pod::Plexus syntax.  The three loops
  " keep the definition order of the hand-written version (all matches,
  " then all sync points, then all regions), since later syntax items
  " take priority over earlier ones that start at the same position.
  let l:directives = ['public', 'private', 'attribute', 'method']

  for l:directive in l:directives
    execute 'syn match podCommand "^=' . l:directive
          \ . '" nextgroup=podCmdText skipwhite contains=@NoSpell keepend'
  endfor

  for l:directive in l:directives
    execute 'syn sync match perlSyncPOD grouphere perlPOD "^=' . l:directive . '"'
  endfor

  for l:directive in l:directives
    execute 'syn region perlPOD start="^=' . l:directive
          \ . '" end="^=cut" contains=@Pod,@Spell,perlTodo keepend fold'
  endfor

  syn match podSpecial "\[%[^%\]]*%\]" contains=@NoSpell
endfunction

" Run the additions whenever the perl syntax is set.  The augroup plus
" "autocmd!" keeps the hook from being registered twice on re-source.
augroup MyPerlSyntax
  autocmd!
  autocmd Syntax perl call MyPerlAdd()
augroup END

" Folded lines and the fold column share one GUI palette.
for s:fold_group in ['Folded', 'FoldColumn']
  execute 'highlight' s:fold_group 'guibg=black guifg=darkred'
endfor
unlet s:fold_group

" z1 shows POD; z2 shows everything else.  TODO - Support Moose syntax.
nnoremap <silent> z1 :call ShowFoldsMatching(hlID('perlPOD'), 0)<CR>
nnoremap <silent> z2 :call ShowFoldsMatching(hlID('perlSubFold'), 0)<CR>

" ShowFoldsMatching({sid}, {negate})
"   Closes every fold in the current window, then walks the folds from
"   the top of the buffer and re-opens a selection of them.
"
"   {sid}     Syntax ID to look for (e.g. the result of hlID('perlPOD')).
"             A fold "matches" when {sid} is in the syntax stack at the
"             position the cursor lands on for that fold.
"   {negate}  When false, open the matching folds; when true, open the
"             folds that do NOT match.
"
"   The view and the cursor's screen line are put back afterwards, and
"   the 'visualbell' / 't_vb' settings are restored even when a command
"   inside fails.
function! ShowFoldsMatching(sid, negate)
  " Save the bell settings verbatim.  ("&t_vb || ''" would be a logical
  " OR yielding the Number 0 or 1, which then clobbers t_vb on restore.)
  let l:saved_vb  = &vb
  let l:saved_tvb = &t_vb

  " Remember the view and the cursor's line on the screen.
  let l:saved_view    = winsaveview()
  let l:saved_winline = winline()

  try
    " Silence audible and visual bells: zj beeps when there is no next
    " fold, and that failure is exactly how the walk below terminates.
    let &vb   = 1
    let &t_vb = ''

    " Top of the file, and close all folds.  "normal!" bypasses any
    " user mappings of these keys.
    normal! gg
    normal! zM

    " Iterate all folds, opening the ones we want.  The loop ends when
    " zj can no longer move the cursor.
    let l:want_match = a:negate ? 0 : 1
    let l:lastline   = -1
    while l:lastline != line('.')
      let l:is_match = count(synstack(line('.'), col('.')), a:sid) > 0
      " Only try to open when the line really is in a fold; zo on an
      " unfolded line raises E490.
      if l:is_match == l:want_match && foldlevel(line('.')) > 0
        normal! zo
      endif
      let l:lastline = line('.')
      normal! zj
    endwhile

    " Restore the cursor's position in the file.
    call winrestview(l:saved_view)

    " Restore the cursor's line on the screen: opening and closing folds
    " can shift it, so scroll the window by the difference.
    let l:offset = winline() - l:saved_winline
    if l:offset < 0
      execute 'normal! ' . (-l:offset) . "\<C-Y>"
    elseif l:offset > 0
      execute 'normal! ' . l:offset . "\<C-E>"
    endif
  finally
    " Restore audible and visual bell.
    let &vb   = l:saved_vb
    let &t_vb = l:saved_tvb
  endtry
endfunction

If you can't make money, at least have fun.

There's a saying: "If you can't make money, at least have fun." My open source projects have never earned a profit, but they've been enormous fun.

They've put me in the position to meet a lot of great people and go to interesting places. It's hard to imagine having more fun than I did speaking at Perl Oasis and YAPC::NA this year. If anyone releasing projects in other languages has more fun than this, how do they survive?

But giving away software is only fun when people use it. Otherwise it's just saving public backups or something.

So hearing that DomainSponsor's distributed server architecture uses one of my modules made my weekend. It was an awesome feeling, and it put me in the mood to update five distributions in the past couple days.

And write a blog post, which you're reading. Thanks, by the way.

So if you're using CPAN modules (and can talk about it, of course) let us authors know. Because most of us aren't making money at it, and you'll help us have more fun and release more code despite that.

Reducing Documentation's Technical Debt

Documentation is a social asset, but it's balanced by a technical debt. Documentation describes code that changes over time. As the code changes, so must the docs.

I want to write good documentation for my Perl distributions, but my standard of "good" documentation requires a lot of effort to write and maintain. I'm working on Pod::Plexus to make it easier. It doesn't replace anything, as far as I know. In fact, I've begun using it with Pod::Weaver in my Reflex distribution.

For example, I tend to write modular, cross-referenced code. Everything but the most superficial interfaces is used somewhere else in the distribution. I'd rather not contrive usage examples, so why not reuse parts of the distribution in the documentation? So Pod::Plexus includes an "=example" directive to insert live code into the documentation.

One of my pet peeves is hierarchical documentation. It's easy to document a subclass by saying "everything else (which I may or may not list here) is documented in this base class". On the downside, that's terrible for first-time users. What does the module provide? Which ancestor documents which method? So tonight I added an "=include" directive to insert live documentation from another place.

Here's an example of Pod::Plexus markup. Output follows. It uses "=example" to create a synopsis from the module's live implementation. "=include" imports the description from a nearly identical module. Pod::Plexus includes some templating magic, so [% doc.module %] expands to the module being documented.

package Reflex::Eg::Inheritance::Plain;

use warnings;
use strict;
use base 'Reflex::Timeout';

sub on_done {
    shift()->reset();
    print scalar(localtime()), " - Subclass got timeout.\n";
}

1;

__END__

=pod

=abstract Inheriting a Reflex timer with plain Perl.

=head1 SYNOPSIS

=example Reflex::Eg::Inheritance::Plain

Usage:

    perl -M[% doc.module %] -e '[% doc.module %]->new(delay => 1)->run_all'

=head1 DESCRIPTION

This module is nearly identical to Reflex::Eg::Inheritance::Moose.
It only differs in the mechanism of subclassing Reflex::Timeout.

=include Reflex::Eg::Inheritance::Moose DESCRIPTION

=cut

Here's the output as promised. You'll notice that "=abstract" becomes a NAME section. Had this not been a test, Pod::Weaver would have added several other boilerplate sections.

package Reflex::Eg::Inheritance::Plain;

use warnings;
use strict;
use base 'Reflex::Timeout';

sub on_done {
    shift()->reset();
    print scalar(localtime()), " - Subclass got timeout.\n";
}

1;

__END__

=pod

=head1 NAME

Reflex::Eg::Inheritance::Plain - Inheriting a Reflex timer with plain Perl.

=head1 SYNOPSIS

    package Reflex::Eg::Inheritance::Plain;

    use warnings;
    use strict;
    use base 'Reflex::Timeout';

    sub on_done {
        shift()->reset();
        print scalar(localtime()), " - Subclass got timeout.\n";
    }

    1;


Usage:

    perl -MReflex::Eg::Inheritance::Plain -e 'Reflex::Eg::Inheritance::Plain->new(delay => 1)->run_all'

=head1 DESCRIPTION

This module is nearly identical to Reflex::Eg::Inheritance::Moose.
It only differs in the mechanism of subclassing Reflex::Timeout.

Reflex::Timeout objects normally go dormant after the first time they
call on_done().

Reflex::Eg::Inheritance::Plain implements a simple periodic timer by subclassing and
overriding Reflex::Timeout's on_done() callback.  The act of finishing
the timeout causes itself to be reset.

Since this is an example, the subclass also prints a message so it's
apparent it works.

This is a relatively silly exercise.
Reflex::Interval already implements a periodic interval timer.

=cut

In a future installment, I'll talk briefly about how "=index REGEXP" creates an index of all the distributed modules matching a regular expression.

App::PipeFilter for Top-N Reports

I'm testing some software at work by replaying pcap files at the application. I want to make sure the results in the database match what's in the original packet dump. There are hundreds of packet producers. I want to focus on the top ten to make better use of my time.

I've written a utility to dump interesting packet data as streams of JSON objects, one per packet. Each object includes the source and destination IP and port, among other things.

% jcut -o src_ip -o dest_ip deleteme.json | head -3

{"dest_ip":"10.10.91.77","src_ip":"10.16.250.39"}
{"dest_ip":"10.10.91.77","src_ip":"10.0.250.80"}
{"dest_ip":"10.10.91.77","src_ip":"10.90.250.39"}

A "top N" report for any single field is trivial. Extract values for that field and pass them to sort(1), uniq(1) and tail(1).

% jcut -o src_ip deleteme.json | sort | uniq -c | sort -n | tail -10

 514 {"src_ip":"10.0.250.21"}
 544 {"src_ip":"10.0.250.100"}
 560 {"src_ip":"10.13.250.71"}
 565 {"src_ip":"10.40.250.7"}
 611 {"src_ip":"10.60.250.79"}
 628 {"src_ip":"10.0.50.6"}
 807 {"src_ip":"10.0.250.20"}
1223 {"src_ip":"10.10.250.239"}
2448 {"src_ip":"10.0.250.60"}
2508 {"src_ip":"10.0.250.30"}

I'm also alpha testing json2tsv so I can import this into a spreadsheet. A happy side effect is that tab-separated columns are easier on the eyes.

% json2tsv -o src_ip deleteme.json | sort | uniq -c | sort -n | tail -10

 514 10.0.250.21
 544 10.0.250.100
 560 10.13.250.71
 565 10.40.250.7
 611 10.60.250.79
 628 10.0.50.6
 807 10.0.250.20
1223 10.10.250.239
2448 10.0.250.60
2508 10.0.250.30

json2tsv is in App::PipeFilter's repository, and it's scheduled to appear in the next CPAN release.

Bundlefly - Make Your Bundles Fly

Bundlefly is a hack I've written to build a graph of a bundle's distributions and install them in optimal order. It accelerates the installation of entire library suites for new Perl builds and perlbrew instances. As with App::PipeFilter, it may end up on CPAN if there's interest.

Autobundle snapshots are comprehensive by design. They list all installed modules at a particular point in time. We should rarely be asked to confirm "unsatisfied dependencies" while installing them. The dependencies are almost always somewhere in there.

To compound the suck, we're often asked to install the same fundamental dependencies repeatedly. ExtUtils::MakeMaker and Test::More immediately come to mind. We shouldn't be asked once, yet we're asked several times by the end of the day.

One problem is that autobundle snapshots list distributions alphabetically, and CPAN's shell installs them in that order. Test::More, a distribution used to test a large portion of CPAN, is installed relatively late—after it's already been prepended to the install queue as a dependency of several other distributions.

Bundlefly's dependency graph allows it to install dependencies before dependents. The only "unsatisfied dependencies" one should ever see are those that were introduced since the last CPANDB build and aren't listed in the autobundle snapshot.