#!/usr/bin/env perl

use v5.14;
use warnings;
use utf8;

our $VERSION = "1.09";

=encoding utf-8

=head1 NAME

ansifold - fold command handling ANSI terminal sequences

=head1 VERSION

Version 1.09

=head1 SYNOPSIS

ansifold [ options ]

    -w#   --width=#                Folding width (default 72)
          --boundary=word          Fold on word boundary
          --padding                Padding to margin space
          --padchar=_              Padding character
          --ambiguous=narrow|wide  Unicode ambiguous character handling
    -p    --paragraph              Print extra newline
          --separate=string        Set separator string (default newline)
    -n    --nonewline              Same as --separate ''
          --linebreak=mode         Line-break mode (all, runin, runout, none)
          --runin=width            Run-in width (default 4)
          --runout=width           Run-out width (default 4)
    -s    --smart                  Same as --boundary=word --linebreak=all
    -x[#] --expand[=#]             Expand tabs
          --tabstop=n              Tab-stop position (default 8)
          --tabhead=char           Tab-head character (default space)
          --tabspace=char          Tab-space character (default space)
          --tabstyle=style         Tab expansion style (shade, dot, symbol)

=head1 DESCRIPTION

B<ansifold> is a fold(1) compatible command utilizing the
L<Text::ANSI::Fold> module, which makes it possible to handle ANSI
terminal sequences.

=head2 FOLD BY WIDTH

B<ansifold> folds lines at 72 columns by default.  Use option B<-w> to
change the folding width.

    $ ansifold -w132

A single field is used repeatedly for the same line.

With option B<--padding>, the remaining columns are filled with the
padding character (space by default).  You can use B<--padchar> to
change the padding character.

B<ansifold> handles Unicode multi-byte characters properly.  Option
B<--ambiguous> takes I<wide> or I<narrow> and it specifies the visual
width of Unicode ambiguous characters.

=head2 MULTIPLE WIDTH

Unlike the original fold(1) command, multiple numbers can be
specified.

    $ LANG=C date | ansifold -w 3,1,3,1,2 | cat -n
         1  Wed
         2   
         3  Dec
         4   
         5  19

With multiple fields, the unmatched part is discarded as in the above
example.  So you can truncate lines by putting a comma at the end of
a single field.

    ansifold -w80,

Option C<-w80,> is equivalent to C<-w80,0>.  Zero width is ignored
when seen as a final number, but not ignored otherwise.

=head2 NEGATIVE WIDTH

Negative number fields are discarded.

    $ LANG=C date | ansifold -w 3,-1,3,-1,2
    Wed
    Dec
    19

If the final width is negative, it is not discarded but takes all the
rest instead.  So the next two commands do the same thing.

    $ colrm 7 10

    $ ansifold -nw 6,-4,-1

Option C<--width -1> does nothing effectively.  Using it with
B<--expand> option implements ANSI/Unicode aware L<expand(1)> command.

    $ ansifold --expand --width -1

This can be written as this.

    $ ansifold -xw-1

=head2 NUMBERS

Number description is handled by L<Getopt::EX::Numbers> module, and
consists of C<start>, C<end>, C<step> and C<length> elements.  For
example,

    $ echo AABBBBCCCCCCDDDDDDDDEEEEEEEEEE | ansifold -w 2:10:2

is equivalent to:

    $ echo AABBBBCCCCCCDDDDDDDDEEEEEEEEEE | ansifold -w 2,4,6,8,10

and produces output like this:

    AA
    BBBB
    CCCCCC
    DDDDDDDD
    EEEEEEEEEE

=head2 SEPARATOR/TERMINATOR

Option B<-n> eliminates newlines between columns.

    $ LANG=C date | ansifold -w 3,-1,3,-1,2 -n
    WedDec19

Option B<--separate> sets the separator string.

    $ echo ABCDEF | ansifold --separate=: -w 1,0,1,0,1,-1
    A::B::C:DEF

Option B<-n> is a short-cut for C<--separate ''>.

Option B<--paragraph> or B<-p> prints an extra newline after each line.
This is convenient when a paragraph is made up of a single line, as in
a Microsoft Word document.

=head1 LINE BREAKING

Line break adjustment is supported for ASCII word boundaries.  As for
Japanese, more complicated prohibition processing is performed.  Use
option B<-s> to enable everything.

=head2 B<--boundary>=I<word>

Option B<--boundary=word> prohibits breaking a line in the middle of
an alphanumeric word.

=head2 B<--linebreak>=I<all>|I<runin>|I<runout>|I<none>

Option B<--linebreak> takes a value of I<all>, I<runin>, I<runout> or
I<none>.  Default value is I<none>.

When the B<--linebreak> option is enabled, if the cut-off text starts
with spaces or prohibited characters (e.g. closing parenthesis), they
are run in at the end of the current line as much as possible.

If the trimmed text ends with prohibited characters (e.g. opening
parenthesis), they are run out to the head of the next line, provided
it fits within the maximum width.

=head2 B<--runin>=I<width>, B<--runout>=I<width>

The maximum widths of run-in/run-out characters are defined by the
B<--runin> and B<--runout> options.  Both default to 4.

=head2 B<--smart>, B<-s>

Option B<--smart> (or simply B<-s>) sets both B<--boundary=word> and
B<--linebreak=all>, and enables all smart text formatting capabilities.

=head1 TAB EXPANSION

=head2 B<--expand>

Option B<--expand> (or B<-x>) enables tab character expansion.

    $ ansifold --expand

It takes an optional number as the tab stop, which takes precedence
over the B<--tabstop> option.

    $ ansifold -x4w-1

=head2 B<--tabhead>, B<--tabspace>

Each tab character is converted to a B<tabhead> character followed by
B<tabspace> characters (both are space by default).  They can be
specified by the B<--tabhead> and B<--tabspace> options.  If the option
value is longer than a single character, it is evaluated as a Unicode
name.  The next example makes tab characters visible while keeping the
text layout.

    $ ansifold --expand --tabhead="MEDIUM SHADE" --tabspace="LIGHT SHADE"

=head2 B<--tabstyle>

Option B<--tabstyle> allows setting the B<--tabhead> and B<--tabspace>
characters at once according to the given style name.  Select from
C<dot>, C<symbol> or C<shade>.  Styles are defined in the
L<Text::ANSI::Fold> library.

    $ ansifold --expand --tabstyle=shade

=head1 FILES

=over 4

=item F<~/.ansifoldrc>

Start-up file.
See L<Getopt::EX::Module> for format.

=back

=head1 INSTALL

=head2 CPANMINUS

    $ cpanm App::ansifold
    or
    $ curl -sL http://cpanmin.us | perl - App::ansifold

=head1 SEE ALSO

L<ansifold|https://github.com/kaz-utashiro/ansifold>

L<Text::ANSI::Fold|https://github.com/kaz-utashiro/Text-ANSI-Fold>

L<Text::ANSI::Fold::Util|https://github.com/kaz-utashiro/Text-ANSI-Fold-Util>

L<Getopt::EX::Numbers>

L<https://www.w3.org/TR/jlreq/>
Requirements for Japanese Text Layout,
W3C Working Group Note 11 August 2020

=head1 AUTHOR

Kazumasa Utashiro

=head1 LICENSE

Copyright 2018- Kazumasa Utashiro

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

use open IO => 'utf8', ':std';
use Encode;
# Decode command-line arguments from UTF-8 in place, skipping any that
# already carry Perl's internal UTF-8 flag.  A plain loop is used because
# the statement exists only for its side effect on @ARGV.
for my $arg (@ARGV) {
    $arg = decode 'utf8', $arg unless utf8::is_utf8($arg);
}

use Pod::Usage;
use List::Util qw(min);
use Hash::Util qw(lock_keys);
use Text::ANSI::Fold qw(:constants);
use Data::Dumper;

# Built-in defaults: fold at 72 columns, tab expansion off.
my ($DEFAULT_WIDTH, $DEFAULT_EXPAND) = (72, 0);

##
## when executed as 'ansiexpand'
## (any program name ending in "expand"): width -1 and expansion on
##
($DEFAULT_WIDTH, $DEFAULT_EXPAND) = (-1, 1) if $0 =~ /expand$/;

use Getopt::EX::Hashed 'has';

#
# Command-line option declarations for Getopt::EX::Hashed.
# "spec" gives the short name and value type of each option.
# Where "default" is a code reference it serves as the option's handler;
# as in the --linebreak and --smart handlers further down, the option
# object is reached through $_.
#
has width     => spec => "w =s " , default => [];
has boundary  => spec => "  =s " , default => "none";
has padding   => spec => "  !  " , ;
has padchar   => spec => "  =s " , ;
has ambiguous => spec => "  =s " , ;
has paragraph => spec => "p +  " , default => 0;
has separate  => spec => "  =s " , default => "\n";
has runin     => spec => "  =i " , default => 4;
has runout    => spec => "  =i " , default => 4;
# -n is a shortcut for --separate ''.  The separator lives in the option
# object, so it must be written through $_->{...}; $_{separate} would
# store into the unrelated global hash %_ and leave the separator as is.
has nonewline => spec => "n    " , default => sub { $_->{separate} = "" };
has expand    => spec => "x :-1" , default => $DEFAULT_EXPAND;
has tabstop   => spec => "  =i " ;
has tabhead   => spec => "  =s " ;
has tabspace  => spec => "  =s " ;
has tabstyle  => spec => "  =s " ;
has discard   => spec => "  =s " , default => [];
# -v prints the version and exits; no stray space after the newline.
has version   => spec => "v    " , default => sub {
    print "Version: $VERSION\n";
    exit;
};

#
# A small trick: {linebreak} itself is never exposed as an option.
# The {lb} entry is what the command line sees as --linebreak; its
# handler turns the mode string into LINEBREAK_* flag bits and stores
# the result in {linebreak}.
#
has linebreak => default => LINEBREAK_NONE;
has lb => spec => "linebreak=s", default => sub {
    my $mode  = $_[1];
    my $flags = LINEBREAK_NONE;

    # Each keyword found anywhere in the value (case-insensitively)
    # contributes its bits.
    $flags |= LINEBREAK_ALL    if $mode =~ /all/i;
    $flags |= LINEBREAK_RUNIN  if $mode =~ /runin/i;
    $flags |= LINEBREAK_RUNOUT if $mode =~ /runout/i;

    $_->{linebreak} = $flags;
};

# --smart / -s (negatable): switches word-boundary folding and full
# line-break processing on together, or both off when negated.
has smart => spec => "s !" , default => sub {
    ($_->{boundary}, $_->{linebreak}) =
        $_[1] ? ('word', LINEBREAK_ALL)
              : ('none', LINEBREAK_NONE);
};

no Getopt::EX::Hashed;

use Getopt::EX::Long;
# Allow bundled single-letter options.
Getopt::Long::Configure("bundling");

# Build the option object from the declarations and parse the command
# line into it; show usage when parsing fails.
my $opt = Getopt::EX::Hashed->new
    or die;
GetOptions($opt, $opt->optspec)
    or pod2usage();

my @width_map;
my @width_index;

use Getopt::EX::Numbers;
my $numbers = Getopt::EX::Numbers->new;

# Flatten every -w value into one list of widths.  Values are split on
# commas (keeping trailing empty fields), then each field is expanded:
#   ""            -> 0
#   "10" / "-3"   -> the number itself
#   "a:b:c:d{n}"  -> the sequence produced by Getopt::EX::Numbers,
#                    repeated n times (once when {n} is absent)
my @width;
for my $field (map { split /,/, $_, -1 } @{$opt->{width}}) {
    if ($field =~ /^$/) {
        push @width, 0;
    }
    elsif ($field =~ /^-?\d+$/) {
        push @width, $field;
    }
    elsif ($field =~ /^(-?[-\d:]+) (?:\{(\d+)\})? $/x) {
        push @width, ($numbers->parse($1)->sequence) x ($2 // 1);
    }
    else {
        die "$field: width format error.\n";
    }
}

# Decide what is handed over as the folding width:
#   no width given  -> the default
#   a single width  -> that value unchanged
#   several widths  -> a reference to the list of absolute widths, with
#                      @width_index recording which positions to keep
if (@width == 0) {
    $opt->{width} = $DEFAULT_WIDTH;
}
elsif (@width == 1) {
    $opt->{width} = $width[0];
}
else {
    # The final width is paired with itself as its keep-flag, so a final
    # zero is dropped while any other final value (negative included)
    # is kept as is.
    my $last = int pop @width;

    # Every earlier width becomes [ absolute width, keep-flag ]; a
    # negative width still occupies its columns but is not kept.
    my @spec = (
        (map { $_ < 0 ? [ -$_, 0 ] : [ $_, 1 ] } @width),
        [ $last, $last ],
    );

    @width       = map  { $_->[0] } @spec;
    @width_index = grep { $spec[$_][1] } 0 .. $#spec;
    $opt->{width} = \@width;
}

# A positive --expand value doubles as the tab stop and replaces any
# value given with --tabstop.
$opt->{tabstop} = $opt->{expand} if $opt->{expand} > 0;

use charnames ':loose';
# A --tabhead / --tabspace value longer than one character is taken as
# a Unicode character name and replaced by the character it names.
for my $key (qw(tabhead tabspace)) {
    my $name = $opt->{$key};
    next unless defined $name && length($name) > 1;
    $opt->{$key} = charnames::string_vianame($name)
        || die "$name: invalid name\n";
}

# Options forwarded to Text::ANSI::Fold, in the order they are passed.
my @fold_keys = qw(width boundary padding padchar ambiguous
                   linebreak runin runout
                   expand tabstyle tabstop tabhead tabspace discard);

# Only options that actually hold a value are handed to the constructor.
my @fold_args;
for my $key (@fold_keys) {
    push @fold_args, $key => $opt->{$key} if defined $opt->{$key};
}

my $fold = Text::ANSI::Fold->new(@fold_args);

# Escape sequences recognised in the --separate string: "\\" yields a
# backslash and "\n" a newline.  Any other backslash pair is left as is.
my %escape_for = ('\\' => '\\', n => "\n");

my $separator =
    $opt->{separate} =~ s{ ( \\ (.) ) }{ $escape_for{$2} // $1 }gexr;

# Main loop: fold each input line and print the pieces joined by the
# separator.
while (defined(my $line = <>)) {
    my $had_newline = chomp $line;
    my @pieces = $fold->text($line)->chops;

    # With multiple widths, keep only the pieces at the selected
    # positions; positions beyond what the line produced are dropped.
    @pieces = grep { defined } @pieces[@width_index] if @width_index;

    print join($separator, @pieces);
    print "\n" if $had_newline;    # put back the newline chomp removed

    # --paragraph: one extra newline per -p given
    print "\n" x $opt->{paragraph} if $opt->{paragraph} > 0;
}

exit;

#  LocalWords:  unicode ansifold LANG colrm KINSOKU
