#!/usr/bin/env perl

use v5.14;
use warnings;
use utf8;

# Program version, printed by the --version option.  Keep it in sync
# with the VERSION section of the POD below.
our $VERSION = "1.07";

=encoding utf-8

=head1 NAME

ansifold - fold command handling ANSI terminal sequences

=head1 VERSION

Version 1.07

=head1 SYNOPSIS

ansifold [ options ]

    -w#, --width=#                Folding width (default 72)
         --boundary=word          Fold on word boundary
         --padding                Padding to margin space
         --padchar=_              Padding character
         --ambiguous=narrow|wide  Unicode ambiguous character handling
    -p,  --paragraph              Print extra newline
         --separate=string        Set separator string (default newline)
    -n                            Short cut for --separate ''
         --linebreak=mode         Line-break adjustment rule (default none)
         --runin=#                Run-in width (default 4)
         --runout=#               Run-out width (default 4)
    -s,  --smart                  Short cut for --boundary=word --linebreak=all
         --expand[=mode]          Expand tabs
         --tabstop=n              Tab-stop position (default 8)
         --tabhead=char           Tab-head character (default space)
         --tabspace=char          Tab-space character (default space)
         --tabstyle=style         Tab expansion style

=head1 DESCRIPTION

B<ansifold> is a fold(1) compatible command built on the
L<Text::ANSI::Fold> module, which allows it to handle ANSI terminal
sequences and Unicode multi-byte characters properly.

=head2 WIDTH

B<ansifold> folds lines at 72 columns by default.  Use option B<-w> to
change the folding width.

    $ ansifold -w132

A single width is used repeatedly for the same line.

=head2 MULTIPLE WIDTH

Unlike the original fold(1) command, multiple numbers can be
specified.

    $ LANG=C date | ansifold -w 3,1,3,1,2 | cat -n
         1  Wed
         2   
         3  Dec
         4   
         5  19

With multiple fields, the unmatched part is discarded, as in the above
example.  So you can truncate lines by putting a comma at the end of
a single field.

    ansifold -w80,

Option C<-w80,> is equivalent to C<-w80,0>.  Zero width is ignored
when seen as a final number, but not ignored otherwise.

=head2 NEGATIVE WIDTH

Negative number fields are discarded.

    $ LANG=C date | ansifold -w 3,-1,3,-1,2
    Wed
    Dec
    19

If the final width is negative, it is not discarded but takes all the
rest instead.  So the following commands do the same thing.

    $ colrm 7 10

    $ ansifold -nw 6,-4,-1

Option C<-w -1> does nothing effectively.  Using it with B<--expand>
option implements ANSI/Unicode aware L<expand(1)> command.

    $ ansifold -w -1 --expand

=head2 NUMBERS

Number description is handled by L<Getopt::EX::Numbers> module, and
consists of C<start>, C<end>, C<step> and C<length> elements.  For
example,

    $ echo AABBBBCCCCCCDDDDDDDDEEEEEEEEEE | ansifold -w 2:10:2

is equivalent to:

    $ echo AABBBBCCCCCCDDDDDDDDEEEEEEEEEE | ansifold -w 2,4,6,8,10

and produces output like this:

    AA
    BBBB
    CCCCCC
    DDDDDDDD
    EEEEEEEEEE

=head2 SEPARATOR/TERMINATOR

Option B<-n> (or B<--separate> '') eliminates newlines between
columns.

    $ LANG=C date | ansifold -w 3,-1,3,-1,2 -n
    WedDec19

Option B<--separate> sets the separator string.

    $ echo ABCDEF | ansifold --separate=: -w 1,0,1,0,1,-1
    A::B::C:DEF

Option B<--paragraph> or B<-p> prints an extra newline after each line.
This is convenient when a paragraph is made up of a single line, as in
a Microsoft Word document.

=head1 LINE BREAKING

Option B<--boundary=word> prohibits breaking a line in the middle of
an alphanumeric word.  This version also supports line break adjustment,
mainly to perform Japanese ``KINSOKU'' processing.  Use
B<--linebreak=all> to enable it.

When the B<--linebreak> option is enabled, if the cut-off text starts
with a space or prohibited characters (e.g. closing parenthesis), they
are run in at the end of the current line as much as possible.

If the trimmed text ends with prohibited characters (e.g. opening
parenthesis), they are run out to the head of the next line, provided
it fits within the maximum width.

Option B<--linebreak> takes a value of I<all>, I<runin>, I<runout> or
I<none>.  Default value is I<none>.

The maximum widths of run-in and run-out characters are defined by the
B<--runin> and B<--runout> options.  Default values are 4.

Option B<--smart> (or simply B<-s>) is shortcut for
"B<--boundary=word> B<--linebreak=all>" and enables all smart text
formatting capability.

=head1 TAB EXPANSION

Option B<--expand> enables tab character expansion.  Each tab
character is converted to B<tabhead> and following B<tabspace>
characters (both are space by default).  They can be specified by the
B<--tabhead> and B<--tabspace> options.  If the option value is longer
than a single character, it is evaluated as a Unicode name.  The next
example makes tab characters visible while keeping the text layout.

    $ ansifold --expand --tabhead="MEDIUM SHADE" --tabspace="LIGHT SHADE"

Option B<--tabstyle> allows setting the B<--tabhead> and B<--tabspace>
characters at once according to the given style name.  Select from
C<dot>, C<symbol> or C<shade>.  Styles are defined in the
L<Text::ANSI::Fold> library.

=head1 SEE ALSO

L<ansifold|https://github.com/kaz-utashiro/ansifold>

L<Text::ANSI::Fold|https://github.com/kaz-utashiro/Text-ANSI-Fold>

L<Text::ANSI::Fold::Util|https://github.com/kaz-utashiro/Text-ANSI-Fold-Util>

L<Getopt::EX::Numbers>

L<https://www.w3.org/TR/jlreq/>
Requirements for Japanese Text Layout,
W3C Working Group Note 11 August 2020

=head1 AUTHOR

Kazumasa Utashiro

=head1 LICENSE

Copyright 2018- Kazumasa Utashiro

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

use open IO => 'utf8', ':std';
use Encode;
# Decode, as UTF-8, every command-line argument that is not already
# flagged as a character string.  @ARGV is modified in place through
# the loop alias.
for my $arg (@ARGV) {
    $arg = decode 'utf8', $arg unless utf8::is_utf8($arg);
}

use Pod::Usage;
use List::Util qw(min);
use Hash::Util qw(lock_keys);
use Text::ANSI::Fold qw(:constants);
use Data::Dumper;

# Option storage.  The first group carries explicit defaults; the
# second group starts out undef and is only handed to Text::ANSI::Fold
# when the user supplies a value.
my %opt = (
    width     => [],
    discard   => [],
    boundary  => "none",
    separate  => "\n",
    paragraph => 0,
    linebreak => LINEBREAK_NONE,
    runin     => 4,
    runout    => 4,
    (map { $_ => undef }
     qw(padding padchar ambiguous expand tabstop tabhead tabspace tabstyle)),
);

# Fix the set of keys so that a mistyped option name raises an error
# instead of silently creating a new entry.
lock_keys(%opt);

# Getopt::Long specification.  Most options store straight into %opt;
# the ones that set more than one value, or need translation, use a
# callback that receives the option name and the option value.
my @optargs = (
    "width|w=s"      =>  $opt{width},
    "boundary=s"     => \$opt{boundary},
    "padding!"       => \$opt{padding},
    "padchar=s"      => \$opt{padchar},
    "ambiguous=s"    => \$opt{ambiguous},
    "paragraph|p+"   => \$opt{paragraph},
    "separate=s"     => \$opt{separate},
    "linebreak|lb=s" => sub {
        my (undef, $mode) = @_;
        # OR together the flag of every mode name found (case
        # insensitively) in the value; no match leaves LINEBREAK_NONE.
        my $flags = LINEBREAK_NONE;
        for my $rule ([ qr/all/i,    LINEBREAK_ALL    ],
                      [ qr/runin/i,  LINEBREAK_RUNIN  ],
                      [ qr/runout/i, LINEBREAK_RUNOUT ]) {
            my ($pattern, $flag) = @$rule;
            $flags |= $flag if $mode =~ $pattern;
        }
        $opt{linebreak} = $flags;
    },
    "runin=i"  => \$opt{runin},
    "runout=i" => \$opt{runout},
    "n" => sub { $opt{separate} = "" },
    "smart|s!" => sub {
        my (undef, $enabled) = @_;
        # --smart turns both features on, --no-smart turns both off.
        @opt{qw(boundary linebreak)} = $enabled
            ? ('word', LINEBREAK_ALL)
            : ('none', LINEBREAK_NONE);
    },
    "expand!"    => \$opt{expand},
    "tabstop=i"  => \$opt{tabstop},
    "tabhead=s"  => \$opt{tabhead},
    "tabspace=s" => \$opt{tabspace},
    "tabstyle=s" => \$opt{tabstyle},
    "discard=s"  =>  $opt{discard},
    "version|v" => sub {
        print "Version: $VERSION\n";
        exit;
    },
);

# Parse the command line; show the usage message when parsing fails.
use Getopt::EX::Long;
Getopt::Long::Configure("bundling");
GetOptions(@optargs) or pod2usage();

# Folding width used when no -w option is given.
my $DEFAULT_WIDTH = 72;

# Indexes of the columns to print when several widths are given.
my @width_index;

use Getopt::EX::Numbers;
my $numbers = Getopt::EX::Numbers->new;

# Flatten all -w arguments into one list of integer widths.  Each
# argument is split on commas, keeping trailing empty fields so that
# "80," yields 80 followed by an empty field.  A field is one of:
#
#   (empty)        taken as 0
#   10, -3         a plain, possibly negative, integer
#   a:b:c:d{n}     a Getopt::EX::Numbers spec, with the resulting
#                  sequence repeated n times (once without {n})
#
# Anything else aborts with a format error.
my @width = map {
    if    (/^$/)      { 0 }
    elsif (/^-?\d+$/) { $_ }
    elsif (/^(-?[-\d:]+) (?:\{(\d+)\})? $/x) {
        # Copy the captures first: $1 would otherwise be passed by
        # alias and could change if the parser ran a regex of its own
        # before reading its argument.
        my ($spec, $repeat) = ($1, $2 // 1);
        ($numbers->parse($spec)->sequence) x $repeat;
    }
    else {
        die "$_: width format error.\n";
    }
}
map { split /,/, $_, -1 }
@{$opt{width}};

# Turn the parsed width list into the value given to the folder:
# the default when nothing was specified, a plain number for a single
# width, or an array reference for several widths.
if (@width == 0) {
    $opt{width} = $DEFAULT_WIDTH;
}
elsif (@width == 1) {
    $opt{width} = $width[0];
}
else {
    # Each field becomes [ width, keep-flag ].  A negative non-final
    # width is folded with its absolute value but not printed.  The
    # final width is used as is and is printed unless it is zero.
    my $last   = int(pop @width);
    my @fields = map { $_ < 0 ? [ -$_, 0 ] : [ $_, 1 ] } @width;
    push @fields, [ $last, $last ];

    @width       = map  { $_->[0] } @fields;
    @width_index = grep { $fields[$_][1] } 0 .. $#fields;
    $opt{width}  = \@width;
}

# A --tabhead / --tabspace value longer than one character is taken as
# a Unicode character name and replaced by the character it names.
# An unknown name is a fatal error.
use charnames ':loose';
for my $tabchar (@opt{qw(tabhead tabspace)}) {
    next unless defined $tabchar and length($tabchar) > 1;
    # Use defined-or, not ||: a valid name may resolve to a false
    # string, e.g. "DIGIT ZERO" gives "0".
    $tabchar = charnames::string_vianame($tabchar)
        // die "$tabchar: invalid name\n";
}

# Build the folder object from every option that has a defined value,
# passing the parameters in the order listed here.
my @fold_args;
for my $name (qw(width boundary padding padchar ambiguous
                 linebreak runin runout
                 expand tabstyle tabstop tabhead tabspace discard)) {
    next unless defined $opt{$name};
    push @fold_args, $name => $opt{$name};
}
my $fold = Text::ANSI::Fold->new(@fold_args);

# String printed between folded columns.  In the option value the
# escapes "\\" and "\n" stand for a backslash and a newline; any other
# backslash sequence is left as written.
my %escaped_char = ('\\' => '\\', n => "\n");
my $separator = $opt{separate} =~ s{ ( \\ (.) ) }{
    $escaped_char{$2} // $1
}gexr;

# Main loop: fold each input line and print the resulting columns
# joined by the separator.
while (my $line = <>) {
    # Remember whether the line had a newline, so that one is written
    # back only when the input had it.
    my $had_newline = chomp $line;

    my @columns = $fold->text($line)->chops;

    # With a list of widths, print only the columns selected by
    # @width_index.  The test is on $opt{width}, not on @width_index:
    # the index list is legitimately empty when every field is
    # discarded (e.g. "-w -3,"), and then nothing must be printed.
    if (ref $opt{width} eq 'ARRAY') {
        @columns = grep { defined } @columns[@width_index];
    }

    print join $separator, @columns;
    print "\n" if $had_newline;
    # --paragraph may be repeated; print one extra newline per use.
    print "\n" x $opt{paragraph} if $opt{paragraph} > 0;
}

exit;

#  LocalWords:  unicode ansifold LANG colrm KINSOKU
