#!/usr/bin/perl -w
################################################################################
#
#  scanprov -- scan Perl headers for provided macros, and add known
#              exceptions, and functions we weren't able to otherwise find.
#              Thus the purpose of this file has been expanded beyond what its
#              name says.
#
#  The lines added have a code to signify they are added by us:
#   M means it is a macro
#   X means it is a known exceptional item
#   F means it is a function in embed.fnc that the normal routines didn't find
#
#  The regeneration routines do not know the prototypes for the macros scanned
#  for, which is gotten from documentation in the source.  (If they were
#  documented, they would be put in parts/apidoc.fnc, and test cases generated
#  for them in mktodo.pl).  Therefore these are all undocumented.  It would be
#  best if people would add document to them in the perl source, and then this
#  portion of this function would be minimized.
#
################################################################################
#
#  Version 3.x, Copyright (C) 2004-2013, Marcus Holland-Moritz.
#  Version 2.x, Copyright (C) 2001, Paul Marquess.
#  Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the same terms as Perl itself.
#
################################################################################

use strict;
use Getopt::Long;

require './parts/ppptools.pl';
require './parts/inc/inctools';
require './devel/devtools.pl';

our %opt = (
  mode    => 'check',
  install => '/tmp/perl/install/default',
  blead   => 'bleadperl',
  debug   => 0,
 'debug-start' => "",
);

GetOptions(\%opt, qw( install=s mode=s blead=s debug=i debug-start=s)) or die;

my $write = $opt{mode} eq 'write';

# Get the list of known macros.  Functions are calculated separately below
my %embed = map { $_->{flags}{m} ? ( $_->{name} => 1 ) : () }
            parse_embed(qw(parts/embed.fnc parts/apidoc.fnc));

# @provided is set to everthing provided
my @provided = map { /^(\w+)/ ? $1 : () } `$^X ppport.h --list-provided`;

# There are a few exceptions that have to be dealt with specially.  Add these
# to the list of things to scan for.
my $hard_to_test_ref = known_but_hard_to_test_for();
push @provided, keys %$hard_to_test_ref;

my $base_dir = 'parts/base';
my $todo_dir = 'parts/todo';

if ($write) {

    # Get the list of files, which are returned sorted, and so the min version
    # is in the 0th element
    my @files = all_files_in_dir($base_dir);
    my $file =  $files[0];
    my $min_perl = $file;
    $min_perl =~ s,.*/,,;    # The name is the integer of __MIN_PERL__

    # There are a very few special cases that we may not find in scanning, but
    # exist all the way back.  Add them now to avoid throwing later things
    # off.
    print "-- $file --\n";
    open F, ">>$file" or die "$file: $!\n";
    for (qw(RETVAL CALL THIS)) { # These are also in hard_to_test_for(),
                                 # so can't be in blead, as they are skipped
                                 # in testing, so no real need to check that
                                 # they aren't dups.
        print "Adding $_ to $file\n";
        print F format_output_line($_, 'X');
    }
    close F;

    # Now we're going to add the hard to test symbols.  The hash has been
    # manually populated and commited, with the version number ppport supports
    # them to.
    #
    # This is a hash ref with the keys being all symbols found in all the
    # files in the directory, and the values being the perl versions of each
    # symbol.
    my $todo = parse_todo($todo_dir);

    # The keys of $hard_to_test_ref are the symbols, and the values are
    # subhashes, with each 'version' key being its proper perl version.
    # Below, we invert %hard_to_test, so that the keys are the version, and
    # the values are the symbols that go in that version
    my %add_by_version;
    for my $hard (keys %$hard_to_test_ref) {

        # But if someone ups the min version we support, we don't want to add
        # something less than that.
        my $version = int_parse_version($hard_to_test_ref->{$hard});
        $version = $min_perl if $version < $min_perl;
        $version = format_version_line($version);

        push @{$add_by_version{$version}}, $hard
                unless grep { $todo->{$_}->{version} eq $hard } keys %$todo;
    }

    # Only a few files will have exceptions that apply to them.  Rewrite each
    foreach my $version (keys %add_by_version) {
        my $file = "$todo_dir/" . int_parse_version($version);
        print "-- Adding known exceptions to $file --\n";
        my $need_version_line = ! -e $file;
        open F, ">>$file" or die "$file: $!\n";
        print F format_version_line($version) . "\n" if $need_version_line;
        foreach my $symbol (sort dictionary_order @{$add_by_version{$version}})
        {
            print "adding $symbol\n";
            print F format_output_line($symbol, 'X');
        }
        close F;
    }
}

# Now that we've added the exceptions to a few files, we can parse
# and deal with all of them.
my $perls_ref = get_and_sort_perls(\%opt);

die "Couldn't find any perls" unless @$perls_ref > 1;

find_first_mentions($perls_ref,   # perls to look in
                    \@provided,   # List of symbol names to look for
                    '*.h',        # Look in all hdrs.
                    1,            # Strip comments
                   'M'
                   );

# Now look for functions that we didn't test in mktodo.pl, generally because
# these were hidden behind #ifdef's.
my $base_ref = parse_todo($base_dir);
my @functions = parse_embed(qw(parts/embed.fnc));

# We could just gather data for the publicly available ones, but having this
# information available for everything is useful (for those who know where to
# look)
#@functions = grep { exists $_->{flags}{A} } @functions;

# The ones we don't have info on are the ones in embed.fnc that aren't in the
# base files.  Certain of these will only be in the Perl_foo form.
my @missing = map { exists $base_ref->{$_->{name}}
                    ? ()
                    : ((exists $_->{flags}{p} && exists $_->{flags}{o})
                       ? ((exists $base_ref->{$_->{"Perl_$_->{name}"}}
                           ? ()
                           : "Perl_$_->{name}"))
                       : $_->{name})
                  } @functions;

# These symbols will be found in the autogen'd files, and they may be
# commented out in them.
find_first_mentions($perls_ref,
                    \@missing,
                    [ 'embed.h', 'proto.h' ],
                    0,          # Don't strip comments
                   'F'
                   );

sub format_output_line
{
    my $sym = shift;
    my $code = shift;

    return sprintf "%-30s # $code added by $0\n", $sym;
}

sub find_first_mentions
{
    my $perls_ref =    shift;   # List of perls to look in
    my $look_for_ref = shift;   # List of symbol names to look for
    my $hdrs =         shift;   # Glob of hdrs to look in
    my $strip_comments = shift;
    my $code           = shift; # Mark entries as having this type

    $hdrs = [ $hdrs ] unless ref $hdrs;

    my @remaining = @$look_for_ref;

    my %v;

    # We look in descending order of perl versions.  Each time through the
    # loop @remaining is narrowed.
    for my $p (@$perls_ref) {
        print "checking perl $p->{version}...\n";

        # Get the hdr files associated with this version
        my $archlib = `$p->{path} -MConfig -l -e 'print \$Config{archlib}'`;
        chomp $archlib;
        local @ARGV;
        push @ARGV, glob "$archlib/CORE/$_" for @$hdrs;

        my %sym;

        # %sym's keys are every single thing that looks like an identifier
        # (beginning with a non-digit \w, followed by \w*) that occurs in all
        # the headers, regardless of where (outside of comments).
        local $/ = undef;
        while (<>) {  # Read in the next file

            # Strip comments, from perl faq
            if ($strip_comments) {
                s#/\*[^*]*\*+([^/*][^*]*\*+)*/|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $2 ? $2 : ""#gse;
            }

            $sym{$_}++ for /(\b[^\W\d]\w*)/g;
        }

        # @remaining is narrowed to include only those identifier-like things
        # that are mentioned in one of the input hdrs in this release.  (If it
        # isn't even mentioned, it won't exist in the release.)  For those not
        # mentioned, a key is added of the identifier-like thing in %v.  It is
        # a subkey of this release's "todo" release, which is the next higher
        # one.  If we are at version n, we have already done version n+1 and
        # the provided element was mentioned there, and now it no longer is.
        # We take that to mean that to mean that the element became provided
        # for in n+1.
        @remaining = map { $sym{$_} or $v{$p->{todo}}{$_}++;
                            $sym{$_} ? $_ : ()
                        } @remaining;

    }

    $v{$perls_ref->[-1]{file}}{$_}++ for @remaining;

    # Read in the parts/base files.  The hash ref has keys being all symbols
    # found in all the files in base/, which are all we are concerned with
    # became defined in.
    my $base_ref = parse_todo($base_dir);


    # Now add the results from above.  At this point, The keys of %v are the 7
    # digit BCD version numbers, and their subkeys are the symbols provided by
    # D:P that are first mentioned in this version, like this:
    #   '5009002' => {
    #                  'MY_CXT_CLONE' => 1,
    #                  'SV_NOSTEAL' => 1,
    #                  'UTF8_MAXBYTES' => 1
    #                },

    for my $v (keys %v) {

        # Things listed in blead (the most recent file) are special.  They are
        # there by default because we haven't found them anywhere, so they
        # don't really exist as far as we can determine, so shouldn't be
        # listed as existing.
        next if $v > $perls_ref->[0]->{file};

        # @new becomes the symbols for version $v not already in the file for
        # $v
        my @new = sort dictionary_order grep { !exists $base_ref->{$_} }
                                                                keys %{$v{$v}};
        @new or next; # Nothing new, skip writing

        my $file = $v;
        $file =~ s/\.//g;
        $file = "$base_dir/$file";
        -e $file or die "non-existent: $file\n";
        print "-- $file --\n";
        $write and (open F, ">>$file" or die "$file: $!\n");
        for (@new) {
            print "adding $_\n";
            $write and print F format_output_line($_, $code);
        }
        $write and close F;
    }
}
