Mercurial > forge

#!/usr/bin/env perl
#
# Traverse the directory tree and extract author information from .m
# files.  Put author information in the file 'AUTHORS'.
# Understands variations of these entries:
#
#   ## Copyright (C)  year(s)  name1   <email>
#   ## Copyright (C)  year(s)  name2   <email>
#   ## Author:  name3  <email>
#   ## Author:  name4  <email>
#
# Albert Danial Dec 15 2001
#               Nov 30 2002 - Attribute to 'Anonymous' any file which has
#                             licensing terms (ie, granted to public domain)
#                             but no author.  Show code and comment line
#                             counts.
#               Apr 25 2005 - Added -s option, better logic to identify
#                             public domain files.
# Dirk Eddelbuettel 07 Jul 2004  also look at .cc files and // comments
#
# Usage:  admin/get_authors  [-s <# lines>]
#            Scans .m and .cc files and extracts author and copyright
#            information.  Creates the AUTHORS file.
#                       Option:  -s  <n>    Set the size of a "short" program
#                                           to <n> non-commented lines.
#                                           Copyrights aren't needed for short
#                                           programs.  Default <n> is 5.
#
use warnings;
use strict;
use vars qw($opt_s );
use Cwd;
use Getopt::Std;
use File::Find;

getopts('s:');
$opt_s = 5 unless $opt_s;  # set the default value
my $location = getcwd;
my $PATH     = "";
if ($location =~ m{^(.*)/admin$}) {
    chdir "..";
    $PATH = "$1/";
}

my @files = ();       # to contain relative path names of each .m file
find(\&wanted, ".");  # start here & descend recursively; populate @files

my %file_data = ();
classify_files(\@files,       # in
               \%file_data);  # out file_data{file}{'name'} = [ names ]
                              #                    {'year'} = [ years ]
                              #                    {'mail'} = [ email addrs ]
                              #                    {'cpyr'} = [ 'C' or ' '  ]
                              #                    {'n_code'}    = # lines code
                              #                    {'n_comment'} = # lines of
                              #                                    comments

# traverse file_data and extract
#   - files without copyrights
#   - files without authors
#   - files grouped by author
#   - author to email map
#   - number of lines of code, number of lines of comments
my @unattributed_files  = ();
my @uncopyrighted_files = ();
my @short_files         = ();
my @public_domain       = ();
my %email               = ();   # email{ author name } = email address
my %files               = ();   # files{ author name } = [ list of files ]
foreach my $f (sort @files) {
    # each file can have multiple authors, loop over each author
    if (!defined @{$file_data{$f}{name}}) {
	if ($file_data{$f}{n_code} > $opt_s) { # not a short program
           push @unattributed_files, $f;
        } else {
           push @short_files       , $f;
        }
        next;
    }
    my $copyrighted = 0;
    for (my $i = 0; $i < scalar @{$file_data{$f}{name}}; $i++) {
        if (defined $file_data{$f}{mail}[$i]) {
            $email{ $file_data{$f}{name}[$i] } = $file_data{$f}{mail}[$i];
        }
        if (defined $file_data{$f}{cpyr}[$i]) {
            $copyrighted = 1;
            push @public_domain, $f if $file_data{$f}{cpyr}[$i] eq "P";
        }
        $files{ $file_data{$f}{name}[$i] }{$f} = 1;
    }
    next if $copyrighted;
    if ($file_data{$f}{n_code} > $opt_s) { # then it is not a short program
        push @uncopyrighted_files, $f;
    } else {
        push @short_files        , $f;
    }
}

printf "%3d uncopyrighted files:%s lines of code/lines of comments\n",
       scalar @uncopyrighted_files, ' ' x 22;
foreach my $f (sort @uncopyrighted_files) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d unattributed files:%s lines of code/lines of comments\n",
       scalar @unattributed_files, ' ' x 23;
foreach my $f (sort @unattributed_files) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d public domain files:%s lines of code/lines of comments\n",
       scalar @public_domain, ' ' x 22;
foreach my $f (sort @public_domain) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d uncopyrighted short (<= %2d lines) files:%s lines of code/lines of comments\n",
       scalar @short_files, $opt_s, ' ' x 2;
foreach my $f (sort @short_files) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

my $Auth_file = "AUTHORS";
open(OUT, ">$Auth_file") or die "Cannot write $Auth_file:  $!\n";
printf "%3d authors:\n", scalar keys %files;
foreach my $n (sort keys %files) {
    printf     "%-28s", $n;
    printf OUT "%-28s", $n;
    print     $email{$n} if defined $email{$n};
    print OUT $email{$n} if defined $email{$n};
    print     "\n";
    print OUT "\n";
    my $i = 0;
    foreach my $f (sort keys %{$files{$n}}) {
        printf "%3d. %-50s  %3d/%3d\n",
               ++$i, $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
    }
}
close OUT;
warn "Wrote ${PATH}${Auth_file}\n";

# # # # # # #

#sub by_last_name { # {{{1 for sorting on names
#    (my $A = $a) =~ s/.*?(\w+)$/$1/;
#    (my $B = $b) =~ s/.*?(\w+)$/$1/;
#    return lc($A) cmp lc($B);
#} # 1}}}

# # # # # # #

sub classify_files { # {{{1
    my ($ra_files,  # in, list of files
        $rhha_data, # out,  data{file}{ name | year | cpyr } = [entries]
       ) = @_;
    warn "Found ", scalar grep (/\.m$/,  @{$ra_files}), " .m files; ",
                   scalar grep (/\.cc$/, @{$ra_files}), " .cc files",
                   "\n";
    foreach my $f (@{$ra_files}) {
        open(IN, $f) or die "Cannot read $f: $!\n";
        my $found_copyright = 0;
        my $found_author    = 0;
        $rhha_data->{$f}{n_code}    = 0;
        $rhha_data->{$f}{n_comment} = 0;
        while (<IN>) {
            # find the copyright line and extract author info from it
            if (/^[#%\/]+/) {           # a comment
                ++$rhha_data->{$f}{n_comment};
            } elsif (!/^\s*$/) {        # not a blank line
                ++$rhha_data->{$f}{n_code};
            }
            s/all\s+rights\s+reserved\.?//i;
            s/\bby\s+//i;

            if (/^\s*[#%\/\*]*          #  one or more leading comment markers
                  \s*copyright          #  Copyright
                  \s*(\(c\))?           #  (c)    - optional           $1
                  \s*(\d[,\- 0-9]+)     #  Year (or years)             $2
                  \s+(\w.*?)            #  name                        $3
                  \s*(<.*>)?            #  email  - optional           $4
                  \s*$/ix) {
                $found_copyright = 1;
                $found_author    = 1;
                my $year  = $2;
                my $name  = $3;
                   $name  = "John W. Eaton" if $name eq "jwe";
                   $name  =~ s/\.\s*$//; # strip trailing period
                my $email = "" || $4;
                $name =~ s/^\s+//;  # strip leading  whitespace
                $name =~ s/\s+$//;  # strip trailing whitespace
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, $year;
                push @{$rhha_data->{$f}{mail}}, $email;
                push @{$rhha_data->{$f}{cpyr}}, 'C';
                # don't exit w/last because there could be multiple copyrights
            } elsif (
                /^\s*[#%\/\*]*            # one or more leading comment markers
                    \s*author\s*:?        #  Author
                    \s+(\w.*?)            #  name                        $1
                    \s*(<.*>)?            #  email  - optional           $2
                    \s*$/ix) {
                my $name  = $1;
                   $name  =~ s/\.\s*$//; # strip trailing period
                   $name  = "John W. Eaton" if $name eq "jwe";
                my $email = "" || $2;
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, "";
                push @{$rhha_data->{$f}{mail}}, $email;
                $found_author = 1;
            } elsif (
                /^\s*[#%\/\*]*            # one or more leading comment markers
                  .*?                     #  some leading text
                  \b(grant(ed)?|place(d)?|give(n)?|is)(\s+this)?
                  (\s+(file|program|script|code|algorithm))?(\s+(in|to))?
                  (\s+the)?
                  \s+public\s+domain
                /ix) {
                push @{$rhha_data->{$f}{cpyr}}, 'P';
                $found_copyright = 1;
            }
        }
        if ($found_copyright and !$found_author) {
            push @{$rhha_data->{$f}{name}}, "Anonymous";
            push @{$rhha_data->{$f}{year}}, "";
            push @{$rhha_data->{$f}{mail}}, "";
            push @{$rhha_data->{$f}{cpyr}}, 'C';
        }
        close IN;
    }
} # 1}}}

# # # # # # #

sub wanted { # {{{1 populates global array @files
    return unless -f and /\.(m|cc)$/;  # only want .m files (for now)
    push @files, "$File::Find::name";
} # 1}}}
author	schloegl
date	Sat, 12 Sep 2015 14:16:39 +0000
parents	0bcea6c6961d
children