# Windows-Server-2003/tools/parsetable.pm

#---------------------------------------------------------------------
package ParseTable;
#
#   Copyright (c) Microsoft Corporation. All rights reserved.
#
# Version: 1.00 (07/12/2000) : (JeremyD) initial version
#          1.01 (08/25/2000) : (JeremyD) allow single heading tables
#---------------------------------------------------------------------
use strict;
use vars qw(@ISA @EXPORT $VERSION);
use IO::File;
use Carp;
use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(parse_table_lines parse_table_file);

$VERSION = '1.01';


# parse_table_lines( @lines, $storage )
#
# Parses the first table found in @lines and removes every line it consumed
# from the caller's array (the \@ prototype passes the array by reference).
#
#   @lines   - lines of table text; a trailing newline on each is optional
#   $storage - optional ARRAY or HASH reference that receives one hash per
#              data line (heading name => field value).  An ARRAY ref has
#              each row pushed onto it; a HASH ref is keyed by the row's
#              first field, so a later duplicate key replaces an earlier
#              one.  Any other value causes the rows to be discarded, which
#              lets a caller skip over a table.
#
# Returns the number of lines left in @lines; 0 means nothing is left to
# parse.  Successive calls therefore walk through successive tables.
sub parse_table_lines (\@;$) {
    my $lines_ref = shift;           # the array of lines is modified in place
    my $storage = shift;             # where parsed rows go (see above)
    my $storage_type = ref($storage); # '' when $storage is not a reference
    my @heading;                     # the current set of headings

    # Loop on the array itself rather than on the truth of the shifted value:
    #  a chomped blank line ("") or a final "0" without a newline is false
    #  and must not be mistaken for the end of the input.
  LINE:
    while (@$lines_ref) {
        my $line = shift @$lines_ref;
        next LINE unless defined $line; # tolerate holes in the caller's array
        chomp $line;
        next LINE if $line =~ /^\s*$/; # skip empty lines

        if ($line =~ /^\s*[#;](.*)/) { # comments may contain headings
            my $comment = $1;

            # a heading line is nothing but bracketed names separated by
            #  whitespace; any other comment is simply ignored
            if ($comment =~ /^\s*(?:\[\w+\]\s*)+$/) {
                if (@heading) {
                    # already have headings, so this starts the next table:
                    #  put the line back for the next call and stop here
                    unshift @$lines_ref, $line;
                    last LINE;
                }
                @heading = $comment =~ /\[(\w+)\]/g; # first set of headings
            }
            next LINE; # done parsing this comment
        }

        next LINE unless @heading; # no data processing until we have headings

        # fields are separated by 2 or more white space characters, however
        #  a single tab will also suffice
        my @data = split /(?=\t)\s+|\s{2,}/, $line;

        # require exactly 1 data field per heading; other lines are skipped
        next LINE unless @data == @heading;

        # use our current headings as keys and make a hash of the data
        my %row;
        @row{@heading} = @data;

        # store our current line's data in the reference passed to us;
        #  a non-ref (or other kind of ref) storage quietly discards the row
        if ($storage_type eq 'ARRAY') {
            push @$storage, \%row;
        } elsif ($storage_type eq 'HASH') {
            $storage->{$data[0]} = \%row;
        }
    }

    # the data array was modified in place, parsed lines have been removed
    return scalar @$lines_ref;
}

# parse_table_file( $filename, @storage_refs )
#
# Reads all of $filename and parses every table in it, handing the Nth
# table the Nth entry of @storage_refs as its storage location.  Each entry
# is passed straight through to parse_table_lines; when the list runs out,
# later tables receive undef.
#
# Croaks with "Unable to open file ..." if the file cannot be opened for
# reading.  No meaningful value is returned.
sub parse_table_file ($;@) {
    my $filename = shift;
    my @store_refs = @_;

    # direct method call rather than the indirect-object form "new IO::File"
    my $fh = IO::File->new($filename, "r")
        or croak "Unable to open file $filename: $!";

    my @lines = $fh->getlines;
    $fh->close; # everything is in memory, the handle is no longer needed

    # each call consumes one table's worth of lines from @lines
    my $i = 0;
    while (@lines) {
        parse_table_lines(@lines, $store_refs[$i++]);
    }

    return;
}


1;

__END__

=head1 NAME

ParseTable - Extract data from a formatted text table

=head1 SYNOPSIS

  use ParseTable;

  parse_table_file("foobar.txt", \%table_one, \@table_two, ...);

  $lines_remaining = parse_table_lines(@data_lines,\%table);


=head1 DESCRIPTION

This module provides an easy way to extract formatted data from text files. 

=over 4

=item parse_table_file( $filename, @storage_refs )

parse_table_file takes a filename to parse and a list of storage locations 
for the tables found within that file.

=item parse_table_lines( @data_lines, $storage_ref )

parse_table_lines takes an array of data lines and a storage location for 
the first table found in the lines. It modifies the array in place and returns 
the number of unparsed lines.

=back

The format for a table is:

 ;comments
 ; [heading1] [heading2]
 item1  item2
 item3 with internal space  item4
 item5	item6


Each line of data in a table is stored as a hash with the heading names as 
keys and the data items as values.

If an array reference is specified as the storage location the data hash for 
each line will be pushed on to the array.

If the storage location is a hash reference then the data hash for each line 
will be stored using the value of the first column as the key. In the case of 
duplicate data items the last one appearing in the table takes precedence.

=head1 EXAMPLES

 parse_table_file("codetable.txt",\@data);
 for $data (@data) {
     print "$data->{Lang} is the lang code for $data->{Comments}\n";
 }


 parse_table_file("codetable.txt",\%data,\%flavors);
 print "your site is $data{$user_lang}{Site}\n";
 print "your flavor is $flavors{$user_lang}{$user_arch}\n";


 codetable.txt:
 ;
 ;     This is just an example of a file with two tables
 ;


 ;[Lang] [LCID] [Class] [Site]  [Comments] 
 ;-------------------------------------------------------------
 ;       
 ARA  0x0401    @CS    REDMOND  Arabic
 CHS  0x0804    @FE    REDMOND  Chinese Simplified (PR China)
 CHT  0x0404    @FE    REDMOND  Chinese Traditional (Taiwan Region)
 CHH  0x0404    @FE    REDMOND  Chinese Traditional (Hong Kong Region)
 FR   0x040C    @EU    DUBLIN   French
 GER  0x0407    @EU    REDMOND  German
 ;[Lang]        [x86]                 [ia64]
 ;=============================================
 USA            per;pro;srv;ads;dtc   pro;ads;dtc
 GER            per;pro;srv;ads       pro;ads
 CHT            per;pro;srv;ads       pro;ads
 CHH            per;pro;srv;ads       pro;ads
 CHS            per;pro;srv;ads       pro;ads
 ARA            per;pro               pro

=head1 NOTES

The parser can handle blank lines and comments beginning with either ';' or 
'#'.

A heading line must appear before any data lines. A heading line is a special 
form of comment consisting of field names enclosed in brackets [].

Data lines must have exactly as many fields as there are headings in the
heading line. Lines with any other number of fields are silently skipped.

Data fields must be separated by 2 or more whitespace characters or by a
single tab. Single spaces within data items do not require quoting or escaping.

Quoting and escaping are not supported in any way. This means you may not 
have a data field with the value "" (empty string) or more than 1 space in a row.

Storage locations are not cleared before parsing begins.

Heading names must match the regex /\w+/.

Should probably be expanded to handle returning a plain array for single column 
tables (lists of filenames, etc).

=head1 SEE ALSO

  hashtext.pm

=head1 AUTHOR

Jeremy Devenport <JeremyD>

=head1 COPYRIGHT

Copyright (c) Microsoft Corporation. All rights reserved.

=cut
Initiall commit 2024-08-04 01:28:15 +02:00			`#---------------------------------------------------------------------`
			`package ParseTable;`
			`#`
			`# Copyright (c) Microsoft Corporation. All rights reserved.`
			`#`
			`# Version: 1.00 (07/12/2000) : (JeremyD) inital version`
			`# 1.01 (08/25/2000) : (JeremyD) allow single heading tables`
			`#---------------------------------------------------------------------`
			`use strict;`
			`use vars qw(@ISA @EXPORT $VERSION);`
			`use IO::File;`
			`use Carp;`
			`use Exporter;`
			`@ISA = qw(Exporter);`
			`@EXPORT = qw(parse_table_lines parse_table_file);`

			`$VERSION = '1.01';`


			`sub parse_table_lines (\@;$) {`
			`my $lines_ref = shift; # the array of lines is modified in place`
			`my $storage = shift; # an array or hash ref to stuff the data in, if`
			`# this is not a ref we quietly discard the data`
			`# this could be useful to skip one table`
			`my @heading; # the current set of headings`

			`LINE:`
			`while (my $line = shift @$lines_ref) {`
			`chomp $line;`
			`next LINE if $line =~ /^\s*$/; # skip empty lines`
			`if ($line =~ /^\s[#;](.)/) { # comments may contain headings`
			`my $comment = $1;`
			`if ($comment =~ /^\s(?:\[\w+\]\s)+$/) { # bracketed names seperated`
			`# by whitespace`
			`if (@heading) { # already have headings, must be a new table`
			`unshift @$lines_ref, $line; # this line is part of the next`
			`# table, we need to put it back`
			`last LINE; # a new table implies the end of the current one`
			`} else { # found our first set of headings`
			`while ($comment =~ /\[(\w+)\]/g) { # look for headings`
			`push @heading, $1;`
			`}`
			`}`
			`}`
			`next LINE; # done parsing this comment`
			`}`

			`next unless @heading; # no data processing until we have our headings`

			`# fields are seperated by 2 or more white space characters, however`
			`# a single tab will also suffice`
			`my @data = split /(?=\t)\s+\|\s{2,}/, $line;`

			`next unless $#heading == $#data; # require 1 data field per heading`

			`# use our current headings as keys and make a hash of the data`
			`my %hash;`
			`for (my $i=0; $i<@heading; $i++) {`
			`$hash{$heading[$i]} = $data[$i];`
			`}`

			`# store our current line's data in the reference passed to us`
			`if (ref $storage eq 'ARRAY') {`
			`push @$storage, \%hash;`
			`} elsif (ref $storage eq 'HASH') {`
			`$storage->{$data[0]} = \%hash;`
			`} else {`
			`# do nothing`
			`# this allows skipping a table by passing in a non-ref storage`
			`}`
			`}`

			`# the data array was modified in place, parsed lines have been removed`
			`# successive calls will parse any remaining tables found in the data array`
			`# return the number of unparsed lines, 0 indicates no remaining tables`
			`return scalar @$lines_ref;`
			`}`

			`sub parse_table_file ($;@) {`
			`my $filename = shift;`
			`my @store_refs = @_;`
			`my $fh = new IO::File $filename, "r";`
			`if (defined $fh) {`
			`my @lines = $fh->getlines;`
			`my $i = 0;`
			`while (@lines) {`
			`parse_table_lines(@lines, $store_refs[$i++]);`
			`}`
			`undef $fh;`
			`} else {`
			`croak "Unable to open file $filename: $!";`
			`}`
			`}`


			`1;`

			`__END__`

			`=head1 NAME`

			`ParseTable - Extract data from a formatted text table`

			`=head1 SYNOPSIS`

			`use ParseTable;`

			`parse_table_file("foobar.txt", \%table_one, \@table_two, ...);`

			`$lines_remaining = parse_table_lines(@data_lines,\%table);`


			`=head1 DESCRIPTION`

			`This module provides an easy way to extract formatted data from text files.`

			`=over 4`

			`=item parse_table_file( $filename, @storage_refs )`

			`parse_table_file takes a filename to parse and a list of storage locations`
			`for the tables found within that file.`

			`=item parse_table_lines( @data_lines, $storage_ref )`

			`parse_table_lines takes an array of data lines and a storage location for`
			`the first table found in the lines. It modifies the array in place and returns`
			`the number of unparsed lines.`

			`=back`

			`The format for a table is:`

			`;comments`
			`; [heading1] [heading2]`
			`item1 item2`
			`item3 with internal space item4`
			`item5 item6`


			`Each line of data in a table is stored as a hash with the heading names as`
			`keys and the data items as values.`

			`If an array reference is specified as the storage location the data hash for`
			`each line will be pushed on to the array.`

			`If the storage location is a hash reference then the data hash for each line`
			`will be stored using the value of the first column as the key. In the case of`
			`duplicate data items the last one appearing in the table takes precedence.`

			`=head1 EXAMPLES`

			`parse_table_file("codetable.txt",\@data)`
			`for $data (@data) {`
			`print "$data->{Lang} is the lang code for $data->{Comments}\n";`
			`}`


			`parse_table_file("codetable.txt",\%data,\%flavors)`
			`print "your site is $data->{$user_lang}{Site}\n";`
			`print "your flavor is $flavors->{$user_lang}{$user_arch}\n";`



			`codetable.txt:`
			`;`
			`; This is just an example of a file with two tables`
			`;`


			`;[Lang] [LCID] [Class] [Site] [Comments]`
			`;-------------------------------------------------------------`
			`;`
			`ARA 0x0401 @CS REDMOND Arabic`
			`CHS 0x0804 @FE REDMOND Chinese Simplified (PR China)`
			`CHT 0x0404 @FE REDMOND Chinese Traditional (Taiwan Region)`
			`CHH 0x0404 @FE REDMOND Chinese Traditional (Hong Kong Region)`
			`FR 0x040C @EU DUBLIN French`
			`GER 0x0407 @EU REDMOND German`
			`;[Lang] [x86] [ia64]`
			`;=============================================`
			`USA per;pro;srv;ads;dtc pro;ads;dtc`
			`GER per;pro;srv;ads pro;ads`
			`CHT per;pro;srv;ads pro;ads`
			`CHH per;pro;srv;ads pro;ads`
			`CHS per;pro;srv;ads pro;ads`
			`ARA per;pro pro`

			`=head1 NOTES`

			`The parser can handle blank lines and comments beginning with either ';' or`
			`'#'.`

			`A heading line must appear before any data lines. A heading line is a special`
			`form of comment consisting of field names enclosed in brackets [].`

			`Data lines must have exactly as many fields as heading lines.`

			`Data fields must be seperated by 2 or more spaces. Single spaces within data`
			`items do not require quoting or escaping.`

			`Quoting and escaping are not supported in any way. This means you may not`
			`have a data field with the value "" (empty string) or more than 1 space in a row.`

			`Storage locations are not before parsing begins.`

			`Heading names must match the regex /\w+/.`

			`Should probably be expanded to handle returning a plain array for single column`
			`tables (lists of filenames, etc).`

			`=head1 SEE ALSO`

			`hashtext.pm`

			`=head1 AUTHOR`

			`Jeremy Devenport <JeremyD>`

			`=head1 COPYRIGHT`

			`Copyright (c) Microsoft Corporation. All rights reserved.`

			`=cut`