#!/usr/bin/perl
# sgml-catalog-check.pl -- check sgml catalog file
#
#Author: apharris@onshore.com (A. P. Harris)
#$Date: 1999/07/14 08:03:45 $
#$Revision: 1.11 $
#
#Todo:
#	cross check the links/dtds and make sure they all appear in the
#	  SGML catalog
#	convert to use perl sgml stuff instead of hand-rolling?
#	make a nice lintian script from this
#	deal with declation and notation files

use Getopt::Std;

$Verbose = 1;			# verboseness, 1 == chatty, 2 == loud
$SGMLdir = "debian/tmp/usr/lib/sgml"; # default dir for link making etc
$Catalog = "sgml.catalog";	# default SGML catalog file
$MakeLinks = 0;			# whether to make the SGML links
@MakeDir = ("install", "-d", "-o", "root", "-g", "root", 
	    "-m", "755");	# how to make a directory recursively

$Usage = "Usage: $0 [-l] [-d <SGML dir>] [<SGML catalog file>]
Check SGML catalog file, create the links as documented in the SGML
sub-policy, and also ensure that the files referenced from the catalog
file actually exists.
   -l                   create SGML links if they do not exist
   -d <SGML dir>        base dir, default is $SGMLdir
   -v <number>		verbosity amount, 0=silent, 1=default, 2=debug
   <SGML catalog file>  default is $Catalog
";

$warnings = $errors = 0;	# error and warning count

&getopts('hlv:d:') || die $Usage;

if ( $opt_h ) 
{
    print $Usage;
    $opt_h && exit;		# shut up -w
}
elsif ( $opt_d == 1 ) {
    die "option '-d' must have an argument\n$Usage";
} 
elsif ( $opt_d ) {
    $SGMLdir = $opt_d;
}

if ( defined($opt_v) ) {
    $Verbose = $opt_v;
}

if ( $opt_l ) {
    $MakeLinks = $opt_l;
}

if ( $#ARGV > 0 )
{
    die "too many arguments\n$Usage";
}
elsif ( $#ARGV == 0 ) {
    $Catalog = $ARGV[0];
}

( -f $Catalog ) or
    die "catalog file $Catalog does not exist\n$Usage";
( -d $SGMLdir ) or
    die "SGML directory $SGMLdir does not exist\n$Usage";

open(CAT, "<$Catalog") or
    die "cannot read $Catalog: $!\n";

while (<CAT>) {
    chomp;
    if ( m/^PUBLIC\s+\"([^\"]+)\"\s+\"?([^\s"]+)\"?/ ) {
	( $id, $file ) = ( $1, $2 );
	debug("found identifier \"$id\"");
	debug("source file is $file");
	if ( ! -f "$SGMLdir/$file" ) {
	    error("referenced-file-does-not-exist $SGMLdir/$file");
	    next;
	}
	
	if ( $id =~ m!^(.+)//(?:([^/]+)//)?(ELEMENTS|DOCUMENT|ENTITIES|DTD)\s+([^/]+)//(.+)$! ) {
	    ( $reg, $vendor, $type, $name, $misc ) = ( $1, $2, $3, $4, $5 );

	    if ( $type eq "ENTITIES" ) {
		( ! $file =~ m!^entities/! ) &&
		    error("SGML-entity-not-in-entities-dir $file");
	    } 
	    elsif ( $type eq "DTD" || $type eq "ELEMENTS" ) {
		( ! $file =~ m!^dtd/! ) &&
		    error("SGML-DTD-not-in-dtd-dir $file");
	    }
	    elsif ( $type eq "DOCUMENT" ) {
		( $file =~ m!^dtd/! || $file =~ m!^entities! ) &&
		    error("DOCUMENT-in-dtd-or-entities-dir $file");
	    }
	    else {
		error("identifier-type-not-recognized $type on FPI $id");
	    }
	    
	    # would be nice to check that the DTD file is reasonable
	    # oh well...
	    
	    # now check if the link is there
	    $link = $id;
	    $link =~ s!^(\-|\+)//!!;	# registered mark doesnt need dir
	    $link =~ s/  / /g;		# compact spaces
	    $link =~ s![ %]!_!g;	# convert spaces/% to _
	    $link =~ s!//EN$!!;		# remove language specifier
	    $link =~ s!//EN//.*$!!;	# remove language specifier plus
	    $link =~ s!//!\0!g;		# convert // to NULL
	    $link =~ s!/!_!g;		# convert slash to _
	    $link =~ s!\0!/!g;		# convert NULL to /
	    $link =~ s!/DTD_!/dtd/!;	# it is a DTD
	    $link =~ s!/ELEMENTS_!/dtd/!; # it is a DTD
	    $link =~ s!/ENTITIES_!/entities/!; # it is an entity
	    debug("checking link $link");
	    if ( -l "$SGMLdir/$link" ) {
		1;			# the link is ok, follow it?
	    }
	    elsif ( -f "$SGMLdir/$link" ) {
		error("SGML-entity-link-is-a-file $link");
	    }
	    elsif ( $MakeLinks ) {
		# first lets find out if the dir exists
		($dest = "$SGMLdir/$link") =~ s!/[^/]+$!!;
		if ( ! -d $dest ) {
		    inform("creating dir $dest");
		    system((@MakeDir, $dest)) == 0 or
			die("cannot create dir $dest: $?\n  command was " . join(" ", @MakeDir) . "\n");
		}
		# now lets formulate what to link to
		($dest = $link) =~ s![^/]!!g; # remove non /
		$dest =~ s!/!../!g; # make .. for each /
		$dest .= $file;
		inform("SGML-entity-link-will-be-created $SGMLdir/$link => $dest");
		symlink($dest, "$SGMLdir/$link") or
		    die("cannot create link: $!\n");
	    } else {
		error("SGML-entity-link-does-not-exist $link");
	    }

	    # we don't know what to do with these, if anything
	    $reg = $reg;
	    $vendor = $vendor;
	    $name = $name;
	    $misc = $misc;
	}
	else {
	    error("SGML-identifier-not-in-recognized-form $id");
	    next;
	}
    }
    else {
	debug("skipped catalog line:\n   $_");
	next;
    }
}

if ( $errors ) {
    exit(1);
}
exit(0);

sub debug {
    local($msg) = @_;
    ( $Verbose > 1 ) && warn("D: $msg\n");
}

sub inform {
    local($msg) = @_;
    ( $Verbose ) && warn("N: $msg\n");
}

sub warning {
    local($msg) = @_;
    $warnings++;
    warn("W: $msg\n");
}

sub error {
    local($msg) = @_;
    $errors++;
    warn("E: $msg\n");
}
