#!/usr/bin/perl
# fields -- lintian check script

# Copyright (C) 1998 by Richard Braakman
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.

%known_archs = map { $_ => 1 }
('i386', 'm68k', 'alpha', 'powerpc', 'sparc', 'arm', 'hurd-i386', 'any', 'all');

%known_sections = map { $_ => 1 }
('admin', 'base', 'comm', 'devel', 'doc', 'editors', 'electronics',
 'games', 'graphics', 'hamradio', 'interpreters', 'libs', 'mail',
 'math', 'misc', 'net', 'news', 'oldlibs', 'otherosfs', 'shells',
 'sound', 'tex', 'text', 'utils', 'web', 'x11'
 );

%known_non_us = map { $_ => 1 } ('non-free', 'contrib', 'main' );

%known_distributions = map { $_ => 1 } ('non-free', 'contrib', 'non-US', 'non-us' );

%known_prios = map { $_ => 1 }
('required', 'important', 'standard', 'optional', 'extra');

%known_source_fields = map { $_ => 1 }
('source', 'version', 'maintainer', 'binary', 'architecture',
 'standards-version', 'files', 'build-depends', 'build-depends-indep', 
 'build-conflicts', 'build-conflicts-indep' );

%known_binary_fields = map { $_ => 1 }
('package', 'version', 'architecture', 'depends', 'pre-depends',
 'recommends', 'suggests', 'conflicts', 'provides', 'replaces',
 'essential', 'maintainer', 'section', 'priority', 'source',
 'description', 'installed-size');

%known_obsolete_fields = map { $_ => 1 }
('revision', 'package-revision', 'package_revision',
 'recommended', 'optional', 'class');

%known_essential = map { $_ => 1 }
('base-files', 'base-passwd', 'bash', 'bsdutils', 'debianutils',
 'diff', 'dpkg', 'e2fsprogs', 'fileutils', 'findutils', 'grep', 'gzip',
 'hostname', 'ldso', 'login', 'mount', 'ncurses-base', 'ncurses-bin',
 'perl-base', 'sed', 'shellutils', 'sysvinit', 'tar', 'textutils',
 'update', 'util-linux');

%obsolete_packages = map { $_ => 1 }
('libstdc++2.8', 'ncurses3.4', 'slang0.99.38', 'newt0.25', 'mesag2',
 'libjpegg6a', 'gmp2', 'libgtop0', 'libghttp0', 'libwraster2',
 'libpgsql', 'tk4.2', 'tcl7.6', 'libpng0g', 'xbase');

($#ARGV == 1) or fail("syntax: field <pkg> <type>");
$pkg = shift;
$type = shift;

unless (-d "fields") {
    fail("directory in lintian laboratory for $type package $pkg missing: fields");
}

# In general, read entire files in one go
undef $/;

# Complex regexps used more than once
$package_re = "[A-Za-z0-9][A-Za-z0-9+.-]+";
$version_re = "([0-9]+:)?([0-9a-zA-Z][0-9a-zA-Z+.:\-]*?)(-[0-9a-zA-Z+.]+)?";
# lenient regexp also allows underlines to appear, which were once used
# in package names and may still appear in Conflicts and Replaces lines
$lenient_package_re = "[A-Za-z0-9][_A-Za-z0-9+.-]+";

if (not open(IN, "fields/package")) {
    if ($type eq 'binary') {
	tag_error("no-package-name");
    }
} else {
    chop($package = <IN>);
    close(IN);
    single_line('package', $package);
    if ($package !~ /^$package_re$/so) {
	tag_error("bad-package-name", $_);
    } elsif ($package =~ /[A-Z]/) {
	tag_error("package-not-lowercase", $_);
    }
}

if (not open(IN, "fields/version")) {
    tag_error("no-version-field");
} else {
    chop($_ = <IN>);
    close(IN);
    single_line('version', $_);
    if (not (@version = /^$version_re$/so)) {
	tag_error("bad-version-number", $_);
    } else {
	# Could have more detailed tags here, but they would be pretty long.
	if ($version[1] =~ /:/ and not $version[0]) {
	    tag_error("bad-version-number", $_);
	} elsif ($version[1] =~ /-/ and not $version[2]) {
	    tag_error("bad-version-number", $_);
	}
	if ($version[1] !~ /^\d/) {
	    tag_error("upstream-version-not-numeric", $version[1]);
	}
    }
}

if (not open(IN, "fields/architecture")) {
    tag_error("no-architecture-field");
} else {
    # Packaging manual 4.2.3 says that multiple architectures should be
    # separated by "spaces", not "whitespace".  But I'll allow any
    # whitespace here anyway.  It's not important enough to check for.
    chop($_ = <IN>);
    @archs = split;
    close(IN);

    single_line('architecture', $_);

    if ($type eq 'source') {
	# Special architectures "all" and "any" should only occur alone.
	if ($#archs > 0 and grep {$_ eq 'all' or $_ eq 'any'} @archs) {
	    tag_error("magic-arch-in-arch-list");
	}

	foreach my $arch (@archs) {
	    if (not exists $known_archs{$arch}) {
		tag_warn("unknown-architecture", $arch);
	    }
	}
    } else {
	if ($#archs > 0) {
	    tag_error("too-many-architectures");
	}
	if ($archs[0] eq 'any') {
	    tag_error("arch-any-in-binary-pkg");
	}
	if (not exists $known_archs{$archs[0]}) {
	    tag_warn("unknown-architecture", $archs[0]);
	}
    }
}

if (not open(IN, "fields/maintainer")) {
    tag_error("no-maintainer-field");
} else {
    chop($_ = <IN>);
    close(IN);

    single_line('maintainer', $_);

    # Zap leading and trailing whitespace
    s/^\s+//;  s/\s+$//;

    # Parse maintainer-address.  Packaging manual 4.2.4 says:
    #   The package maintainer's name and email address. The name should
    #   come first, then the email address inside angle brackets <> (in
    #   RFC822 format).
    # Note that it is _not_ necessary for the name to be in any particular
    # format.  However, lintian will emit warnings if it doesn't look
    # like a full name.
    if (not /(.*?)<(.*?)>(.*)/) {
	if (/@/) {
	    # Name is missing and address does not have <> around it
	    tag_error("maintainer-name-missing", $_);
	} else {
	    # address is missing
	    tag_error("maintainer-address-missing", $_);
	}
    } else {
	($name, $addr, $rest) = ($1, $2, $3);
	# Check that there is something before the address and nothing
	# after it, and that the address looks vaguely like user@domain.foo.
	# Full RFC822 parsing is probably overkill.
	if (not $name) {
	    tag_error("maintainer-name-missing", $_);
	} elsif ($rest or $addr !~ /.+@.+\..+/) {
	    tag_error("maintainer-address-malformed", $_);
	} elsif ($name !~ /\s\S/) {
	    # Also complain if the maintainer name has no embedded spaces
	    tag_warn("maintainer-not-full-name", $name);
	} elsif ($name !~ /\s$/) {
	    # And complain if there is no whitespace between the
	    # name and the address.
	    tag_warn("maintainer-address-looks-weird", $_);
	}
    }
}

if (not open(IN, "fields/source")) {
    if ($type eq 'source') {
	tag_error("no-source-field");
    }
} else {
    chop($_ = <IN>);
    close(IN);

    single_line('source', $_);

    if ($type eq 'source') {
	if ($_ ne $pkg) {
	    tag_error("source-field-does-not-match-pkg-name", $_);
	}
    } else {
	if (not /^($package_re)\s*(?:\(\s*($version_re)\s*\))?$/so) {
	    tag_error("source-field-malformed", $_);
	}
    }
}

if (open(IN, "fields/essential")) {
    chop($_ = <IN>);
    close(IN);

    single_line('essential', $_);

    if ($type eq 'source') {
	tag_error("essential-in-source-package");
    } else {
	if ($_ eq 'no') {
	    tag_warn("essential-no-not-needed");
	} elsif ($_ ne 'yes') {
	    tag_error("unknown-essential-value", $_);
	} elsif (not $known_essential{$pkg}) {
	    tag_warn("new-essential-package");
	}
    }
}

# typo in packaging manual, end of fourth paragraph of 4.2.9:
# "priorities" should be "sections".

if (not open(IN, "fields/section")) {
    # The section and priority fields are mandatory in the debian/control
    # files, but they are not copied to the .dsc files, so we can't check
    # them for source packages unless we have a full-blown debian/control
    # parser.
    # It is an informational tag, because most packages do not yet use
    # -isp when building packages, and it is not yet policy to do so.
    tag_warn("no-section-field")
	unless $type eq 'source';
} else {
    chop($section = <IN>);
    close(IN);

    single_line('section', $section);
    
    # Packages in the main distribution have a simple section field,
    # but others have "non-free/section" or "contrib/section".
    @foo = split(/\//, $section, 2);
    if ($#foo > 0) {
	if (not exists $known_distributions{$foo[0]}) {
	    tag_warn("unknown-section", $section);
	}
	if ( $foo[0] eq 'non-us' ) {
		tag_info("non-us-spelling");
	}
	if ( $foo[0] =~ m/non-us/i )
	{
	    if (not exists $known_non_us{$foo[1]}) {
		tag_warn("unknown-section", $section);
	    }
	}
	else	
	{
	    if (not exists $known_sections{$foo[1]}) {
		tag_warn("unknown-section", $section);
	    }
	}
    }
    else {
    	if (not exists $known_sections{$foo[0]}) {
		tag_warn("unknown-section", $section);
	}
    }
}
    
if (not open(IN, "fields/priority")) {
    tag_warn("no-priority-field")
	unless $type eq 'source';
} else {
    chop($_ = <IN>);
    close(IN);

    single_line('priority', $_);

    if (not exists $known_prios{$_}) {
	tag_error("unknown-priority $_");
    }
}

if ($type eq 'binary') {
    for $fld ('depends', 'pre-depends', 'recommends', 'suggests', 'conflicts',
	      'provides', 'replaces') {
	next if not open(IN, "fields/$fld");
	chop($_ = <IN>);
	close(IN);

	single_line($fld, $_);
	
	# zap whitespace at the edges
	s/^\s+//;  s/\s+$//;
	
	@conjunctions = split(/\s*,\s*/);
	for $conj (@conjunctions) {
	    @alternates = split(/\s*\|\s*/, $conj);
	    if ($#alternates >= 1 and
		$fld ne 'depends' and $fld ne 'recommends' and
		$fld ne 'suggests' and $fld ne 'pre-depends') {
		tag_error("alternates-not-allowed", "$fld:", $conj)
		}
	    for $alt (@alternates) {
		if ($alt =~ m/^(\S+)\s*\((<<|<=|=|>=|>>|<|>)\s*(\S+)\s*\)$/) {
		    ($relpkg, $relation, $version) = ($1, $2, $3);
		    
		    tag_error("versioned-provides")
			if $fld eq 'provides';
		    
		    tag_warn("obsolete-relation-form", "$fld:", $alt)
			if ($relation eq '<' or $relation eq '>');
		    
		    tag_warn("bad-version-in-relation", "$fld:", $alt)
			unless $version =~ m/^$version_re$/so;
		    
		    $versioned = 1;
		} else {
		    $relpkg = $alt;
		    $versioned = 0;
		}

		if (not $relpkg =~ m/^$package_re$/so) {
		    tag_error("bad-relation", "$fld:", $alt)
			unless (($fld eq 'conflicts' or $fld eq 'replaces')
				and $relpkg =~ m/^$lenient_package_re$/so);
		}

		if ($relpkg eq $pkg) {
		    tag_warn("package-relation-with-self", "$fld:", $alt)
			unless ($fld eq 'conflicts' and not $versioned);
		}

		if ($pkg eq "$relpkg-doc" and $fld eq 'depends') {
		    tag_warn("doc-package-depends-on-main-package", "$fld:", $alt);
		}

		if (($fld eq 'depends' or $fld eq 'pre-depends') and
		    (exists $obsolete_packages{$relpkg} or
		     $relpkg =~ m/^libgtk1\.1/)) {
		    tag_error("depends-on-obsolete-package", "$fld:", $alt)
			unless $pkg eq "$relpkg-dev" or $pkg eq "$relpkg-dbg";
		}
	    }
	}
    }
}

# Not really anything to check in the Binary field, except for the
# syntax which I assume dpkg will get right.

# Not much to check about Installed-Size either.  It is generated by
# dpkg-gencontrol automatically.  What could be checked in the future
# is whether the figure matches the actual contents of the .deb.

# The check for the Files field may be a good place to verify the
# md5sums, but dinstall does that already.

# Standards-Version is checked separately.

# Description field is checked separately

# Distribution, Urgency, Date, Format, Changes occur only in .changes files.
# Filename, MSDOS-Filename, Size and MD5sum occur in Packages files but
# not in the package control files themselves.

# Status, Config-Version, and Conffiles occur only in status files.

opendir(FIELDS, "fields") or fail("cannot open fields directory: $!");
@fields = readdir(FIELDS);
closedir(FIELDS);

foreach (@fields) {
    if ($_ eq '.' or $_ eq '..') {
	# skip
    } elsif (exists $known_obsolete_fields{$_}) {
	tag_error("obsolete-field", $_);
    } elsif ($type eq 'source' and not exists $known_source_fields{$_}) {
	tag_info("unknown-field-in-dsc", $_);
    } elsif ($type eq 'binary' and not exists $known_binary_fields{$_}) {
	tag_info("unknown-field-in-control", $_);
    }
}

exit 0;

# -----------------------------------

sub fail {
    if ($_[0]) {
	print STDERR "internal error: $_[0]\n";
    } elsif ($!) {
	print STDERR "internal error: $!\n";
    } else {
	print STDERR "internal error.\n";
    }
    exit 1;
}

sub tag_error {
    my $tag = shift;
    if ($#_ >= 0) {
	# We can't have newlines in a tag message, so turn them into \n
	map { s,\n,\\n, } @_;
	my $args = join(' ', @_);
	print "E: $pkg $type: $tag $args\n";
    } else {
	print "E: $pkg $type: $tag\n";
    }
}

sub tag_warn {
    my $tag = shift;
    if ($#_ >= 0) {
	# We can't have newlines in a tag message, so turn them into \n
	map { s,\n,\\n, } @_;
	my $args = join(' ', @_);
	print "W: $pkg $type: $tag $args\n";
    } else {
	print "W: $pkg $type: $tag\n";
    }
}

sub tag_info {
    my $tag = shift;
    if ($#_ >= 0) {
	# We can't have newlines in a tag message, so turn them into \n
	map { s,\n,\\n, } @_;
	my $args = join(' ', @_);
	print "I: $pkg $type: $tag $args\n";
    } else {
	print "I: $pkg $type: $tag\n";
    }
}

sub single_line {
    my $fieldname = shift;
    my $fieldval = shift;

    if ($fieldval =~ m/\n/) {
	tag_error("multiline-field", $fieldname);
	return undef;
    }

    return 1;
}
