#===========================================================================
#
# NewsHound profile support: block stories unless they match a specific
# scoring profile.
#
# Contributed by James Brown. Thanks JHB!

package Sitescooper::NewsHound;

use strict;

# Scoring Algorythm
# Count number of sentences (.?!)
# Number of hits * 2 on all required words
# 	Plus number of hits on desired words
# Determine percentage of hits to sentences
#
#	Usage: ScoreStory($profileRef, $storyText)
#
sub ScoreStory
{
	my ($profile, $story) = @_;

	my $hits = 0;

	# local $^W = 0;	# turn warnings off if they are on

#print "Story length = " . length($story) . "\n";

	# check for excluded keywords
	# return 0 score if we find any
#print "Checking exclude keywords\n";
	my $keyword;
	foreach $keyword (@{$profile->{exclude}})
	{
		return 0 if ($story =~ /$keyword/si);	# excluded word found
	}

	# make sure all required keywords are present
#print "Checking required keywords\n";
	foreach $keyword (@{$profile->{required}})
	{
		my $hit;

		$hit = CountWordHits($story, $keyword);
		return 0 if ($hit == 0);	# required keyword not found

		$hits += 2 * $hit;
	}

#print "Checking desired keywords\n";
	foreach $keyword (@{$profile->{desired}})
	{
		$hits += CountWordHits($story, $keyword);
	}

	my $sentenceCount = 1;
	while ($story =~ /(\.|\!|\?)/g)
	{
		$sentenceCount++;
	}

#print "Hits: $hits, Sentence count: $sentenceCount\n";
	return $hits >= $sentenceCount ? 100 : int ($hits * 100 / $sentenceCount);
}	

sub CountWordHits
{
	my ($story, $keyword) = @_;
	my $count = 0;

	while ($story =~ /$keyword/sig)
	{
		$count++;
	}
	return $count;
}	

# Load the profile from a file
#	Usage: LoadProfile($filename)
#
sub LoadProfile
{
	my ($filename) = @_;

	my ($prof, $profText);

	return 0 unless open PROFILE, "<$filename";

	# read and parse profile
	while (<PROFILE>)
	{
		$profText .= $_;
	}
	close PROFILE;
	$prof = ParseProfile($profText);

	return $prof;
}	

# Parse the profile
#	Usage: ParseProfile($profileText)
#
sub ParseProfile
{
	my ($profileText) = @_;

	my (@profileLines,
		$profile);

	$profile = {
		name 		=> "",
		required	=> [],
		excluded	=> [],
		desired		=> [],
		score		=> 30,
	};	# profile is a reference to an anon. hash

	@profileLines = split /^/m, $profileText;

	my $field;
	foreach (@profileLines) {
		chomp;

		s/#.*$//g;	# jm: strip comments
		s/^\s+//g;	# strip leading spaces
		s/\s+$//g;	# strip trailing spaces
		next if (/^$/);	# skip blank lines
#print "Line: '$_'\n";

		$field = '' if (/^\w+:/); #if it looks like a field, reset variable

		if (/Name:\s+(.+)$/i)
		{
			$profile->{name} = $1;
			next;
		}

		if (/Score:\s+(\d+)$/i)
		{
			die "Invalid score (0-100): $1\n" if ($1 < 0 || $1 > 100);
			$profile->{score} = $1;
			next;
		}

		if (/Required:\s+(.*)$/i)
		{
			$field = 'required';
			$_ = $1;
		}
			
		if (/Desired:\s+(.*)$/i)
		{
			$field = 'desired';
			$_ = $1;
		}
			
		if (/Exclude:\s+(.*)$/i)
		{
			$field = 'exclude';
			$_ = $1;
		}

		die "No valid field specification found:\n\t$_\n" if ($field eq '');

		# continue splitting lines into the last seen field
		push @{$profile->{$field}}, split /, *|^/;
#print "Pushing " . join(':', split /, *|^/) . " onto $field\n";
	}

	return $profile;
}


# Print the profile
#	Usage: PrintProfile($profileRef)
#
sub PrintProfile
{
	my ($profile) = @_;

	print "Profile: $profile->{name}\n\n";
	print "\tScore: $profile->{score}\n\n";
	print "\tRequired: " . join(', ', @{$profile->{required}}) . "\n\n";
	print "\tExclude: " . join(', ', @{$profile->{exclude}}) . "\n\n";
	print "\tDesired: " . join(', ', @{$profile->{desired}}) . "\n\n";
	print "\n";
}

1;
