#!/bin/sh
# next line is a comment in tcl \
exec tclsh "$0" ${1+"$@"}

package require Tcl 8.3
package require Tcldgr

###########################################################################
#
# IHI
#
# An implementation of Inheritance Hierachies Inference (IHI)
#
# Classifies objects with varying patterns of attributes 
#
# Based on the paper:
#	"A Simple and Efficient Algorithm for Inferring Inheritance Heirarchies"
#	Ivan Moore, Tim Clement
#	Department of Computer Science, University of Manchester
#   Published in TOOLS Europe, 1996, Prentice-Hall
#
# tcl + tcldg   implementation by John Ellson  (ellson@lucent.com)
#
# usage:  ihi <file
#         ihi file
#
# where files contain one object per line each object described
# by a set of features, e.g:
#
#   a b e
#   a b f
#   a c d g i
#   a c d g h j
#   d h
#
# (contrary to this example, the features of each object do
# not have to be sorted, and the objects do not have to have unique
# feature sets)
#
#
###########################################################################

set options(removefeatureedges) true
set options(removefeaturenodes) true
set options(removeobjectnodes) true
set options(labelclasses) true

proc classify {g f} {
    while {![eof $f]} {
        set feature_set [gets $f]
        if {![string length $feature_set]} {continue}
        extract_features $g $feature_set
    }
    introduce_classes $g
    add_inheritance_edges $g
    remove_unnecessary_edges $g
    label_classes $g
    cleanup $g
}

proc extract_features {g object} {
	foreach feature $object {
		if {![catch {$g addnode $feature type feature shape triangle}]} {
			lappend feature_set $feature
		}
	}
	if {![info exists feature_set]} {return}

	# sort feature_set so that objects with the same features but in
	#  different order are not duplicated
	set feature_set [lsort $feature_set]
	if {[catch {$g addnode $feature_set type object shape box} o]} {
		set o [$g findnode $feature_set]
	}
	foreach feature $feature_set {
		catch {$g addedge $feature $o color red}
	}
}

proc introduce_classes {g} {
	global options

	# create the mapping graph  -- introduces all the classes
	foreach feature [$g listnodes type feature] {
		set objects {}
		foreach featureedge [$feature listoutedges] {
			lappend objects [$featureedge headof]
			if {$options(removefeatureedges)} {
				$featureedge delete
			}
		}
		set objects [lsort $objects]

		# if the set of objects matches those of a previously added
		# class node then this addnode operation returns the handle of
		# that existing node rather than creating a new one.
		set class [$g addnode $objects type class label { }]
		$feature addedge $class
	}
	foreach class [$g listnodes type class] {
		foreach o [$class showname] {
			$class addedge $o
		}
	}
}

proc add_inheritance_edges {g} {
	#sort the classes into buckets according to number of subclasses
	# this aids the finding of subclasses which are proper subsets of 
	# superclasses since the subclass must have at least one less member
	set max 0
	foreach class [$g listnodes type class] {
		set count [$class countoutedges]
		lappend classes($count) [$class showname]
		if {$count > $max} {set max $count}
	}

	# add inheritance edges
	for {set i $max} {$i > 1} {incr i -1} {
		for {set j [expr $i -1]} {$j} {incr j -1} {
			if {! [info exists classes($j)]} {continue}
			foreach sub $classes($j) {
				if {! [info exists classes($i)]} {continue}
				set nsub [$g findnode $sub]
				foreach sup $classes($i) {
					# compare till mismatch
					set index 0
					set length [llength $sup]
					foreach object $sub {
						set found 0
						for {set k $index} {$k < $length} {incr k} {
							if {[string equal $object [lindex $sup $k]} {
								set found 1
								break
							}
						}
						if {! $found} {break}
						set index [incr k]
					}
					if {$found} {
						set nsup [$g findnode $sup]
						$nsup addedge $nsub
					}
				}
			}
		}
	}
}

proc remove_unnecessary_edges {g} {
	# remove inheritance edges that are unnecessary due to transtivity
	foreach class [$g listnodes type class] {
		set subs {}
		foreach sub [$class listoutedges] {
			lappend subs [$sub headof]
		}
		foreach sub $subs {
			remove_transitive_recurse $class $sub
		}
	}
}

proc remove_transitive_recurse {sup sub} {
	set subsubs {}
	foreach subsub [$sub listoutedges] {
		lappend subsubs [$subsub headof]
	}
	foreach subsub $subsubs {
		remove_transitive_recurse $sup $subsub
		foreach supsubsub [$subsub listinedges] {
			if {[string equal [$supsubsub tailof] $sup]} {
				$supsubsub delete
			}
		}
	}
}

proc label_classes {g} {
	global options

	# label classes with their aggregate feature set
	#   i.e. the features they add plus the ones they inherit
	if {$options(labelclasses)} {
		foreach class [$g listnodes type class] {
			foreach name [array names features] {unset features($name)}
			foreach feature [list_features_recurse $class] {
				set features($feature) 1
			}
			$class set label [lsort [array names features]]
		}
	}
}

proc list_features_recurse {sup} {
	if {[string equal [$sup set type] feature]} {
		set result [$sup showname]
	} {
		set result {}
		foreach sup [$sup listinedges] {
			set result [concat $result [list_features_recurse [$sup tailof]]]
		}
	}
	return $result
}

proc cleanup {g} {
	global options

	# remove feature nodes
	if {$options(removefeaturenodes)} {
		foreach feature [$g listnodes type feature] {
			$feature delete
		}
	}

	# remove objects and mark instantiable classes
	if {$options(removeobjectnodes)} {
		foreach object [$g listnodes type object] {
			set sup [$object listinedges]
			if {[llength $sup] > 1} {
				error "object has more than one class"
			}
			if {![string length $sup]} {
				$object setattributes peripheries 2 shape {}
			} {
				[$sup tailof] set peripheries 2
				$object delete
			}
		}
	}
}

#######################
# display code

#package require Tk
#set c [canvas .c]
#pack $c
#set v [dgview dynadag]


#######################

if {[string length $argv]} {
    set f [open $argv r]
} {
    set f stdin
}

#set g [dgnew digraphstrict]
set g [dgnew digraph]

classify $g $f

# display graph
set display "dot -Tps | gv -"
set f [open |$display w]
$g write $f
