#!/bin/sh
#\- compact long lines and cut out garbage in whatis database
#usage: %[/usr/bin/{apropos,whatis} keyword-list] | whatis-filter-jar
#some lines of the whatis database have a long list of files/programs
#or a long description
#this filter compacts both parts of a whatis database line if size
#limits are exceeded,
#then sorts and retains only unique entries

# Filter pipeline: read whatis/apropos output from the named files (or from
# stdin when no operands are given), strip noise, shorten over-long entries,
# and emit a sorted list without duplicates.
#
# "$@" (rather than an unquoted $*) keeps file operands that contain
# whitespace or glob characters intact.
cat "$@" |
	# 1st expression: delete from the first comma through the last literal
	#   "^M" that follows it.
	#   NOTE(review): "^M" is reproduced exactly as it appears in this file
	#   (caret + M); if a carriage-return character was intended, confirm
	#   and replace it.
	# 2nd expression: drop everything from a run of seven asterisks to the
	#   end of the line.
	sed -e 's/,.*^M//' -e 's/[*][*][*][*][*][*][*].*$//' |
	awk '
# Only lines wider than 79 characters are rewritten; the rewritten line is
# capped at 159 characters in total.
length > 79 {
	sep = index($0, " - ")	# position of the name/description separator

	if (sep == 0) {
		# No separator to split on: plain truncation. Handled explicitly
		# because substr() with a start below 1 is not portable.
		print substr($0, 1, 159)
		next
	}

	# Keep at most 50 characters of the name list, stopping before the
	# separator so that its leading blank is not printed twice.
	keep = sep - 1
	if (keep > 50) keep = 50

	# The description (separator included) gets whatever room is left.
	print substr($0, 1, keep) substr($0, sep, 159 - keep)
	next
}

# Short lines pass through unchanged.
{ print }' |
	# -f folds case, -b ignores leading blanks, -u keeps a single line out
	# of each group that compares equal.
	sort -fbu

	
