#! /usr/bin/env xs

# Clean the $history file by removing:
#  a) leading and trailing blanks
#  b) one-word commands
#  c) syntactically invalid commands
#  d) consecutive repetitions of commands
#  e) if the remaining file has at least 1,500 but fewer than 3,000 lines,
#     remove duplicates from the first 2/3 of the file; if there are at
#     least 3,000 lines, remove duplicates from all but the last 500 lines.
#  f) if, after step (e), the remaining file still has more than 3,000
#     lines, delete all but the last 3,000 lines.

# Rewrite the file named by $history in place, applying steps (a)-(f) from
# the header. Nothing happens unless $history is set and names a regular file.
if {!~ $history () && access -f $history} {
	hstfile = $history
	# NOTE(review): presumably suspends history recording while the file is
	# being rewritten; paired with `history -y` at the end -- confirm.
	history -n
	lc_start = `{wc -l < $hstfile}
	tmpfile = `mktemp

	# Pass 1 (steps a-d): filter the file line by line into $tmpfile.
	count = 0
	# Non-empty sentinel so the loop body runs at least once.
	raw = go
	# End-of-input is detected on the raw result of read, not on the trimmed
	# text, so that a blank or whitespace-only line (which trims down to
	# nothing) is merely dropped instead of ending the pass early.
	until {~ $raw ()} {
		count = `($count + 1)
		# Progress indicator on stderr, refreshed every 100 iterations.
		~ `($count % 100) 0 && {printf \r%d%% `(100*$count/$lc_start) >[1=2]}
		raw = <=read
		# (a) strip leading/trailing spaces; tr removes the newline echo adds.
		line = `` '' {echo $raw | sed 's/^ \+//;s/ \+$//'|tr -d \n}
		# $drop is true (0) as soon as one of these holds:
		#   (b) the line contains no space, i.e. it is a single word;
		#   (c) `xs -n` rejects it (presumably a parse-only check -- confirm);
		#   (d) it equals the previously kept line.
		drop = <={{!~ $line *\ *} \
				|| {echo $line | !xs -n >[2]/dev/null} \
				|| {$line :eq $prev}}
		result $drop || {echo $line; prev = $line}
	} <$hstfile >$tmpfile

	# Pass 2 (step e): de-duplicate the older part of a large file.
	lc_pass1 = `{wc -l < $tmpfile}
	if {$lc_pass1 :ge 1500} {
		# len0 = number of leading lines to de-duplicate: everything but the
		# last 500 lines for 3,000+ lines, otherwise the first two thirds.
		if {$lc_pass1 :ge 3000} {
			len0 = `($lc_pass1-500)
		} else {
			len0 = `($lc_pass1*2/3)
		}
		# Private scratch directory rather than predictable names in /tmp.
		workdir = `{mktemp -d}
		prefix = $workdir^/part_
		# The remainder is never longer than $len0, so split produces
		# exactly two pieces: part_0 (older) and part_1 (recent).
		split -l $len0 -d -a 1 $tmpfile $prefix
		# sort|uniq removes duplicates from the older piece; shuf then puts
		# the survivors in random order, so the original order of that piece
		# is not preserved. The recent piece is appended untouched.
		cat <{cat $prefix^0 | sort | uniq | shuf} $prefix^1 > $tmpfile
		rm -r $workdir

		# Step (f): cap the result at its last 3,000 lines. tail is used
		# because splitting at (count - 3000) lines only leaves the wanted
		# tail in the second piece when the file has 6,000+ lines.
		lc_pass2 = `{wc -l < $tmpfile}
		if {$lc_pass2 :gt 3000} {
			trimmed = `mktemp
			tail -n 3000 $tmpfile > $trimmed
			cat $trimmed > $tmpfile
			rm $trimmed
		}
	}

	# Copy (rather than move) so the history file itself is kept and only
	# its contents are replaced.
	cp $tmpfile $hstfile
	rm $tmpfile
	lc_finish = `{wc -l < $hstfile}
	# The leading \r overwrites the progress indicator.
	printf \r'Removed %d lines'\n `($lc_start - $lc_finish)
	history -y
}
