This program outputs to stdout what is read from stdin, removing duplicated lines.
It is meant to be fast, but it uses memory in proportion to the number of distinct lines. More precisely, it builds a balanced tree containing every distinct line read so far, and uses that tree to check whether a line has already been seen.
If you want the lines sorted, or if you do not care about their order, use `sort -u` instead.
(************************************************************************)
(* (c) 2006 Philippe Wang ( mail@philippewang.info )                    *)
(*                                                                      *)
(* This program outputs to stdout what is read from stdin,              *)
(* removing duplicated lines                                            *)
(*                                                                      *)
(* GPL >= 2                                                             *)
(*                                                                      *)
(************************************************************************)
(* $Id: script.unique.whp,v 1.2 2007/08/19 09:40:48 philippeb Exp $ *)

(* Streaming variant: every line that has not been seen before is written
   to stdout as soon as it is read, so the first occurrence of each line
   is kept and the input order is preserved. *)

module M = Set.Make (String)

(* Alias currently unused by this streaming version. *)
module Q = Queue

(* Set of every distinct line read so far. *)
let tree = ref M.empty

(* [usage ()] writes the usage text to stderr and terminates the process
   with status 1. The text goes to stderr, not stdout, so that it never
   ends up in the data stream of a pipeline this filter is part of. *)
let usage () =
  prerr_string Sys.argv.(0);
  prerr_string
    (" usage: " ^ Sys.argv.(0)
     ^ " outputs to stdout what is read from stdin, removing duplicated lines N.B. you may need a lot of RAM if you give a lot of different lines! ");
  exit 1

(* Entry point. The program takes no arguments: any argument triggers the
   usage message. Otherwise stdin is copied to stdout line by line,
   skipping lines already present in [tree], until end of input. *)
let () =
  if Array.length Sys.argv > 1 then usage ();
  try
    while true do
      (* [read_line] flushes stdout before reading, so each new line
         printed below reaches the consumer before the next read. *)
      let l = read_line () in
      if not (M.mem l !tree) then begin
        tree := M.add l !tree;
        print_string l;
        print_char '\n'
      end
    done
  with End_of_file -> exit 0

(* end of unique.ml *)
(************************************************************************)
(* (c) 2006 Philippe Wang ( mail@philippewang.info )                    *)
(*                                                                      *)
(* This program outputs to stdout what is read from stdin,              *)
(* removing duplicated lines                                            *)
(*                                                                      *)
(* GPL >= 2                                                             *)
(*                                                                      *)
(************************************************************************)
(* $Id: script.unique.whp,v 1.2 2007/08/19 09:40:48 philippeb Exp $ *)

(* Buffered variant: distinct lines are collected in first-seen order
   while stdin is read, and written to stdout only once end of input has
   been reached. *)

module M = Set.Make (String)
module Q = Queue

(* Set of every distinct line read so far; used for the membership test. *)
let tree = ref M.empty

(* The same distinct lines, in the order they were first read. *)
let lines : string Q.t = Q.create ()

(* [usage ()] writes the usage text to stderr and terminates the process
   with status 1. The text goes to stderr, not stdout, so that it never
   ends up in the data stream of a pipeline this filter is part of. *)
let usage () =
  prerr_string Sys.argv.(0);
  prerr_string
    (" usage: " ^ Sys.argv.(0)
     ^ " outputs to stdout what is read from stdin, removing duplicated lines N.B. you may need a lot of RAM if you give a lot of different lines! ");
  exit 1

(* Entry point. The program takes no arguments: any argument triggers the
   usage message. Otherwise all of stdin is read, each line not already in
   [tree] is recorded in both [tree] and [lines], and the queued lines are
   printed when [End_of_file] is raised. *)
let () =
  if Array.length Sys.argv > 1 then usage ();
  try
    while true do
      let l = read_line () in
      if not (M.mem l !tree) then begin
        tree := M.add l !tree;
        Q.add l lines
      end
    done
  with End_of_file ->
    (* [Q.iter] visits elements from oldest to newest, which restores the
       original first-occurrence order. *)
    Q.iter
      (fun s ->
        print_string s;
        print_char '\n')
      lines

(* end of unique.ml *)