open Fclassify
open Syntax

(** Global per-term statistics table, filled in by [add_xx].
    Each term maps to four integer counters [(i, p, n, d)]:
    - [i] is incremented each time the term is recorded while being a member
      of the axiom list given to [add_xx];
    - [p] and [n] accumulate the two components of the increment pair given
      to [add_xx];
    - [d] accumulates the extra integer given to [add_xx].
    10000 is only the initial size hint of the table. *)
let terms_used : (trm, int * int * int * int) Hashtbl.t = Hashtbl.create 10000

(** [str_of_training (ftrs, lbl)] renders one training example as
    ["f1, f2, ... -> lbl"], where the [f]s are the first components of the
    pairs returned by [FClassifier.list_of_map ftrs]. *)
let str_of_training (ftrs, lbl) =
  let feature_names = FClassifier.list_of_map ftrs |> List.map fst in
  Printf.sprintf "%s -> %s" (String.concat ", " feature_names) lbl

(** [add_xx axs (dp, dn) m dd] updates the counters of term [m] in
    [terms_used]: the first counter grows by 1 when [m] is a member of
    [axs], the remaining three grow by [dp], [dn] and [dd] respectively.
    [(0, 0, 0, 0)] is handed to [Utils.hashtbl_map0] as the fallback
    value (presumably used when [m] has no entry yet -- confirm against
    [Utils]). *)
let add_xx axs (dp, dn) m dd =
  let di =
    match List.mem m axs with
    | true -> 1
    | false -> 0
  in
  let bump (i, p, n, d) = (i + di, p + dp, n + dn, d + dd) in
  Utils.hashtbl_map0 terms_used m bump (0, 0, 0, 0)

(** [add_training_file classifier input] loads the training data stored in
    the file [input] with [Training.load_training], records statistics for
    every loaded term in [terms_used], and adds training examples to
    [classifier].

    @raise Failure if loading [input] fails; the message names the file and
    the exception that caused the failure. *)
let add_training_file (classifier : classifier_t) input =
  let (_cj, axs, proc, refut) =
    try Utils.with_in input Training.load_training
    with e ->
      (* Keep the cause in the message: a bare "Error loading" hides whether
         the file was missing, unreadable or malformed. *)
      failwith
        (Printf.sprintf "Error loading %s: %s" input (Printexc.to_string e))
  in
  Printf.printf "Read %s with %d examples\n%!" input (Hashtbl.length proc);

  (* Terms that occur in [refut] contribute (1, 0) to the middle counters,
     all other terms contribute (0, 1); the value stored in [proc] is added
     to the last counter. *)
  let record m p =
    let d = if List.mem m refut then (1, 0) else (0, 1) in
    add_xx axs d m p
  in
  Hashtbl.iter record proc;

  (* NOTE(review): [pos] is not bound anywhere in this function. Unless one
     of the opened modules provides it, this line does not compile -- confirm
     which training data is meant to be converted here. *)
  let training = Fclassify.training_to_classifier pos in
  (*List.iter print_endline (List.map str_of_training training);*)

  FClassifier.add_training_exs classifier training


(* Entry point: hasher <OUT> [FILE(S)].
   Loads the classifier stored in <OUT> (or starts from an empty one), adds
   every training file given on the command line, prints the per-term
   statistics collected in [terms_used], and writes the classifier back to
   <OUT>.

   All output goes through [Printf]: [Format] keeps its own buffer, so
   mixing the two on stdout could emit the statistics table after (or in the
   middle of) the final status line. *)
let () =
  if Array.length Sys.argv < 2 then begin
    Printf.printf "Usage: hasher <OUT> [FILE(S)]\n"; exit 1
  end;

  let output = Sys.argv.(1) in

  (* Best effort: if the output file cannot be loaded (for example because it
     does not exist yet), start from an empty classifier. *)
  let classifier =
    try Utils.with_in output FClassifier.load
    with _ -> FClassifier.empty () in
  let (t, f, l) = FClassifier.get_stats classifier in
  Printf.printf "Read: %i training examples\n%!" t;
  Printf.printf "Read: %i features\n%!" f;
  Printf.printf "Read: %i labels\n%!" l;

  for i = 2 to Array.length Sys.argv - 1 do
    add_training_file classifier Sys.argv.(i)
  done;

  (* One tab-separated line per term: the four counters, then the term. *)
  Hashtbl.iter
    (fun k (i, p, n, d) ->
      Printf.printf "%d\t%d\t%d\t%d\t%s\n" i p n d (trm_str k))
    terms_used;

  FClassifier.write classifier output;
  let (t', _, _) = FClassifier.get_stats classifier in
  Printf.printf "Wrote %s with %i training examples\n%!" output t'
