%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Author: Jose Santos <jcas81@gmail.com>
% Date: 2009-01-30
%
% This file has the hypothesis generator (could also be seen as the top theory interpreter)
%
% Its focus is to generate hypotheses compatible with a given example
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

:- module(hypothesis_generator,
           [
             hyp/2,
             prove/3
           ]
         ).

% GILPS modules
:- use_module(top_theory, [theory/3, addLiteral2Body/1]).
:- use_module('../utils/list', [elemsAtPos/3, randomMember/2, randomMember/3, split/4]).
:- use_module('../utils/clause', [signatureConstIndexs/2, buildPredCall/4, singletons/2]).
:- use_module('../utils/control', [uniqueInterpretations/3]).
:- use_module('../settings/settings', [setting/2]).

% YAP modules
:- use_module(library(lists), [member/2, reverse/2, memberchk/2]).

:- dynamic cache/2.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% hyp(+Example, -Hypothesis)
%
% Given:
%   Example: an example (a ground fact, e.g. class(dog, mammal).
%
% Returns:
%   Hypothesis: an hypothesis that is generated from this example
%
% Notes:
%   The hypothesis is generated by unfolding the top theory, and is a list of literals.
%   To convert it to a clause use the predicate: literals2clause(+list(Literal), -Hypothesis),
%   from module clause
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

hyp(Example, Hypothesis):-
  prove(Example, Proof, GroundHypothesis),
  proof2Signatures(Proof, Signatures),
  generalize(GroundHypothesis, Signatures, Hypothesis),
  verifySingletons(Hypothesis).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% verifySingletons(+Clause)
%
% Given:
%   Clause: a clause as a list of literals
%
% Succeeds if the number of singletons is within the bounds of minimum and maximum singletons allowed
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

verifySingletons(_Clause):-
  setting(minimum_singletons_in_clause, 0),
  setting(maximum_singletons_in_clause, inf),
  !.%with these settings there is no need to actually count singletons

verifySingletons(Clause):-
  setting(minimum_singletons_in_clause, MinimumSingletons),
  setting(maximum_singletons_in_clause, MaximumSingletons),
  singletons(Clause, Singletons),
  length(Singletons, NumSingletons),
  NumSingletons>=MinimumSingletons,
  NumSingletons=<MaximumSingletons.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% proof2Signatures(+Proof, -Signatures)
%
%   Signatures just has the signatures of the literals appearing in Proof
%
%   E.g.:  tidyProof([modeh(good(+molecule),[]),32,22,30,12,100,20,modeb(atom(+molecule,#atom),
%                    [o]),30,12,100,20,modeb(atom(+molecule,#atom),[c]),30,11], 
%                    [good(+molecule),atom(+molecule,#atom),atom(+molecule,#atom)]).
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

proof2Signatures([], []).
proof2Signatures([H|T], [Signature|R]):-
  H=..[_Mode,Signature,_ConstValues],!,%_Mode is either modeh or modeb
  proof2Signatures(T, R).
proof2Signatures([_|T], R):-
  proof2Signatures(T, R).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% generalizeArgs(+LiteralArgs, +SignatureArgs, +CurrentTermVariables, -GeneralizedArgs, -NewTerm2Variables)
%
% E.g.:
%   generalizeArgs([x,y,3,5], [+char,-char,#int,+int], [], GA, NT)
%         GA = [_A,_B,3,_C],  NT = [(int,_C,5),(char,_B,y),(char,_A,x)]
%   generalizeArgs([graph(x,y),x], [graph(+char,-char),-char], [], GA, NT)
%         GA = [_A,_B,_A],    NT = [(char,_B,y),(char,_A,x)] 
%
%
%  generalized hypothesis is not correct when there is a not in the body.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

generalizeArgs([], [], TermVariables, [], TermVariables).
generalizeArgs([Constant|GroundTerms], [#_Type|SigArgs], CurTermVars, [Constant|GenArgs], NewTermVars):-
  !, %When processing a constant we don't care about CurTermVars
  generalizeArgs(GroundTerms, SigArgs, CurTermVars, GenArgs, NewTermVars).
generalizeArgs([Constant|GroundTerms], [SimpleType|SigArgs], CurTermVars, [Var|GenArgs], NewTermVars):-
  SimpleType=..[IOMode,Type],
  (IOMode='+';IOMode='-'),
  (ground(Constant)-> % this will always be true, except when it is the output variable of a negated literal
     (memberchk((Type,Var,Constant), CurTermVars)-> %Succeed if constant already exists in CurTermVars
         NCurTermVars=CurTermVars
       ;
         %constant does not exist in CurTermVars, add it
         NCurTermVars=[(Type,Var,Constant)|CurTermVars] % this Var, is a new var
     )
   ; true), % if not a ground constant, succeed and don't test for presence in CurTermVars
  !,
  generalizeArgs(GroundTerms, SigArgs, NCurTermVars, GenArgs, NewTermVars).
generalizeArgs([ComplexTerm|GroundTerms], [ComplexTermSignature|SigArgs], CurTermVars, [GenArg|GenArgs], NewTermVars):-
  %We have a complex term
  ComplexTerm=..[TermName|ComplexTermGroundArgs],
  ComplexTermSignature=..[TermName|ComplexTermSignatureArgs],
  generalizeArgs(ComplexTermGroundArgs, ComplexTermSignatureArgs, CurTermVars, ComplexTermGenArgs, NCurTermVars),
  GenArg=..[TermName|ComplexTermGenArgs],
  generalizeArgs(GroundTerms, SigArgs, NCurTermVars, GenArgs, NewTermVars).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% generalizeGroundLiteral(+GroundLiteral, +Signature, +CurrentTerm2Variables, -GeneralizedLiteral, -NewTerm2Variables)
%
%  Given:
%    Ground literal: a ground literal e.g. atom(m1, 'o', 4.5).
%    Signature: the signature for the literal, e.g. atom(+molecule, +atomid, +charge)
%    CurrentTerm2Variables: a list with triples (Type, Variable, Constant) mapping each variable (of type Type) to constant Constant
%
%  Returns:
%    GeneralizedLiteral: the most general version of GroundLiteral, matching the constants in ground literal to the ones
%  existing in CurrentTerm2Variables. any new constant is added to CurrentTerm2Variables
%    NewTerm2Variables: Updated CurrentTerm2Variables with new constants appearing in GroundLiteral
%
%  Notes:
%    If Signature is of the form 'not(_)' then the output variables of signature are not grounded and shouldn't
%  be added to NewTerm2Variables (nor bind existing Current2Variables)
%
% E.g.
%
% generalizeGroundLiteral(not a(m1,_5894), not a(+molecule,-int), [(int,X,5)], G, V)
%        G = not a(_A,_B), V = [(molecule,_A,m1),(int,X,5)]
% generalizeGroundLiteral(a(f(m1),5), a(+molecule,-int), [], G, V)
%        G = a(_A,_B), V = [(int,_B,5),(molecule,_A,f(m1))]
% generalizeGroundLiteral(a(f(m1),5), a(f(+molecule),-int), [], G, V)
%        G = a(f(_A),_B), V = [(int,_B,5),(molecule,_A,m1)]
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

generalizeGroundLiteral(GroundLiteral, Signature, CurrentTerm2Variables, GeneralizedLiteral, NewTerm2Variables):-
  GroundLiteral=..[PredName|LiteralArgs],
  Signature=..[PredName|SignatureArgs],
  generalizeArgs(LiteralArgs, SignatureArgs, CurrentTerm2Variables, GeneralizedArgs, NewTerm2Variables),
  GeneralizedLiteral=..[PredName|GeneralizedArgs].

%generalizeAux(+GroundBody, +CurrentTermsVariables, +Signatures, -GeneralizedBody)
%
% CurrentTermsVariables is a list of the form Type(Var,Const)

generalizeAux([], _, [], []).
generalizeAux([GroundBody|GroundBodys], CurTermsVars, [Signature|Signatures], [BodyLiteral|BodyLiterals]):-
  generalizeGroundLiteral(GroundBody, Signature, CurTermsVars, BodyLiteral, NCurTermsVars),
  generalizeAux(GroundBodys, NCurTermsVars, Signatures, BodyLiterals).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% generalize(+GroundHypothesis, +Signatures, -Hypothesis)
%
% Given:
%   Ground hypothesis: a ground hypothesis (i.e. with no variables)
%   Signatures: a list of signatures of each literal in the ground hypothesis
%
% Returns:
%   Hypothesis: the most general hypothesis that can be constructed (in clause form)
%
%
% E.g. generalize([good(m1),atom(m1,c),atom(m1,o),not a(m1,_5894)],
%                 [good(+molecule),atom(+molecule,-atom),atom(+molecule,-atom),not a(+molecule,-int)],
%                 H)
%      H=good(_A):-atom(_A,_B),atom(_A,_C),not a(_A,_D) 
%
%
% An interesting question: When we have 2 variables of the same type in Hypothesis, by construction
% we known their ground contents are always different. Should we express this in our hypothesis by
% adding a few extra literals like A\=B (if A and B are of the same type) ?
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

generalize([GroundHead|GroundBody], [HeadSignature|BodySignatures], [HeadLiteral|BodyLiterals]):-
  generalizeGroundLiteral(GroundHead, HeadSignature, [], HeadLiteral, Term2Variables),
  generalizeAux(GroundBody, Term2Variables, BodySignatures, BodyLiterals).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% getPredCallArgs(+PredCall, +Signature, +Indexs, -Values)
%
%    This is an auxiliary predicate. It returns in Values the values of the
%  arguments of PredCall (a predicate with signature Signature), at indexs Indexs)
%    PredCall has signature Signature
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

getPredCallArgs(PredCall, Signature, ConstIndexs, ConstValues):-
  buildPredCall(Signature, IOCVars, _Types, PredCall),
  elemsAtPos(IOCVars, ConstIndexs, ConstValues).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% prove(+Example, -Proof, -GroundHypothesis)
%
% Given:
%   Example: a ground fact to be proved
% 
% Returns:
%
%   Proof: a proof of this ground fact obtained by unfolding the Top theory
%   GroundHypothesis: a list of ground literals that are the facts from the background knowledge
%                     that were used in the proof of the given example
%
% e.g. prove(good(m1), P, GH)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

prove(Example, [modeh(Signature,ConstValues)|Proof], GroundHypothesis):-
  theory(1, call_modeh(Signature, Example), ClauseBody),
  signatureConstIndexs(Signature, ConstIndexs),
  getPredCallArgs(Example, Signature, ConstIndexs, ConstValues),
  proveAux(ClauseBody, [], RProof, (1,[Example]), (_HypLen, RevGroundHypothesis)),
  reverse(RProof, Proof),
  reverse(RevGroundHypothesis, GroundHypothesis).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% proveAux(+ListOfClauses, +CurrentProof, -FinalProof, +(HypothesisLength,CurrentGroundHypothesis), -(FinalHypLength,FinalGroundHypothesis))
%
% Given:
%   ListOfClauses: list of clauses to prove
%   CurrentProof
% 
% Returns:
%   FinalProof: the final proof for the list of clauses reversed
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

proveAux([], P, P, GH, GH):-!.
proveAux([call_randomMember(Elem, List)|Clauses], Proof, FinalProof, CurGroundHyp, FinalGroundHyp):-
  !,
  randomMember(Elem, Pos, List),
  proveAux(Clauses, [randomMemberPos(Pos)|Proof], FinalProof, CurGroundHyp, FinalGroundHyp).
proveAux([call_modeb(Recall, Signature, PredCall)|Clauses], Proof, FinalProof, CurGroundHyp, FinalGroundHyp):-
  !,
  signatureConstIndexs(Signature, ConstIndexs),
  getPredCallArgs(PredCall, Signature, ConstIndexs, ConstValues),  % bind the ConstValues, even when PredCall is not instantiated
  solve(Recall, PredCall, CurGroundHyp, NCurGroundHyp),
  proveAux(Clauses, [modeb(Signature,ConstValues)|Proof], FinalProof, NCurGroundHyp, FinalGroundHyp).
proveAux([Clause|Clauses], Proof, FinalProof, (CurHypLen,CurGroundHyp), FinalGroundHyp):-
  (addLiteral2Body(Clause)-> % if clause is responsible for adding a literal to the body of the hypothesis
     setting(clause_length, MaxClauseLength), % check if we are not already on the limit lengtht
     CurHypLen<MaxClauseLength,
     findall(theory(Nk, ClauseK, ClauseBodyK), top_theory:theory(Nk, ClauseK, ClauseBodyK), ClauseBodies),
     randomMember(theory(N, Clause, ClauseBody), ClauseBodies) % and we pick one of its possible definitions (i.e. literals to add) randomly
   ;
     theory(N, Clause, ClauseBody)
  ),
  (split(ClauseBody, !, ClauseBodyBeforeCut, ClauseBodyAfterCut)->
    %ClauseBody has cut, emulate its interpretation
      proveAux(ClauseBodyBeforeCut, Proof, TProof, (CurHypLen,CurGroundHyp), N0CurGroundHyp),!,
      proveAux(ClauseBodyAfterCut, TProof, Proof1, N0CurGroundHyp, NCurGroundHyp)
    ;
      proveAux(ClauseBody, Proof, Proof1, (CurHypLen,CurGroundHyp), NCurGroundHyp) % no cut, do it all together
  ),
  proveAux(Clauses, Proof1, FinalProof, NCurGroundHyp, FinalGroundHyp).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% solve(+Recall, +Predicate, +(CurHypLen,CurrentGroundHypothesis), -(FinalHypLen,FinalGroundHypothesis))
%
%  Returns a random solution (i.e. ground instantiation of its variables) for Predicate from
%  its first Recall solutions, ensuring that the instantiation found doesn't already exist in the body
%  of the current hypothesis
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

solve(_Recall, Predicate, CurGroundHyp, FinalGroundHyp):-
  cache(Predicate, Solutions),!, %see if instantiations for Predicate are available in the cache
  %format("Solutions for ~w found in cache: ~w~N", [Predicate, Solutions]),
  pickRandomSolution(Predicate, Solutions, CurGroundHyp, FinalGroundHyp).
solve(Recall, Predicate, CurGroundHyp, FinalGroundHyp):-
  % Predicate calls not cached yet, add them to cache first
  uniqueInterpretations(Recall, Predicate, SolsForPredCall),
  %format("Adding pair: ~w,~w to cache~N", [Predicate, SolsForPredCall]),
  asserta(cache(Predicate, SolsForPredCall)),
  pickRandomSolution(Predicate, SolsForPredCall, CurGroundHyp, FinalGroundHyp).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% pickRandomSolution(+Predicate, +Solutions, +(CurHypLen,CurrentGroundHypothesis), -(FinalHypLen,FinalGroundHypothesis))
%
% Given:
%   Predicate: a predicate to be called (with variables to ground)
%   Solutions: a list of possible groundings for this predicate
%   CurrentGroundHypothesis: the current hypothesis so far
% Returns:
%   In the FinalGroundHypothesis, an head is created and is a randomly choosed grouding for Predicate
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

pickRandomSolution(Predicate, Solutions, (CurHypLen, CurGroundHyp), (NHypLen,[Predicate|CurGroundHyp])):-
  randomMember(Predicate, Solutions), % pick a Predicate at random from the several solutions
  \+member(Predicate, CurGroundHyp),  % ensure it does not appear yet in the current hypothesis
  NHypLen is CurHypLen+1.             % increase the size of the current hypothesis
