% the descriptionLength and claLength is different - -coupled with the score_GE_Mutant_time...


% Main difference from flux record, is that the flux is passed top-down: (1)expandNode
% this is to match the flux record in coverageSorted_fluxRecord
% this is based on 'hypothesisSelection_noEIs_combineCheck.pl', but the candidateSelection is different, where grouping based on EI coverage is done first. 

/*
The scoreing is -(coverage-descriptionLength) -- so the smaller the better a score. This leads to: when same score, teh samller the descriptionLength, the better is a candidate;
Need to change the combine_and_competing, not to start the last one, since the better one is not at the top of list. 
*/




% apart from checking combinable when combining two candidates; you also need to lose the constraint that not always the best one.

% call hypothesisSelection(startNode,FinalHypothesis)
% output is a candidate hypothesis which include the current node. This hypothesis is accompanied by its properties: DL-EITIs-H. H = sets of ClaID with their coverage. 
% why EITIs, rather than EIs -- that will make the scoring even easier? -- But when you calculate the intersection with current node, EITI will make it easy. -- that is why EIs is not included, because it will be changed after computing the intersection.

hypothesisSelection(CurrentNode-CurrendNodeEITIs,BestCandidate):-
write('------------------------Another round: CurrentNode is '),portray_clause(CurrentNode-CurrendNodeEITIs),nl,
	expandNode(CurrentNode-CurrendNodeEITIs,ChildNodes),
	extractEndNode(ChildNodes,EndNodeEITIs0,NonEndChildNodes),sort(EndNodeEITIs0,EndNodeEITIs), 
	write('expanded results'),write({NonEndChildNodes,endNode-EndNodeEITIs}),nl,
	maplist(hypothesisSelection,NonEndChildNodes,ReturnedCandidates), % if NonEndChildNodes=[], then--???
	(EndNodeEITIs0==[]->
		candidateSelection(ReturnedCandidates,CurrentNode-CurrendNodeEITIs,BestCandidate);
		candidateSelection([0-EndNodeEITIs-[(endNode-EndNodeEITIs)-EndNodeEITIs]|ReturnedCandidates],CurrentNode-CurrendNodeEITIs,BestCandidate)),
	nl.


expandNode(CurrentNode-CurrendNodeEITIs,ChildNodes):-
	findall(Child-NewFlux,(connect(CurrentNode,Child,Flux0),ord_intersection(Flux0,CurrendNodeEITIs,NewFlux),NewFlux\==[]),ChildNodes).

extractEndNode([],[],[]).
extractEndNode([ChildNode|ChildNodes],EndNodeCoverge,NonEndNode):-
	extractEndNode(ChildNodes,PreEndNodeCoverge,PreNonEndNode),
	(ChildNode=endNode-EI-TI-EITIs ->
		EndNodeCoverge=[EI-TI|PreEndNodeCoverge], NonEndNode=PreNonEndNode;	
		EndNodeCoverge=PreEndNodeCoverge, NonEndNode=[ChildNode|PreNonEndNode]
	).
	

% The returned candidate include the current node, while the scoring doesn't: when combining, don't want to include the current node repeatedly.
% score, then sort, then combine -- start from the highest score
% group is done before scoring? there is scoring and sorting within each group 
% then another scoring and sorting for those chosen from the group.

t(X).

candidateSelection(ReturnedCandidates,CurrentNode-CurrentCoverage,DescriptionLength-EITIs-[CurrentNode-EITIs|NewCandidate]):-
	maplist(scoreCandidateBranches,ReturnedCandidates,ScoredCandidates0),
	%ScoredCandidates0=ScoredCandidates, %
	selectlist(nonEmptyCandidate,ScoredCandidates0,ScoredCandidates),	% filter out those empty EI list 

	sort(ScoredCandidates,SortedScoredCandidates), write('*** Candidates are '),nl,print_list(SortedScoredCandidates),nl,

	group_basedOn_EI(SortedScoredCandidates,GroupedCandidates),write('*** Candidates are group as here'),nl,print_list(GroupedCandidates),nl,
	%SortedScoredCandidates=[LowestScore-BestDL-BestEIs-BestEITIs-BestCandidate|RestScoredCandidates],
	%group_basedOn_EI(RestScoredCandidates,[BestEIs-[BestDL-BestEITIs-BestCandidate]],GroupedScoredCandidates),

	combine_and_competing(GroupedCandidates,[],NewCandidate),extractEITI(NewCandidate,EITIs),descriptionLength(NewCandidate,DescriptionLength0),
	write('***Combine Results are: '),write(DescriptionLength0-EITIs-NewCandidate),nl,t(1),
	exExtraction(EITIs,ClaEICoverage), 
	claLength(CurrentNode,ClaEICoverage,CurrentNodeDescriptionLength),
	DescriptionLength is DescriptionLength0+CurrentNodeDescriptionLength.
	

nonNegativeScoreBranch(Score-DL-EIs-EITIs-CandidateBranch):- Score>=0.
nonEmptyCombinedCandidate(DL-EIRemovedNum-CombinedCandidate):- CombinedCandidate\== [].
nonEmptyCandidate(Score-DL-EIs-EITIs-CandidateBranch):-EIs\==[].



scoreCandidateBranches(CandidateDescriptionLength-EITIs-Candidate,Score-CandidateDescriptionLength-EIs-EITIs-Candidate):-	
	exExtraction(EITIs,EIs), 	
	length(EIs,EINum),
	Score is CandidateDescriptionLength-10*EINum. % now the smaller the score, the better it is. %10*NewEINum-CandidateDescriptionLength.



% Input is [1-1,1-2,2-1,2-2,2-3], then the output is [1,2] 
exExtraction([],[]).
exExtraction([EI-TI|Record],EIs):- 
	exExtraction(Record,PreEIs),
	(PreEIs=[EI|Rest] ->
		EIs=PreEIs;
		EIs=[EI|PreEIs]
	).

extractEITI(Candidate,EITIs):-maplist(getEITIs,Candidate,EITIs0),ord_union(EITIs0,EITIs).

getEITIs(ClaID-EITIs,EITIs).

exExtractionFromCandidate(Candidate,EIs):-
	 extractEITI(Candidate,EITIs),
	 exExtraction(EITIs,EIs).



% Before scoring, it is CandidateDescriptionLength-EITIs-Candidate; 
% After scoring, it is Score-DL-EITIs-Candidate
/*
can the score also give EI, so that later, 
In terms of complexity, it will better to group first, then sorting, since, the O(Nlog(N)) is smaller. 
Since you'll need to compare their maximum later, so perhaps it is easier to 
*/

% Input is a list of Score-CandidateDescriptionLength-NewEIs-NewEITIs-EITIupdated_Candidate; Output is a list of SameEIs-[Candidate1,Candidate2]

group_basedOn_EI([],[]).
group_basedOn_EI([Score-DL-EIs-EITIs-Candidate|Rest],GroupResult):-
	group_basedOn_EI(Rest,PreGroupResult),
	(append(Pre,[EIs-EIsCandidates|Post],PreGroupResult)->
		 append(Pre,[EIs-[DL-EITIs-Candidate|EIsCandidates]|Post],GroupResult);
		 GroupResult=[EIs-[DL-EITIs-Candidate]|PreGroupResult]
	).


% initiate the sofar with 
% query with combine_and_competing(GroupedCandidates,0-[]-[],DL-EITIs-Candidate)
combine_and_competing([],Candidate,Candidate).
combine_and_competing([EIs-EIsCandidates|GroupedCandidates],CandidateSofar,Candidate):-
	exExtractionFromCandidate(CandidateSofar,EIsSofar),identifyComputingPart(EIs,EIsSofar,CompetingEIs),
	member(DL_Candidate-EITIsCandidate-EIsCandidate,EIsCandidates),
	(CompetingEIs=[] ->
		combineTwoCandidate(EIsCandidate,CandidateSofar,CandidateUpdated);
		powersets(CompetingEIs,Possibles),
		maplist(getCompensatedPair(CompetingEIs),Possibles,CompensatedPairs),
		maplist(combineTwoCompetingCandidates({EIsCandidate,CandidateSofar}),CompensatedPairs,ScoredPossibleCombinations0),
		selectlist(nonEmptyCombinedCandidate,ScoredPossibleCombinations0,ScoredPossibleCombinations),
		sort(ScoredPossibleCombinations,SortedScoredPossibleCombinations), write('Here are the possible Combinations'),print_list(SortedScoredPossibleCombinations),nl,nl,
		member(DLUpdated-CandidateUpdated,SortedScoredPossibleCombinations)  % choose the head:the one with lowest DL. 
		%SortedScoredPossibleCombinations=[TopDL-NewEITIs-NewCandidate|Rest],
		%topScoreList(SortedScoredPossibleCombinations,TopDL,TopScoreList),chooseMaximumPreviousSelection(TopScoreList,CandidateUpdated)
	),
	combine_and_competing(GroupedCandidates,CandidateUpdated,Candidate).
  

topScoreList([DL-Candidate|SortedScoredPossibleCombinations],TopDL,[]):- DL\==TopDL,!.
topScoreList([TopDL-Candidate|SortedScoredPossibleCombinations],TopDL,[Candidate|TopScoreList]):-
	    topScoreList(SortedScoredPossibleCombinations,TopDL,TopScoreList).
      

% Different from previous, here the input is EIs, rather than EITIs
identifyComputingPart(EIs,PreEIs,CompetingEIs):-
		ord_intersection(EIs,PreEIs,CompetingEIs).


getCompensatedPair(TotalSet,Set,{Set,CompensatedSet}):-
	ord_subtract(TotalSet,Set,CompensatedSet).

% another function that take in such a pair -- produce a candidate -- re-score(remove-duplicate(be careful -- they may just have different EI_Coverage)->description length -- their EI coverage remains the same -- all is comparisons about description length )	
combineTwoCompetingCandidates({Candidate1,Candidate2},{EIs1,EIs2},NewDL-NumRemovedEIs-CombinedCandidate):-
	isolation(Candidate1,EIs1,RemainEITIs1-RemainCandidate1), %OverlapEITIs-CandidateOverlapPart),
	isolation(Candidate2,EIs2,RemainEITIs2-RemainCandidate2),	
	(combineTwoCandidate(RemainCandidate1,RemainCandidate2,CombinedCandidate0)->
		CombinedCandidate=CombinedCandidate0;
		CombinedCandidate0=[]
	),
	descriptionLength(CombinedCandidate,NewDL),
	length(EIs2,NumRemovedEIs).
	


% *** when you remove certain bits, the EITI coverage is changed! -- the EITI is not simply the isolation you have now


combineTwoCandidate([],PreCandidate,PreCandidate).
combineTwoCandidate([ClaID-ClaEITIs|Candidate],PreCandidate,NewCandidateUpdated):-
	combineTwoCandidate(Candidate,PreCandidate,NewCandidate),
	%write('---Checking'),checkCombinable(ClaID,PreCandidate),write('good, pass checking'),nl,
	(append(PreClas,[ClaID-ClaPreEITIs|AfterClas],NewCandidate) ->
		ord_union(ClaEITIs,ClaPreEITIs,ClaNewEITIs),
		append(PreClas,[ClaID-ClaNewEITIs|AfterClas],NewCandidateUpdated);
		NewCandidateUpdated=[ClaID-ClaEITIs|NewCandidate]		
	).

%/* this is for syngenta project
checkCombinable(endNode-EndNodeEITIs,PreCandidate):-!. %endNode is always combinable
checkCombinable(ClaID,PreCandidate):-
	ClaID=[rs-{ReactionID,LimitingType,State,Time}],
	(member([rs-{ReactionID,LimitingType2,State2,Time}]-ClaEITIs,PreCandidate)-> 
		State2==State, LimitingType2==LimitingType;		
		Foo=0
	).
%*/

/* This is for Grammar example
checkCombinable(endNode-EndNodeEITIs,PreCandidate):-!. %endNode is always combinable
checkCombinable([Predicate-Word],PreCandidate):- !,
	(member([PrePredicate-Word]-ClaEITIs,PreCandidate)-> 
		Predicate == PrePredicate;		
		Foo=0
	).
checkCombinable(RestClaTypes,Candidate). % check redundency. no worry, it is taken care when addign. 
*/


	
	
% number of CompetingEIs is to be removed, so the less it is, the more remain.
% remove the ClaID from Candidate, if it contains CompetingEIs alone *** need to update the corresponding RemainEITIs as well!
isolation(Candidate,CompetingEIs,RemainEITIs-RemainCandidate_noRedundant):-
	clasSeparation(Candidate,CompetingEIs,RemainEITIs0-RemainCandidate,OverlapEITIs-CandidateOverlapPart),
	maplist(removeAlones(OverlapEITIs),RemainCandidate,RemainCandidate_noRedundant),
	ord_subtract(RemainEITIs0,OverlapEITIs,RemainEITIs).


% you can make this part more efficient, by doing ord_union at the end, rather than every time do a ord_union
clasSeparation([],CompetingEIs,[]-[],[]-[]).
clasSeparation([ClaID-ClaEITIs|Candidate],CompetingEIs,RemainEITIs-RemainCandidate,OverlapEITIs-CandidateOverlapPart):-
		clasSeparation(Candidate,CompetingEIs,PreRemainEITIs-PreRemainCandidate,PreOverlapEITIs-PreCandidateOverlapPart),
		exExtraction(ClaEITIs,ClaEIs),
		(ord_subset(ClaEIs,CompetingEIs)-> 	% if it has some parts not in CompetingEIs, then it shouldn't be removed
				RemainEITIs-RemainCandidate=PreRemainEITIs-PreRemainCandidate,
				ord_union(ClaEITIs,PreOverlapEITIs,OverlapEITIs),
				CandidateOverlapPart=[ClaID-ClaEITIs|PreCandidateOverlapPart];
				OverlapEITIs-CandidateOverlapPart=PreOverlapEITIs-PreCandidateOverlapPart,
				ord_union(ClaEITIs,PreRemainEITIs,RemainEITIs),
				RemainCandidate=[ClaID-ClaEITIs|PreRemainCandidate]
		).
	
removeAlones(AloneEITIs,ClaID-EITIs,ClaID-EITIsNoAlone):-
	%nl,write({EITIs,AloneEITIs}), write('check whether they are in order'),nl,
	ord_subtract(EITIs,AloneEITIs,EITIsNoAlone). % EITIs doesn't necessarily include everything in the AloneEITIs, but  



% if a subset, then remove that clause -- update the EITI in the rest of the clauses. 
% what if the remaining become uncompressed because -- no worry, it will be removed when new branch come. -- simply check whether the remaining still has coverage -- remove the one with single coverage -- long description: e.g. two computing components -- you can't simply remove that branch, maybe the rest still worth for that -- you are allowing the flexibility to decompose the module. % no worry, you can make the selection at this step more complicated later.


% selectlist(extractCompetingCla(CompetingEIs),) % no, it is better to isolate 
% for each claID, removing those belongs -- for the competing part, it doesn't necessarily always one branch, maybe a combination. 



descriptionLength([],0).
descriptionLength([ClaID-ClaEITIs|Candidate],DescriptionLength):-
	descriptionLength(Candidate,PreDescriptionLength),
	%claInterprete(ClaID,Cla),length(Cla,ClaDescriptionLength), -- don't worry about translate them more than once, since there is no redundant node -- probably there is (due to the DAG)
	
	exExtraction(ClaEITIs,ClaEICoverage), 
	claLength(ClaID,ClaEICoverage,ClaDescriptionLength),
	DescriptionLength is PreDescriptionLength+ClaDescriptionLength.

/*
claLength(endNode-EITIs,0):-!.
claLength(startNode,0):-!.
claLength(ClaID,ClaDescriptionLength):-
	claInterpreter(ClaID,Cla),
	length(Cla,ClaDescriptionLength).
% if you are looking for problem-specific scoring, like the Syngenta data, where GE degree is used to compute the score. 
*/

