/**
 * EasierSBS project - Java file
 * Copyright (C) 2011 EBM WebSourcing - Petals Link
 * 
 * EasierSBS is free project: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * EasierSBS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public 
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/lgpl-3.0.txt>.	
 * 
 */ 
package com.petalslink.easiersbs.matching.service.util;

import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import com.petalslink.easiersbs.matching.service.api.matcher.DegreeOfMatch;
import com.petalslink.easiersbs.matching.service.api.profile.inferred.InferredConcept;
import com.petalslink.easiersbs.matching.service.api.profile.inferred.InferredElement;
import com.petalslink.easiersbs.matching.service.api.profile.inferred.RatedURI;
import com.petalslink.easiersbs.matching.service.api.profile.rated.RatedSemanticElement;
import com.petalslink.easiersbs.matching.service.profile.inferred.RatedURIImpl;
import com.petalslink.easiersbs.matching.service.profile.rated.RatedSemanticElementImpl;
import com.petalslink.easiersbs.registry.service.api.model.SemanticElement;
import com.petalslink.easiersbs.registry.service.api.model.generic.GenericElement;
import com.petalslink.easiersbs.registry.service.api.model.generic.GenericPart;
import com.petalslink.easiersbs.registry.service.impl.util.ServiceUtil;

/**
 * @author Nicolas Boissel-Dallier - Petals Link
 */
public class SimilarityUtil {
	
	public static List<String> extractElementRelatedWords(Set<? extends GenericPart> parts){
		List<String> res = new ArrayList<String>();
		
		for(GenericPart part : parts){
			if(part.getName() != null){
				res.addAll(splitInWords(part.getName()));
			}
			if(! part.getSemanticConcepts().isEmpty()){
				res.addAll(extractUriRelatedWords(part.getSemanticConcepts()));
			}
		}
		
		return res;
	}
	
	public static List<String> extractUriRelatedWords(Set<URI> uris){
		List<String> res = new ArrayList<String>();
		
		for(URI uri : uris){
			String frag = uri.getFragment();
			if (frag != null){
				res.addAll(splitInWords(frag));
			}
		}
		return res;
	}
	
	public static List<String> splitInWords(String txt){
		List<String> res = new ArrayList<String>();
		// TODO improve regex to extract acronyms
		for(String s : txt.split("(?=[A-Z][a-z]+)|[-_0-9\\s]")){
			if(!s.isEmpty()){
				res.add(s.toLowerCase());
			}
		}
		return res;
	}
	
	
	/**
	 * Measure the semantic similarity between a set of semantic inferred concepts
	 * and a set of target concepts
	 * 
	 * @param inferedConcepts
	 * @param targetConcepts
	 * @return double Average measure of semantic similarity
	 */
	public static double measureSemanticSimilarity(Set<InferredConcept> inferedConcepts, 
			Set<URI> targetConcepts){
		
		double res = 0.0;
		if(! inferedConcepts.isEmpty() && ! targetConcepts.isEmpty()) {
			double sum = 0.0;
			
			// We search for each concept from the search profile the best rate 
			// of service concept similarities
			for(InferredConcept inferedConcept : inferedConcepts){
				Set<RatedURI> ratedConcepts = inferedConcept.getRatedSemanticConcepts();
				double conceptRate = 0.0;
				
				for(URI targetConceptUri : targetConcepts){
					RatedURI serviceConcept = new RatedURIImpl(targetConceptUri);
					if(ratedConcepts.contains(serviceConcept)){
						Iterator<RatedURI> it = ratedConcepts.iterator();
						while(it.hasNext()){
							RatedURI uri = it.next();
							if(uri.equals(serviceConcept)){
								conceptRate = Math.max(conceptRate, uri.getRate());
								break;
							}
						}
					}
				}
				sum += conceptRate;
			}
			res = sum / inferedConcepts.size();
		}
		return res;
	}

	
	/**
	 * Measure semantic similarity between a set of inferred elements and a set of target elements
	 * according to properties
	 * 
	 * @param infElements input/output inferred main element
	 * @param servElements list of potential target elements
	 * @param subsumeMark double corresponding to subsume penalty
	 * @param pluginMark double corresponding to plug-in penalty
	 * @return rate for input/output
	 */
	public static RatedSemanticElement measureElementSimilarity(InferredElement infElement, SemanticElement targetElement, 
													double subsumeMark, double pluginMark){
		
		RatedSemanticElement res = new RatedSemanticElementImpl();
		
		if( infElement != null && targetElement != null){
			
			// Creation of coverage map of targetConcepts (Plug-in detection)
			Map<SemanticElement, Double> pluginMap = new HashMap<SemanticElement, Double>();
			for(SemanticElement element : ServiceUtil.getFlattenElements(targetElement)){
				pluginMap.put(element, 0.0);
			}
			
			// Creation of coverage map of inferred concepts (Subsume detection)
			// TODO: try to avoid this map, returning both rate and boolean for current element
			Map<InferredElement, Double> subsumeMap = new HashMap<InferredElement, Double>();
			for(InferredElement element : ServiceUtil.getFlattenElements(infElement)){
				subsumeMap.put(element, 0.0);
			}
			
			// Compute root element rate
			Set<InferredElement> hierarchy = new HashSet<InferredElement>();
			hierarchy.add(infElement);
			double rate = measureElementSimilarity(infElement, targetElement, pluginMap, subsumeMap, hierarchy);

			// Compute penalty (taking worst case, depending of element type (input/output))
			double penalty = 1.0;
			
			// Fail
			if(rate == 0.0){ 
				res.setCoverage(DegreeOfMatch.FAIL);
			} else {
				// Plug-in
				if( ! isCoverageAcceptable(targetElement, pluginMap)){ 
					res.setCoverage(DegreeOfMatch.PLUGIN);
					penalty = pluginMark;
				}
				// Subsume (only root element matter)
				if( ! isCoverageAcceptable(infElement, subsumeMap)){ 
					if(res.getCoverage() != DegreeOfMatch.PLUGIN
							|| pluginMark > subsumeMark){
						res.setCoverage(DegreeOfMatch.SUBSUMES);
					}
					penalty = Math.min(penalty, subsumeMark);
				}
				// Exact
				if(penalty == 1.0) {
					res.setCoverage(DegreeOfMatch.EXACT);
				}
			}
			
			res.setRate(rate * penalty);
		}
		return res;
		
	}
	
	/**
	 * Compute rate for specific inferedElement, taking account to its children
	 * 
	 * @param infElement current searched element
	 * @param targetElements list of potential elements
	 * @param pluginMap coverage map for plug-in detection
	 * @param subsumeMap coverage map for subsume detection
	 * @return element rate according to available target elements
	 */
	private static double measureElementSimilarity(InferredElement infElement, SemanticElement targetElement,
			Map<SemanticElement, Double> pluginMap, Map<InferredElement, Double> subsumeMap, Set<InferredElement> hierarchy) {
		
		// Computation of inferred element semantic rate
		double semanticRate = 0.0;
		SemanticElement bestTarget = null;
		for(SemanticElement target : ServiceUtil.getFlattenElements(targetElement)){
			double targetRate = measureSemanticSimilarity(infElement.getInferedSemanticConcepts(), 
														target.getSemanticConcepts());
			// We keep best rate amongst targets
			if(targetRate > semanticRate){
				bestTarget = target;
				semanticRate = targetRate;
			}
		}
		
		// If a potential target is found, we declare results in coverage maps
		if(bestTarget != null){
			pluginMap.put(bestTarget, semanticRate);
			subsumeMap.put(infElement, semanticRate);
		}
		
		// Computation of child rates (if semantic rate can be improved)
		double childrenRate = 0.0;
		if(infElement.hasChildElement() && semanticRate != 1.0){
			Double[] childRates = new Double[infElement.getChildElements().size()];
			int i = 0;
			for(InferredElement infChild : infElement.getChildElements()){
				if(subsumeMap.get(infChild) != 0.0 || hierarchy.contains(infChild)){
					childRates[i++] = subsumeMap.get(infChild);
				} else {
					hierarchy.add(infChild);
					childRates[i++] = measureElementSimilarity(infChild, targetElement, pluginMap, subsumeMap, hierarchy);
					hierarchy.remove(infChild);
				}
			}
			childrenRate = VectorUtil.average(childRates);
		}
		
		// Final element rate
		return Math.max(semanticRate, childrenRate);
	}
	
	/**
	 * Return true if the element or all its children are covered, false otherwise
	 * 
	 * @param element to study
	 * @param coverageMap 
	 * @return boolean depending of element coverage
	 */
	private static <E extends GenericElement<E>> boolean isCoverageAcceptable(E element, Map<E, Double> coverageMap){
		return isCoverageAcceptable(element, coverageMap, new HashSet<E>());
	}
	
	private static <E extends GenericElement<E>> boolean isCoverageAcceptable(E element, Map<E, Double> coverageMap, Set<E> elementList){
		if(coverageMap.get(element) != 0.0){
			return true;
		} else if(element.hasChildElement() 
				&& ! elementList.contains(element)) {
			elementList.add(element);
			for(E child : element.getChildElements()){
				if( ! isCoverageAcceptable(child, coverageMap, elementList)){
					return false;
				}
			}
			return true;
		}
		return false;
	}
	

	
}
