/**
 * EasierSBS project - Java file
 * Copyright (C) 2011 EBM WebSourcing - Petals Link
 * 
 * EasierSBS is free project: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * EasierSBS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public 
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/lgpl-3.0.txt>.	
 * 
 */ 
package com.petalslink.easiersbs.matching.service.util;

import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import com.petalslink.easiersbs.matching.service.api.profile.infered.InferedElement;
import com.petalslink.easiersbs.matching.service.api.profile.infered.InferedSemanticConcept;
import com.petalslink.easiersbs.matching.service.api.profile.infered.RatedURI;
import com.petalslink.easiersbs.matching.service.profile.infered.RatedURIImpl;
import com.petalslink.easiersbs.registry.service.api.model.SemanticElement;
import com.petalslink.easiersbs.registry.service.api.model.SemanticPart;

/**
 * Static helpers that extract comparable words from semantic parts and URIs,
 * and that rate the semantic similarity between infered concepts/elements
 * (search profile side) and target concepts/elements (service side).
 * 
 * @author Nicolas Boissel-Dallier - Petals Link
 */
public class SimilarityUtil {
	
	/**
	 * Word boundary used by {@link #splitInWords(String)}: a zero-width position
	 * located just before an upper-case letter followed by lower-case letters,
	 * or any hyphen, underscore, digit or whitespace character.
	 * Compiled once because the expression never changes.
	 */
	// TODO improve regex to extract acronyms
	private static final Pattern WORD_SEPARATOR = Pattern.compile("(?=[A-Z][a-z]+)|[-_0-9\\s]");
	
	/**
	 * Collect the words related to a set of semantic parts: the words of each
	 * part name (when it has one) followed by the words found in the fragments
	 * of its semantic concept URIs.
	 * 
	 * @param parts semantic parts to analyze
	 * @return lower-cased words, in iteration order, duplicates kept
	 */
	public static List<String> extractElementRelatedWords(Set<? extends SemanticPart> parts){
		List<String> words = new ArrayList<String>();
		
		for(SemanticPart part : parts){
			String name = part.getName();
			if(name != null){
				words.addAll(splitInWords(name));
			}
			// An empty concept set simply contributes no word
			words.addAll(extractUriRelatedWords(part.getSemanticConcepts()));
		}
		
		return words;
	}
	
	/**
	 * Collect the words contained in the fragment part of each URI.
	 * URIs without fragment are ignored.
	 * 
	 * @param uris URIs to analyze
	 * @return lower-cased words, in iteration order, duplicates kept
	 */
	public static List<String> extractUriRelatedWords(Set<URI> uris){
		List<String> words = new ArrayList<String>();
		
		for(URI uri : uris){
			String fragment = uri.getFragment();
			if(fragment != null){
				words.addAll(splitInWords(fragment));
			}
		}
		return words;
	}
	
	/**
	 * Split a text in lower-cased words. Words are delimited by camel-case
	 * boundaries, hyphens, underscores, digits and whitespaces; empty tokens
	 * are dropped. For instance {@code "getUser_name"} gives
	 * {@code [get, user, name]}.
	 * 
	 * @param txt text to split, may be {@code null}
	 * @return list of words, empty if the text is {@code null} or has no word
	 */
	public static List<String> splitInWords(String txt){
		List<String> words = new ArrayList<String>();
		if(txt == null){
			return words;
		}
		for(String token : WORD_SEPARATOR.split(txt)){
			if(!token.isEmpty()){
				// Locale.ROOT keeps the result independent from the JVM default
				// locale (e.g. Turkish dotless i would break word comparison)
				words.add(token.toLowerCase(Locale.ROOT));
			}
		}
		return words;
	}
	
	
	/**
	 * Measure the semantic similarity between a set of semantic infered concepts
	 * and a set of target concepts.
	 * For each infered concept, the best rate among its rated concepts matching
	 * a target concept is kept (0 when none matches); the result is the average
	 * of these best rates.
	 * 
	 * @param inferedConcepts infered concepts (search profile side)
	 * @param targetConcepts concept URIs of the target (service side)
	 * @return double Average measure of semantic similarity, 0.0 if one of the sets is empty
	 */
	public static double measureSemanticSimilarity(Set<InferedSemanticConcept> inferedConcepts, 
			Set<URI> targetConcepts){
		
		if(inferedConcepts.isEmpty() || targetConcepts.isEmpty()){
			return 0.0;
		}
		
		// We search for each concept from the search profile the best rate 
		// of service concept similarities
		double sum = 0.0;
		for(InferedSemanticConcept inferedConcept : inferedConcepts){
			sum += bestRate(inferedConcept.getRatedSemanticConcepts(), targetConcepts);
		}
		return sum / inferedConcepts.size();
	}
	
	/**
	 * Find the highest rate carried by the rated concepts equal to one of the target concepts.
	 * 
	 * @param ratedConcepts rated concepts of one infered concept
	 * @param targetConcepts concept URIs of the target
	 * @return best rate found, 0.0 if no target concept is part of the rated concepts
	 */
	private static double bestRate(Set<RatedURI> ratedConcepts, Set<URI> targetConcepts){
		double best = 0.0;
		
		for(URI targetConceptUri : targetConcepts){
			RatedURI serviceConcept = new RatedURIImpl(targetConceptUri);
			if(! ratedConcepts.contains(serviceConcept)){
				continue;
			}
			// The probe built above carries no meaningful rate: retrieve the 
			// equal element stored in the set to read its rate
			for(RatedURI rated : ratedConcepts){
				if(rated.equals(serviceConcept)){
					best = Math.max(best, rated.getRate());
					break;
				}
			}
		}
		return best;
	}

	
	/**
	 * Measure semantic similarity between a set of infered elements and a set of target elements
	 * according to properties.
	 * The result is the average, over the infered elements, of their best similarity 
	 * with any target element, weighted by the degree of match:
	 * <ul>
	 * <li>Fail: average is 0, result is 0</li>
	 * <li>Subsume: at least one target element matches no infered element, 
	 * result is average * subsumeMark</li>
	 * <li>Plugin: at least one infered element is matched by no target element, 
	 * result is average * pluginMark</li>
	 * <li>Exact: otherwise, result is the average</li>
	 * </ul>
	 * 
	 * @param infElements infered elements (search profile side)
	 * @param targetElements elements of the target (service side)
	 * @param subsumeMark factor applied to the average for a subsume match
	 * @param pluginMark factor applied to the average for a plugin match
	 * @return weighted similarity rate, 0.0 if one of the sets is empty
	 */
	public static double measureElementSimilarity(Set<InferedElement> infElements, Set<SemanticElement> targetElements,
													double subsumeMark, double pluginMark){
		
		if(infElements.isEmpty() || targetElements.isEmpty()){
			return 0.0;
		}
		
		// Creation of coverage map: best rate obtained by each infered element
		Map<InferedElement, Double> coverageMap = new HashMap<InferedElement, Double>();
		for(InferedElement element : infElements){
			coverageMap.put(element, 0.0);
		}
		
		// Becomes true as soon as a target element matches no infered element
		boolean unusedServElement = false;
		
		for(SemanticElement targetElement : targetElements){
			double maxRate = 0.0;
			
			for(InferedElement infElement : infElements){
				double rate = measureSemanticSimilarity(
									infElement.getInferedSemanticConcepts(), 
									targetElement.getSemanticConcepts());
				
				if(rate != 0){
					coverageMap.put(infElement, Math.max(coverageMap.get(infElement), rate));
					maxRate = Math.max(rate, maxRate);
				}
			}
			
			if(maxRate == 0.0){
				unusedServElement = true;
			}
		}
		
		// Treatment (measure according to previous ranks)
		// Rate = DegreeOfMatch * Avg of best concepts' rank
		double avg = VectorUtil.average(coverageMap.values().toArray(new Double[coverageMap.size()]));
		
		// Fail
		if(avg == 0.0){
			return 0.0;
		}
		// Subsume
		if(unusedServElement){
			return (avg * subsumeMark);
		}
		// Plugin
		if(coverageMap.containsValue(0.0)){
			return (avg * pluginMark);
		}
		// Exact
		return avg;
	}
}
