/**
 * EasierSBS project - Java file
 * Copyright (C) 2011 EBM WebSourcing - Petals Link
 * 
 * EasierSBS is free project: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * EasierSBS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public 
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/lgpl-3.0.txt>.	
 * 
 */ 
package com.petalslink.easiersbs.matching.service.matcher.similarity;

import java.util.List;
import java.util.Map;

import com.petalslink.easiersbs.matching.service.util.VectorUtil;

/**
 * Cosine Similarity Measure = (A.B) / (|A|.|B|)
 * 
 * @author Nicolas Boissel-Dallier - Petals Link
 */
public class CosineSimilarityImpl extends AbstractSimilarityMeasureImpl {

	/**
	 * Creates a cosine similarity measure without touching the Levenshtein
	 * limit, so whatever value the parent class starts with stays in effect.
	 */
	public CosineSimilarityImpl(){
		
	}
	
	/**
	 * Creates a cosine similarity measure with an explicit Levenshtein limit.
	 * 
	 * @param levenshteinLimit value handed to the parent's
	 *        {@code setLevenshteinLimit(int)}; its exact meaning is defined
	 *        by the parent class
	 */
	public CosineSimilarityImpl(int levenshteinLimit){
		super.setLevenshteinLimit(levenshteinLimit);
	}
	
	/**
	 * Measures the cosine similarity between two word lists:
	 * (A.B) / (|A|.|B|), where A and B are the vectors built from the word
	 * frequencies of each list.
	 * 
	 * Word counting and vector alignment are delegated to
	 * {@code countWordFrequency} and {@code getProperVectors}, which are not
	 * defined in this class (presumably inherited from the parent).
	 * 
	 * @param words1 first list of words; {@code null} is treated as empty
	 * @param words2 second list of words; {@code null} is treated as empty
	 * @return the cosine of the angle between both frequency vectors, limited
	 *         to [-1, 1]; 0 if either list is {@code null} or empty, or if
	 *         one of the vectors has a zero magnitude
	 */
	public double measureSimilarity(List<String> words1, List<String> words2) {
		// Nothing to compare: an absent or empty list shares no word with anything
		if(words1 == null || words2 == null || words1.isEmpty() || words2.isEmpty()){
			return 0;
		}
		
		Map<String, Integer> wordFrequencies1 = countWordFrequency(words1);
		Map<String, Integer> wordFrequencies2 = countWordFrequency(words2);
		
		DoubleVector vectors = getProperVectors(wordFrequencies1, wordFrequencies2);
		
		double dotProduct = VectorUtil.dotProduct(vectors.getVector1(), vectors.getVector2());
		double divisor = VectorUtil.magnitude(vectors.getVector1()) 
						* VectorUtil.magnitude(vectors.getVector2());
		
		double res = 0;
		// Non-empty lists are expected to give non-zero magnitudes, but that
		// depends on helpers outside this class: check it instead of
		// returning NaN or Infinity
		if(divisor != 0){
			res = dotProduct / divisor;
			// Rounding of the square roots can push the quotient just above 1
			// (e.g. 3 / (sqrt(3) * sqrt(3)) gives 1.0000000000000002), so
			// bring it back into the mathematical range of a cosine
			res = Math.max(-1.0, Math.min(1.0, res));
		}
		
		logger.finest("Cosine similarity fully measured: " + res);
		return res;
	}

}
