/**
 * EasierSBS project - Java file
 * Copyright (C) 2011 EBM WebSourcing - Petals Link
 * 
 * EasierSBS is free project: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * EasierSBS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public 
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/lgpl-3.0.txt>.	
 * 
 */ 
package com.petalslink.easiersbs.matching.service.matcher.similarity;

import java.util.List;
import java.util.Map;

/**
 * Dice Coefficient Similarity Measure:
 * (2 * Common Terms) / (Number of terms in words1 + Number of terms in words2)
 * 
 * @author Nicolas Boissel-Dallier - Petals Link
 */
public class DiceSimilarityImpl extends AbstractSimilarityMeasureImpl{

	/**
	 * Creates a Dice similarity measure without setting a Levenshtein limit.
	 */
	public DiceSimilarityImpl(){
		
	}
	
	/**
	 * Creates a Dice similarity measure and forwards the given limit to the
	 * parent class through {@code setLevenshteinLimit}.
	 * 
	 * @param levenshteinLimit value handed to the parent's Levenshtein limit setter
	 */
	public DiceSimilarityImpl(int levenshteinLimit){
		super.setLevenshteinLimit(levenshteinLimit);
	}


	/**
	 * Computes the Dice coefficient between two lists of words.
	 * 
	 * @param words1 first list of words
	 * @param words2 second list of words
	 * @return (2 * common terms) / (terms in words1 + terms in words2);
	 *         0 when either list is null or empty, or when no term is left
	 *         to compare
	 */
	public double measureSimilarity(List<String> words1, List<String> words2) {
		// Nothing to compare: a missing list is treated like an empty one.
		if(words1 == null || words2 == null || words1.isEmpty() || words2.isEmpty()){
			return 0;
		}
		
		Map<String, Integer> wordFrequencies1 = countWordFrequency(words1);
		Map<String, Integer> wordFrequencies2 = countWordFrequency(words2);
		
		DoubleVector vectors = getProperVectors(wordFrequencies1, wordFrequencies2);
		
		int commons = 0;
		// Terms of words1 and words2 are counted again from the vectors instead of
		// reusing the list sizes, in case of Levenshtein changes.
		// NOTE(review): presumably those changes happen in getProperVectors - confirm.
		int nbW1 = 0;
		int nbW2 = 0;
		for(int i = 0 ; i < vectors.length() ; i++){
			boolean inWords1 = vectors.getVector1()[i] != 0;
			boolean inWords2 = vectors.getVector2()[i] != 0;
			if(inWords1){
				nbW1++;
			}
			if(inWords2){
				nbW2++;
			}
			// A position that is zero in both vectors belongs to neither list
			// and must not inflate the denominator.
			if(inWords1 && inWords2){
				commons++;
			}
		}
		
		int totalTerms = nbW1 + nbW2;
		if(totalTerms == 0){
			// Avoid 0.0 / 0, which would yield NaN instead of a similarity score.
			return 0;
		}
		
		double res = (2.0 * commons) / totalTerms;
		logger.finest("Dice's Coefficient similarity fully measured: " + res);
		return res;
	}
	
}
