/**
 * EasierSBS project - Java file
 * Copyright (C) 2011 EBM WebSourcing - Petals Link
 * 
 * EasierSBS is free project: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * EasierSBS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public 
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/lgpl-3.0.txt>.	
 * 
 */ 
package com.petalslink.easiersbs.matching.service.matcher.similarity;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Logger;

import org.apache.commons.lang.StringUtils;

import com.petalslink.easiersbs.matching.service.api.matcher.similarity.SimilarityMeasure;

/**
 * Shared helpers for similarity measures: word-frequency counting and
 * construction of two aligned frequency vectors, with an optional tolerance
 * for near-identical words based on the Levenshtein distance.
 * 
 * @author Nicolas Boissel-Dallier - Petals Link
 */
public abstract class AbstractSimilarityMeasureImpl implements SimilarityMeasure {

	protected static Logger logger = Logger.getLogger(AbstractSimilarityMeasureImpl.class.getName());
	
	/** A word must be strictly longer than this to be fuzzy-matched while counting frequencies. */
	private static final int FUZZY_MIN_LENGTH = 3;
	
	/** A word must be strictly shorter than this to be fuzzy-matched. */
	private static final int FUZZY_MAX_LENGTH = 12;
	
	/** Maximum accepted Levenshtein distance; fuzzy matching is only active when this is greater than 0. */
	private int levenshteinLimit = 0;

	/**
	 * Count the occurrences of each word of the given list.
	 * 
	 * When the Levenshtein limit is greater than 0, a word that is not yet a key
	 * of the result and whose length is between 4 and 11 characters is credited
	 * to an already registered key within the Levenshtein limit, if there is one.
	 * Each occurrence is counted exactly once, so the sum of the returned
	 * frequencies always equals the number of words given.
	 * 
	 * @param words the words to count
	 * @return a map associating each retained word with its number of occurrences
	 */
	protected Map<String, Integer> countWordFrequency(List<String> words){
		Map<String, Integer> res = new HashMap<String, Integer>();
		for(String word : words){
			if(res.containsKey(word)){
				res.put(word, res.get(word).intValue() + 1);
				continue;
			}
			// Levenshtein distance recognition
			String closeKey = findCloseKey(res.keySet(), word);
			if(closeKey != null){
				res.put(closeKey, res.get(closeKey).intValue() + 1);
			} else {
				res.put(word, 1);
			}
		}
		return res;
	}
	
	/**
	 * Look for an already registered key close enough to the given word.
	 * 
	 * The search stops at the first close key: crediting every close key would
	 * count a single occurrence of the word several times.
	 * 
	 * @param keys the keys registered so far
	 * @param word the word to match
	 * @return the first key within the Levenshtein limit, or null if fuzzy
	 *         matching is disabled, the word length is out of bounds, or no
	 *         key is close enough
	 */
	private String findCloseKey(Set<String> keys, String word){
		if(levenshteinLimit <= 0
			|| word.length() <= FUZZY_MIN_LENGTH || word.length() >= FUZZY_MAX_LENGTH){
			return null;
		}
		for(String key : keys){
			if(isClose(key, word)){
				return key;
			}
		}
		return null;
	}
	
	/**
	 * Tell whether two words are within the Levenshtein limit of each other.
	 * The cheap length comparison is evaluated first so that the distance is
	 * only computed for plausible candidates.
	 */
	private boolean isClose(String a, String b){
		return Math.abs(a.length() - b.length()) <= levenshteinLimit
			&& StringUtils.getLevenshteinDistance(a, b) <= levenshteinLimit;
	}
	
	/**
	 * Build two frequency vectors sharing the same dimensions from two
	 * word-frequency maps.
	 * 
	 * The dimensions are all the words of the first map plus the words of the
	 * second map that have no correspondence in the first one. When the
	 * Levenshtein limit is greater than 0, the occurrences of a word of the
	 * second map that corresponds to a word of the first map (see
	 * getCloseWords) are added to the dimension of that first word.
	 * 
	 * @param wordFrequencies1 word frequencies of the first text
	 * @param wordFrequencies2 word frequencies of the second text
	 * @return the two aligned vectors
	 */
	protected DoubleVector getProperVectors(Map<String, Integer> wordFrequencies1, Map<String, Integer> wordFrequencies2){
		Set<String> words = new HashSet<String>(wordFrequencies1.keySet());
		// Occurrences of the second text credited to a word of the first text,
		// aggregated once instead of rescanning all correspondences per word
		Map<String, Integer> credited = new HashMap<String, Integer>();
		if(levenshteinLimit > 0){
			Map<String, String> closeWords = getCloseWords(wordFrequencies1.keySet(), wordFrequencies2.keySet());
			for(String w2 : wordFrequencies2.keySet()){
				if(!closeWords.containsKey(w2)){
					words.add(w2);
					continue;
				}
				String w1 = closeWords.get(w2);
				int count = wordFrequencies2.get(w2).intValue();
				Integer soFar = credited.get(w1);
				credited.put(w1, soFar == null ? count : soFar.intValue() + count);
			}
		} else {
			words.addAll(wordFrequencies2.keySet());
		}
		
		int[] vector1 = new int[words.size()];
		int[] vector2 = new int[words.size()];
		
		int i = 0;
		for(String word : words){
			vector1[i] = frequencyOf(wordFrequencies1, word);
			Integer merged = credited.get(word);
			vector2[i] = merged != null ? merged.intValue() : frequencyOf(wordFrequencies2, word);
			i++;
		}
		
		return new DoubleVector(vector1, vector2);
	}
	
	/**
	 * @return the frequency registered for the word, or 0 if the word is not a key of the map
	 */
	private int frequencyOf(Map<String, Integer> frequencies, String word){
		return frequencies.containsKey(word) ? frequencies.get(word).intValue() : 0;
	}
	
	
	/**
	 * Check correspondences between words from two sets.
	 * 
	 * Exact matches are registered first and are never replaced: a word of the
	 * second set that also belongs to the first set always corresponds to
	 * itself. The remaining words of the second set are then matched to words
	 * of the first set within the Levenshtein limit, provided both words are
	 * shorter than 12 characters. Each word of the first set takes at most one
	 * such approximate correspondence. Unlike countWordFrequency, no minimum
	 * word length is required here.
	 * 
	 * @param words1 words from the first set
	 * @param words2 words from the second set
	 * @return Map<words2, eqwords1> containing all correspondences between sets
	 */
	private Map<String, String> getCloseWords(Set<String> words1, Set<String> words2){
		Map<String, String> closeWords = new HashMap<String, String>();
		// Exact matches first, so that neither the early exit of the fuzzy
		// search nor a later fuzzy hit can hide or overwrite them
		for(String w2 : words2){
			if(words1.contains(w2)){
				closeWords.put(w2, w2);
			}
		}
		for(String w1 : words1){
			if(w1.length() >= FUZZY_MAX_LENGTH){
				continue;
			}
			for(String w2 : words2){
				if(words1.contains(w2)){
					// Already bound to its exact match
					continue;
				}
				if(w2.length() < FUZZY_MAX_LENGTH && isClose(w1, w2)){
					closeWords.put(w2, w1);
					break;
				}
			}
		}
		return closeWords;
	}

	/**
	 * @return the levenshteinLimit
	 */
	public int getLevenshteinLimit() {
		return levenshteinLimit;
	}

	/**
	 * Set the limit for Levenshtein distance. This value must be inferior to 3:
	 * any greater value is replaced by 2. A value of 0 or less disables the
	 * Levenshtein-based matching.
	 * 
	 * @param levenshteinLimit the levenshteinLimit to set
	 */
	public void setLevenshteinLimit(int levenshteinLimit) {
		this.levenshteinLimit = Math.min(levenshteinLimit, 2);
	}
	
}
