This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit 92ac2f3d0fe116b3f055c2434f16fac80186f817 Author: Yannick Martel <martel@codelutin.com> Date: Wed Jun 7 17:02:07 2017 +0200 Revert the disparition of ExperimentationService with rebase :o --- .gitignore | 3 - .../services/v1/ExperimentationService.java | 126 +++++++++++++++++++++ coselmar-rest/src/main/resources/mapping | 3 + 3 files changed, 129 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fc6f9da..4ac8fb8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,3 @@ /*/.settings /*/*.zargo~ coselmar-ui/src/main/webapp/version.txt - - -coselmar-ui/src/main/webapp/version.txt diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java new file mode 100644 index 0000000..76b3101 --- /dev/null +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -0,0 +1,126 @@ +package fr.ifremer.coselmar.services.v1; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; +import fr.ifremer.coselmar.beans.CloudWord; +import fr.ifremer.coselmar.persistence.entity.Question; +import fr.ifremer.coselmar.persistence.entity.Status; +import fr.ifremer.coselmar.services.CoselmarWebServiceSupport; +import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; +import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.nuiton.topia.persistence.TopiaNoResultException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; 
+import java.util.List; +import java.util.Map; + +/** + * @author ymartel (martel@codelutin.com) + */ +public class ExperimentationService extends CoselmarWebServiceSupport { + + private static final Log log = LogFactory.getLog(ExperimentationService.class); + + public List<String> evaluateTopWordsGeneration() { + Question question = getQuestionDao().forStatusEquals(Status.IN_PROGRESS).findAny(); + long start = System.currentTimeMillis(); + String questionId = getShortIdFromFull(question.getTopiaId()); + System.out.println("Question : " + questionId); + List<CloudWord> luceneTopWords = getLuceneTopWords(questionId); + long stop = System.currentTimeMillis(); + String luceneTiming = String.format("Recherche par Lucene : %d termes en %d ms", luceneTopWords.size(), stop - start); + start = System.currentTimeMillis(); + List<CloudWord> postgresTopWords = getPostgresTopWords(questionId); + stop = System.currentTimeMillis(); + String pgTiming = String.format("Recherche par Postgresql : %d termes en %d ms", postgresTopWords.size(), stop - start); + + return Lists.newArrayList(luceneTiming, pgTiming); + } + + public List<CloudWord> getLuceneTopWords(String questionId) { + + // Retrieve Question + String fullQuestionId = getFullIdFromShort(Question.class, questionId); + Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); + + List<CloudWord> topWords = new ArrayList<>(); + + QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); + DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); + try { + Map<String, Long> topQuestionsTerms = questionsIndexationService.getTopQuestionsTerms(Lists.newArrayList(questionId)); + List<String> shortDocumentIds = getShortDocumentIds(question); + Map<String, Long> topDocumentsTerms = documentsIndexationService.getTopDocumentsTerms(shortDocumentIds); + for (Map.Entry<String, Long> 
documentTermFreq : topDocumentsTerms.entrySet()) { + String term = documentTermFreq.getKey(); + Long frequence = documentTermFreq.getValue(); + if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); + } else { + topQuestionsTerms.put(term, frequence); + } + } + + for (Map.Entry<String, Long> termFreq : topQuestionsTerms.entrySet()) { + String term = termFreq.getKey(); + CloudWord cloudWord = new CloudWord(term, termFreq.getValue()); + topWords.add(cloudWord); + } + + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to index new question", e); + } + } + + ImmutableList<CloudWord> cloudWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + + return cloudWords; + } + + public List<CloudWord> getPostgresTopWords(String questionId) { + + List<CloudWord> topWords; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + topWords = Collections.EMPTY_LIST; + } + } else { + topWords = Collections.EMPTY_LIST; + } + + return topWords; + } + + //////////////////////////////////////////////////////////////////////////// + /////////////////////// Internal Parts ///////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + protected List<String> getShortDocumentIds(Question question) { + List<String> shortDocumentIds = new ArrayList<>(); + for (String relatedDocumentId : question.getRelatedDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(relatedDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + for (String 
closingDocumentId : question.getClosingDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(closingDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + return shortDocumentIds; + } +} diff --git a/coselmar-rest/src/main/resources/mapping b/coselmar-rest/src/main/resources/mapping index b498a97..7f2f678 100644 --- a/coselmar-rest/src/main/resources/mapping +++ b/coselmar-rest/src/main/resources/mapping @@ -78,6 +78,9 @@ GET /v1/general/topwords GeneralWebService.getTopWords # Admin API POST /v1/admin/lucene/index AdminWebService.refreshLuceneIndex +GET /v1/experimentation/topwords ExperimentationService.evaluateTopWordsGeneration +GET /v1/experimentation/lucenetopwords/{questionId} ExperimentationService.getLuceneTopWords +GET /v1/experimentation/pgtopwords/{questionId} ExperimentationService.getPostgresTopWords # Export -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.