This is an automated email from the git hooks/post-receive script. New change to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git from 0dd6ace Review cloudtag request from postgresql and fix indexation from lucene adds ba59165 Remove ExperimentationService adds f37fe4f refs #9197 review way to use Tika, filter file content indexation for pdf, opendocument text/presentation and ms word/powerpoint adds 458b1e6 fix pdf mimetype in TikaUtils adds 4cd2f7d change version to 3.0-SNAPSHOT adds 24f863d Dude, remember there is document with no file during index refresh adds e9a5d00 Remove 3 chars words from cloud in homepage new 92ac2f3 Revert the disparition of ExperimentationService with rebase :o The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Detailed log of new commits: commit 92ac2f3d0fe116b3f055c2434f16fac80186f817 Author: Yannick Martel <martel@codelutin.com> Date: Wed Jun 7 17:02:07 2017 +0200 Revert the disparition of ExperimentationService with rebase :o Summary of changes: .gitignore | 3 - coselmar-bundle/pom.xml | 2 +- coselmar-persistence/pom.xml | 2 +- coselmar-rest/pom.xml | 2 +- .../indexation/DocumentsIndexationService.java | 21 ++----- .../coselmar/services/indexation/LuceneUtils.java | 7 --- .../coselmar/services/indexation/TikaUtils.java | 54 +++++++++++++++++ .../indexation/TransverseIndexationService.java | 4 +- .../coselmar/services/v1/AdminWebService.java | 19 ++---- .../coselmar/services/v1/DocumentsWebService.java | 28 ++------- .../services/v1/ExperimentationService.java | 67 +--------------------- coselmar-rest/src/main/resources/mapping | 5 +- coselmar-ui/pom.xml | 2 +- pom.xml | 2 +- 14 files changed, 79 insertions(+), 139 deletions(-) create mode 100644 
coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TikaUtils.java -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit 92ac2f3d0fe116b3f055c2434f16fac80186f817 Author: Yannick Martel <martel@codelutin.com> Date: Wed Jun 7 17:02:07 2017 +0200 Revert the disparition of ExperimentationService with rebase :o --- .gitignore | 3 - .../services/v1/ExperimentationService.java | 126 +++++++++++++++++++++ coselmar-rest/src/main/resources/mapping | 3 + 3 files changed, 129 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fc6f9da..4ac8fb8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,3 @@ /*/.settings /*/*.zargo~ coselmar-ui/src/main/webapp/version.txt - - -coselmar-ui/src/main/webapp/version.txt diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java new file mode 100644 index 0000000..76b3101 --- /dev/null +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -0,0 +1,126 @@ +package fr.ifremer.coselmar.services.v1; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; +import fr.ifremer.coselmar.beans.CloudWord; +import fr.ifremer.coselmar.persistence.entity.Question; +import fr.ifremer.coselmar.persistence.entity.Status; +import fr.ifremer.coselmar.services.CoselmarWebServiceSupport; +import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; +import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.nuiton.topia.persistence.TopiaNoResultException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; 
+import java.util.List; +import java.util.Map; + +/** + * @author ymartel (martel@codelutin.com) + */ +public class ExperimentationService extends CoselmarWebServiceSupport { + + private static final Log log = LogFactory.getLog(ExperimentationService.class); + + public List<String> evaluateTopWordsGeneration() { + Question question = getQuestionDao().forStatusEquals(Status.IN_PROGRESS).findAny(); + long start = System.currentTimeMillis(); + String questionId = getShortIdFromFull(question.getTopiaId()); + System.out.println("Question : " + questionId); + List<CloudWord> luceneTopWords = getLuceneTopWords(questionId); + long stop = System.currentTimeMillis(); + String luceneTiming = String.format("Recherche par Lucene : %d termes en %d ms", luceneTopWords.size(), stop - start); + start = System.currentTimeMillis(); + List<CloudWord> postgresTopWords = getPostgresTopWords(questionId); + stop = System.currentTimeMillis(); + String pgTiming = String.format("Recherche par Postgresql : %d termes en %d ms", postgresTopWords.size(), stop - start); + + return Lists.newArrayList(luceneTiming, pgTiming); + } + + public List<CloudWord> getLuceneTopWords(String questionId) { + + // Retrieve Question + String fullQuestionId = getFullIdFromShort(Question.class, questionId); + Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); + + List<CloudWord> topWords = new ArrayList<>(); + + QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); + DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); + try { + Map<String, Long> topQuestionsTerms = questionsIndexationService.getTopQuestionsTerms(Lists.newArrayList(questionId)); + List<String> shortDocumentIds = getShortDocumentIds(question); + Map<String, Long> topDocumentsTerms = documentsIndexationService.getTopDocumentsTerms(shortDocumentIds); + for (Map.Entry<String, Long> 
documentTermFreq : topDocumentsTerms.entrySet()) { + String term = documentTermFreq.getKey(); + Long frequence = documentTermFreq.getValue(); + if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); + } else { + topQuestionsTerms.put(term, frequence); + } + } + + for (Map.Entry<String, Long> termFreq : topQuestionsTerms.entrySet()) { + String term = termFreq.getKey(); + CloudWord cloudWord = new CloudWord(term, termFreq.getValue()); + topWords.add(cloudWord); + } + + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to index new question", e); + } + } + + ImmutableList<CloudWord> cloudWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + + return cloudWords; + } + + public List<CloudWord> getPostgresTopWords(String questionId) { + + List<CloudWord> topWords; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + topWords = Collections.EMPTY_LIST; + } + } else { + topWords = Collections.EMPTY_LIST; + } + + return topWords; + } + + //////////////////////////////////////////////////////////////////////////// + /////////////////////// Internal Parts ///////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + protected List<String> getShortDocumentIds(Question question) { + List<String> shortDocumentIds = new ArrayList<>(); + for (String relatedDocumentId : question.getRelatedDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(relatedDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + for (String 
closingDocumentId : question.getClosingDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(closingDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + return shortDocumentIds; + } +} diff --git a/coselmar-rest/src/main/resources/mapping b/coselmar-rest/src/main/resources/mapping index b498a97..7f2f678 100644 --- a/coselmar-rest/src/main/resources/mapping +++ b/coselmar-rest/src/main/resources/mapping @@ -78,6 +78,9 @@ GET /v1/general/topwords GeneralWebService.getTopWords # Admin API POST /v1/admin/lucene/index AdminWebService.refreshLuceneIndex +GET /v1/experimentation/topwords ExperimentationService.evaluateTopWordsGeneration +GET /v1/experimentation/lucenetopwords/{questionId} ExperimentationService.getLuceneTopWords +GET /v1/experimentation/pgtopwords/{questionId} ExperimentationService.getPostgresTopWords # Export -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
participants (1)
-
codelutin.com scm