This is an automated email from the git hooks/post-receive script. New commit to branch develop in repository coselmar. See http://git.codelutin.com/coselmar.git commit e9f2ed5a2a494dfcba39cc4941c3e26a618382ee Author: Yannick Martel <martel@©odelutin.com> Date: Fri Jan 29 12:47:29 2016 +0100 fixes #7922 do not have words with fewer than 3 letters in cloud tag --- .../indexation/DocumentsIndexationService.java | 42 +++++++++++++-- .../indexation/QuestionsIndexationService.java | 39 ++++++++++++-- .../indexation/TransverseIndexationService.java | 59 ++++++++++++++++++---- 3 files changed, 122 insertions(+), 18 deletions(-) diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 144f4a2..4825315 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -66,6 +66,9 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { protected static final String DOCUMENT_AUTHORS_INDEX_PROPERTY = "documentAuthors"; protected static final String DOCUMENT_SUMMARY_INDEX_PROPERTY = "documentSummary"; protected static final String DOCUMENT_KEYWORD_INDEX_PROPERTY = "documentKeyword"; + protected static final String DOCUMENT_NAME_CLOUD_TAG_PROPERTY = "documentCloudTagName"; + protected static final String DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY = "documentCloudTagSummary"; + protected static final String DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY = "documentCloudTagKeyword"; protected static final String DOCUMENT_TYPE = "documentindextype"; public void indexDocument(DocumentBean document) throws IOException { @@ -73,15 +76,31 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new 
StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); + String documentName = document.getName(); + String documentSummary = document.getSummary(); + + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED)); + // Cloud Tag management + if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName, Field.Store.YES)); + } +// if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); +// } + Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); + + // Cloud Tag management + if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword, TextField.TYPE_STORED)); + } } } @@ -204,14 +223,29 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); + String documentName = document.getName(); + String documentSummary = document.getSummary(); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, 
Field.Store.YES)); doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); + + // Cloud Tag management + if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName, Field.Store.YES)); + } +// if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); +// } Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); + + // Cloud Tag management + if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword, TextField.TYPE_STORED)); + } } } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java index b59fb41..0650d11 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java @@ -76,6 +76,9 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { protected static final String QUESTION_THEME_INDEX_PROPERTY = "questionTheme"; protected static final String QUESTION_STATUS_INDEX_PROPERTY = "questionStatus"; protected static final String QUESTION_PRIVACY_INDEX_PROPERTY = "questionPrivacy"; + protected static final String QUESTION_TITLE_CLOUD_TAG_PROPERTY = 
"questionCloudTagTitle"; + protected static final String QUESTION_SUMMARY_CLOUD_TAG_PROPERTY = "questionCloudTagSummary"; + protected static final String QUESTION_THEME_CLOUD_TAG_PROPERTY = "questionCloudTagTheme"; protected static final String DOCUMENT_TYPE = "questionindextype"; public void indexQuestion(QuestionBean question) throws IOException { @@ -91,18 +94,33 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { .build(); ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs; + String questionTitle = question.getTitle(); + String questionSummary = question.getSummary(); if (hits.length > 0) { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); + doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, Field.Store.YES)); + + // Cloud Tag management + if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle, Field.Store.YES)); + } +// if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); +// } Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { doc.add(new TextField(QUESTION_THEME_INDEX_PROPERTY, theme, Field.Store.YES)); + + // Cloud Tag management + if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme, TextField.TYPE_STORED)); + } } } @@ -120,16 +138,29 @@ public class QuestionsIndexationService extends 
CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); + doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, Field.Store.YES)); doc.add(new TextField(QUESTION_STATUS_INDEX_PROPERTY, question.getStatus(), Field.Store.YES)); doc.add(new TextField(QUESTION_PRIVACY_INDEX_PROPERTY, question.getPrivacy(), Field.Store.YES)); + // Cloud Tag management + if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle, Field.Store.YES)); + } +// if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); +// } + Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { doc.add(new Field(QUESTION_THEME_INDEX_PROPERTY, theme, TextField.TYPE_STORED)); + + // Cloud Tag management + if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme, TextField.TYPE_STORED)); + } } } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java index c9df56e..c193ed9 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java @@ -37,8 +37,10 @@ import 
org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.TermQuery; import java.io.IOException; +import java.util.Comparator; import java.util.LinkedHashMap; import java.util.Map; +import java.util.TreeMap; /** * This Services provides operations about indexed Objects. @@ -54,6 +56,8 @@ import java.util.Map; */ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { + public static final int CLOUD_TAG_WORD_MIN_SIZE = 3; + protected void cleanAllIndex() throws IOException { BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("type", QuestionsIndexationService.DOCUMENT_TYPE)), BooleanClause.Occur.SHOULD) @@ -70,23 +74,27 @@ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { DirectoryReader indexReader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); - Map<String, Long> result = new LinkedHashMap<>(); + Map<String, Long> topWords = new LinkedHashMap<>(); try { String[] searchedFields = { - QuestionsIndexationService.QUESTION_TITLE_INDEX_PROPERTY, - QuestionsIndexationService.QUESTION_THEME_INDEX_PROPERTY, - DocumentsIndexationService.DOCUMENT_NAME_INDEX_PROPERTY, - DocumentsIndexationService.DOCUMENT_KEYWORD_INDEX_PROPERTY + QuestionsIndexationService.QUESTION_TITLE_CLOUD_TAG_PROPERTY, + QuestionsIndexationService.QUESTION_THEME_CLOUD_TAG_PROPERTY, + DocumentsIndexationService.DOCUMENT_NAME_CLOUD_TAG_PROPERTY, + DocumentsIndexationService.DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY }; - TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 20, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); + TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 40, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); + for (TermStats termStats : highFreqTerms) { long totalTermFreq = termStats.totalTermFreq; String value = termStats.termtext.utf8ToString(); - if 
(result.containsKey(value)) { - result.put(value, result.get(value) + totalTermFreq); - } else { - result.put(value, totalTermFreq); + if (value.length() >= CLOUD_TAG_WORD_MIN_SIZE) { + + if (topWords.containsKey(value)) { + topWords.put(value, topWords.get(value) + totalTermFreq); + } else { + topWords.put(value, totalTermFreq); + } } } @@ -95,7 +103,38 @@ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { } indexReader.close(); + MapValueComparator valueComparator = new MapValueComparator(topWords); + TreeMap<String, Long> sortedResult = new TreeMap(valueComparator); + sortedResult.putAll(topWords); + + TreeMap result = new TreeMap(); + int count = 20; + for (Map.Entry<String, Long> key : sortedResult.entrySet()) { + result.put(key.getKey(), key.getValue()); + if (--count == 0) { + break; + } + + } return result; } } + +class MapValueComparator implements Comparator<String> { + Map<String, Long> base; + + public MapValueComparator(Map<String, Long> base) { + this.base = base; + } + + // Note: this comparator imposes orderings that are inconsistent with + // equals. + public int compare(String a, String b) { + if (base.get(a) >= base.get(b)) { + return -1; + } else { + return 1; + } // returning 0 would merge keys + } +} \ No newline at end of file -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.