branch develop updated (3c733ba -> e9f2ed5)
This is an automated email from the git hooks/post-receive script. New change to branch develop in repository coselmar. See http://git.codelutin.com/coselmar.git from 3c733ba fixes #7931 Add project in related projects when a document is a clsoing documentfor the project new e9f2ed5 fixes #7922 dot not have word with less than 3 letters in cloud tag The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. Detailed log of new commits: commit e9f2ed5a2a494dfcba39cc4941c3e26a618382ee Author: Yannick Martel <martel@©odelutin.com> Date: Fri Jan 29 12:47:29 2016 +0100 fixes #7922 dot not have word with less than 3 letters in cloud tag Summary of changes: .../indexation/DocumentsIndexationService.java | 42 +++++++++++++-- .../indexation/QuestionsIndexationService.java | 39 ++++++++++++-- .../indexation/TransverseIndexationService.java | 59 ++++++++++++++++++---- 3 files changed, 122 insertions(+), 18 deletions(-) -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch develop in repository coselmar. See http://git.codelutin.com/coselmar.git commit e9f2ed5a2a494dfcba39cc4941c3e26a618382ee Author: Yannick Martel <martel@©odelutin.com> Date: Fri Jan 29 12:47:29 2016 +0100 fixes #7922 dot not have word with less than 3 letters in cloud tag --- .../indexation/DocumentsIndexationService.java | 42 +++++++++++++-- .../indexation/QuestionsIndexationService.java | 39 ++++++++++++-- .../indexation/TransverseIndexationService.java | 59 ++++++++++++++++++---- 3 files changed, 122 insertions(+), 18 deletions(-) diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 144f4a2..4825315 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -66,6 +66,9 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { protected static final String DOCUMENT_AUTHORS_INDEX_PROPERTY = "documentAuthors"; protected static final String DOCUMENT_SUMMARY_INDEX_PROPERTY = "documentSummary"; protected static final String DOCUMENT_KEYWORD_INDEX_PROPERTY = "documentKeyword"; + protected static final String DOCUMENT_NAME_CLOUD_TAG_PROPERTY = "documentCloudTagName"; + protected static final String DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY = "documentCloudTagSummary"; + protected static final String DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY = "documentCloudTagKeyword"; protected static final String DOCUMENT_TYPE = "documentindextype"; public void indexDocument(DocumentBean document) throws IOException { @@ -73,15 +76,31 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); + String documentName = document.getName(); + String documentSummary = document.getSummary(); + + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED)); + // Cloud Tag management + if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName, Field.Store.YES)); + } +// if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); +// } + Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); + + // Cloud Tag management + if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword, TextField.TYPE_STORED)); + } } } @@ -204,14 +223,29 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES)); + String documentName = document.getName(); + String documentSummary = document.getSummary(); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES)); + doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); + + // Cloud Tag management + if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName, Field.Store.YES)); + } +// if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); +// } Set<String> keywords = document.getKeywords(); if (keywords != null) { for (String keyword : keywords) { doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED)); + + // Cloud Tag management + if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword, TextField.TYPE_STORED)); + } } } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java index b59fb41..0650d11 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java @@ -76,6 +76,9 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { protected static final String QUESTION_THEME_INDEX_PROPERTY = "questionTheme"; protected static final String QUESTION_STATUS_INDEX_PROPERTY = "questionStatus"; protected static final String QUESTION_PRIVACY_INDEX_PROPERTY = "questionPrivacy"; + protected static final String QUESTION_TITLE_CLOUD_TAG_PROPERTY = "questionCloudTagTitle"; + protected static final String QUESTION_SUMMARY_CLOUD_TAG_PROPERTY = "questionCloudTagSummary"; + protected static final String QUESTION_THEME_CLOUD_TAG_PROPERTY = "questionCloudTagTheme"; protected static final String DOCUMENT_TYPE = "questionindextype"; public void indexQuestion(QuestionBean question) throws IOException { @@ -91,18 +94,33 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { .build(); ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs; + String questionTitle = question.getTitle(); + String questionSummary = question.getSummary(); if (hits.length > 0) { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); + doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, Field.Store.YES)); + + // Cloud Tag management + if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle, Field.Store.YES)); + } +// if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); +// } Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { doc.add(new TextField(QUESTION_THEME_INDEX_PROPERTY, theme, Field.Store.YES)); + + // Cloud Tag management + if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme, TextField.TYPE_STORED)); + } } } @@ -120,16 +138,29 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { Document doc = new Document(); doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); - doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, question.getTitle(), Field.Store.YES)); - doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, question.getSummary(), Field.Store.YES)); + doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); + doc.add(new TextField(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, Field.Store.YES)); doc.add(new TextField(QUESTION_STATUS_INDEX_PROPERTY, question.getStatus(), Field.Store.YES)); doc.add(new TextField(QUESTION_PRIVACY_INDEX_PROPERTY, question.getPrivacy(), Field.Store.YES)); + // Cloud Tag management + if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new TextField(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle, Field.Store.YES)); + } +// if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { +// doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); +// } + Set<String> themes = question.getThemes(); if (themes != null) { for (String theme : themes) { doc.add(new Field(QUESTION_THEME_INDEX_PROPERTY, theme, TextField.TYPE_STORED)); + + // Cloud Tag management + if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme, TextField.TYPE_STORED)); + } } } diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java index c9df56e..c193ed9 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/TransverseIndexationService.java @@ -37,8 +37,10 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.TermQuery; import java.io.IOException; +import java.util.Comparator; import java.util.LinkedHashMap; import java.util.Map; +import java.util.TreeMap; /** * This Services provides operations about indexed Objects. @@ -54,6 +56,8 @@ import java.util.Map; */ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { + public static final int CLOUD_TAG_WORD_MIN_SIZE = 3; + protected void cleanAllIndex() throws IOException { BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("type", QuestionsIndexationService.DOCUMENT_TYPE)), BooleanClause.Occur.SHOULD) @@ -70,23 +74,27 @@ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { DirectoryReader indexReader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false); - Map<String, Long> result = new LinkedHashMap<>(); + Map<String, Long> topWords = new LinkedHashMap<>(); try { String[] searchedFields = { - QuestionsIndexationService.QUESTION_TITLE_INDEX_PROPERTY, - QuestionsIndexationService.QUESTION_THEME_INDEX_PROPERTY, - DocumentsIndexationService.DOCUMENT_NAME_INDEX_PROPERTY, - DocumentsIndexationService.DOCUMENT_KEYWORD_INDEX_PROPERTY + QuestionsIndexationService.QUESTION_TITLE_CLOUD_TAG_PROPERTY, + QuestionsIndexationService.QUESTION_THEME_CLOUD_TAG_PROPERTY, + DocumentsIndexationService.DOCUMENT_NAME_CLOUD_TAG_PROPERTY, + DocumentsIndexationService.DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY }; - TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 20, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); + TermStats[] highFreqTerms = HighFreqTermsMultiFields.getHighFreqTermsMultiFields(indexReader, 40, searchedFields, new HighFreqTerms.TotalTermFreqComparator()); + for (TermStats termStats : highFreqTerms) { long totalTermFreq = termStats.totalTermFreq; String value = termStats.termtext.utf8ToString(); - if (result.containsKey(value)) { - result.put(value, result.get(value) + totalTermFreq); - } else { - result.put(value, totalTermFreq); + if (value.length() >= CLOUD_TAG_WORD_MIN_SIZE) { + + if (topWords.containsKey(value)) { + topWords.put(value, topWords.get(value) + totalTermFreq); + } else { + topWords.put(value, totalTermFreq); + } } } @@ -95,7 +103,38 @@ public class TransverseIndexationService extends CoselmarSimpleServiceSupport { } indexReader.close(); + MapValueComparator valueComparator = new MapValueComparator(topWords); + TreeMap<String, Long> sortedResult = new TreeMap(valueComparator); + sortedResult.putAll(topWords); + + TreeMap result = new TreeMap(); + int count = 20; + for (Map.Entry<String, Long> key : sortedResult.entrySet()) { + result.put(key.getKey(), key.getValue()); + if (--count == 0) { + break; + } + + } return result; } } + +class MapValueComparator implements Comparator<String> { + Map<String, Long> base; + + public MapValueComparator(Map<String, Long> base) { + this.base = base; + } + + // Note: this comparator imposes orderings that are inconsistent with + // equals. + public int compare(String a, String b) { + if (base.get(a) >= base.get(b)) { + return -1; + } else { + return 1; + } // returning 0 would merge keys + } +} \ No newline at end of file -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
participants (1)
-
codelutin.com scm