Index: jrst2/src/java/org/codelutin/jrst/JRSTGenerator.java diff -u jrst2/src/java/org/codelutin/jrst/JRSTGenerator.java:1.3 jrst2/src/java/org/codelutin/jrst/JRSTGenerator.java:1.4 --- jrst2/src/java/org/codelutin/jrst/JRSTGenerator.java:1.3 Fri Nov 3 16:20:38 2006 +++ jrst2/src/java/org/codelutin/jrst/JRSTGenerator.java Mon Nov 6 15:08:46 2006 @@ -23,9 +23,9 @@ * Created: 30 oct. 06 00:14:18 * * @author poussin - * @version $Revision: 1.3 $ + * @version $Revision: 1.4 $ * - * Last update: $Date: 2006/11/03 16:20:38 $ + * Last update: $Date: 2006/11/06 15:08:46 $ * by : $Author: bpoussin $ */ @@ -40,6 +40,7 @@ import java.net.URL; import java.util.LinkedList; +import javax.xml.transform.ErrorListener; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; @@ -58,6 +59,10 @@ import org.dom4j.io.DocumentResult; import org.dom4j.io.DocumentSource; import org.xml.sax.ContentHandler; +import org.xml.sax.EntityResolver; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.ext.EntityResolver2; /** @@ -198,7 +203,7 @@ * @throws TransformerException * @throws IOException */ - public Document transform(Document doc, URL stylesheet) throws TransformerException, IOException { + public Document transform(Document doc, URL stylesheet, String ... args) throws TransformerException, IOException { // load the transformer using JAXP TransformerFactory factory = TransformerFactory.newInstance(); if (uriResolver != null) { @@ -210,6 +215,25 @@ new StreamSource( stylesheet.openStream() ) ); + // DEBUG To see where is the probleme with the dtd locator :( +// transformer.setErrorListener(new ErrorListener() { +// public void error(TransformerException exception) throws TransformerException { +// exception.printStackTrace(); +// } +// public void fatalError(TransformerException exception) throws TransformerException { +// exception.printStackTrace(); +// } +// public void warning(TransformerException exception) throws TransformerException { +// exception.printStackTrace(); +// } +// +// }); + + // TODO +// for (int i=0; i + * abbreviation + * acronym + * address (done) + * admonition + * attention + * attribution + * author (done) + * authors (partialy done) + * block_quote + * bullet_list (done) + * caption + * caution + * citation + * citation_reference + * classifier (done) + * colspec (done) + * comment + * compound + * contact (done) + * container + * copyright (done) + * danger + * date (done) + * decoration + * definition (done) + * definition_list (done) + * definition_list_item (done) + * description (done) + * docinfo (done) + * doctest_block + * document (done) + * emphasis (done) + * entry (done) + * enumerated_list (done) + * error + * field (done) + * field_body (done) + * field_list (done) + * field_name (done) + * figure + * footer + * footnote + * footnote_reference + * generated + * header + * hint + * image (done) + * important + * inline + * label + * legend + * line + * line_block + * list_item (done) + * literal (done) + * literal_block (done) + * note + * option + * option_argument + * option_group + * option_list + * option_list_item + * option_string + * organization (done) + * paragraph (done) + * pending + * problematic + * raw + * reference (partialy done) + * revision (done) + * row (done) + * rubric + * section (done) + * sidebar + * status (done) + * strong (done) + * subscript + * substitution_definition + * substitution_reference + * subtitle (done) + * superscript + * system_message + * table (done, only complexe table in lexer) + * target + * tbody (done) + * term (done) + * tgroup (done) + * thead (done) + * tip + * title (done) + * title_reference + * topic + * transition (done) + * version (done) + * warning + * + * * @author poussin */ public class JRSTReader { + /** to use log facility, just put in your code: log.info(\"...\"); */ + static private Log log = LogFactory.getLog(JRSTReader.class); + boolean ERROR_MISSING_ITEM = false; static int MAX_SECTION_DEPTH = -1000; + static protected Map defaultDirectives = null; + protected Map directives = new HashMap(); + + static { + defaultDirectives = new HashMap(); + defaultDirectives.put(IMAGE, new ImageDirective()); + defaultDirectives.put(DATE, new DateDirective()); + defaultDirectives.put("time", new DateDirective()); + // TODO put here all other directive + } + /** * */ @@ -188,6 +307,34 @@ /** + * @return the defaultDirectives + */ + public static JRSTDirective getDefaultDirective(String name) { + return defaultDirectives.get(name); + } + + /** + * @param defaultDirectives the defaultDirectives to set + */ + public static void addDefaultDirectives(String name, JRSTDirective directive) { + JRSTReader.defaultDirectives.put(name, directive); + } + + /** + * @return the defaultDirectives + */ + public JRSTDirective getDirective(String name) { + return directives.get(name); + } + + /** + * @param defaultDirectives the defaultDirectives to set + */ + public void addDirectives(String name, JRSTDirective directive) { + directives.put(name, directive); + } + + /** * On commence par decouper tout le document en Element, puis on construit * l'article a partir de ces elements. * @param reader @@ -200,15 +347,26 @@ Element root = composeDocument(lexer); + Document result = DocumentHelper.createDocument(); + result.setRootElement(root); + // remove all level attribute root.accept(new VisitorSupport() { public void visit(Element e) { e.addAttribute("level", null); + if ("true".equalsIgnoreCase(e.attributeValue("inline"))) { + e.addAttribute("inline", null); + try { + inline(e); + } catch (DocumentException eee) { + if (log.isWarnEnabled()) { + log.warn("Can inline text for " + e, eee); + } + } + } } }); - Document result = DocumentHelper.createDocument(); - result.setRootElement(root); return result; } @@ -231,7 +389,7 @@ lexer.remove(); Element title = result.addElement(TITLE); copyLevel(item, title); - title.appendContent(inline(item.getText())); + title.addAttribute("inline", "true").setText(item.getText()); } // le sous titre du doc @@ -240,7 +398,7 @@ lexer.remove(); Element subtitle = result.addElement(SUBTITLE); copyLevel(item, subtitle); - subtitle.appendContent(inline(item.getText())); + subtitle.addAttribute("inline", "true").setText(item.getText()); } // les infos du doc @@ -256,23 +414,23 @@ documentinfo.add(field); } else { if ("author".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(AUTHOR).appendContent(inline(item.getText())); + documentinfo.addElement(AUTHOR).addAttribute("inline", "true").setText(item.getText()); } else if ("date".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(DATE).appendContent(inline(item.getText())); + documentinfo.addElement(DATE).addAttribute("inline", "true").setText(item.getText()); } else if ("organization".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(ORGANIZATION).appendContent(inline(item.getText())); + documentinfo.addElement(ORGANIZATION).addAttribute("inline", "true").setText(item.getText()); } else if ("contact".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(CONTACT).appendContent(inline(item.getText())); + documentinfo.addElement(CONTACT).addAttribute("inline", "true").setText(item.getText()); } else if ("address".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(ADDRESS).appendContent(inline(item.getText())); + documentinfo.addElement(ADDRESS).addAttribute("inline", "true").setText(item.getText()); } else if ("version".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(VERSION).appendContent(inline(item.getText())); + documentinfo.addElement(VERSION).addAttribute("inline", "true").setText(item.getText()); } else if ("revision".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(REVISION).appendContent(inline(item.getText())); + documentinfo.addElement(REVISION).addAttribute("inline", "true").setText(item.getText()); } else if ("status".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(STATUS).appendContent(inline(item.getText())); + documentinfo.addElement(STATUS).addAttribute("inline", "true").setText(item.getText()); } else if ("copyright".equalsIgnoreCase(item.attributeValue("type"))) { - documentinfo.addElement(COPYRIGHT).appendContent(inline(item.getText())); + documentinfo.addElement(COPYRIGHT).addAttribute("inline", "true").setText(item.getText()); } // TODO authors lexer.remove(); @@ -311,7 +469,15 @@ lexer.remove(); Element para = parent.addElement(PARAGRAPH); copyLevel(item,para); - para.appendContent(inline(item.getText())); + para.addAttribute("inline", "true").setText(item.getText()); + } else if (itemEquals(JRSTLexer.DIRECTIVE, item)) { + lexer.remove(); + Node directive = composeDirective(item); + parent.add(directive); + } else if (itemEquals(SUBSTITUTION_DEFINITION, item)) { + lexer.remove(); + Element subst = composeSubstitutionDefinition(item); + parent.add(subst); } else if (itemEquals(TRANSITION, item)) { lexer.remove(); Element para = parent.addElement(TRANSITION); @@ -355,6 +521,41 @@ } /** + * @param item + * @return + */ + private Node composeDirective(Element item) { + Node result = item; + String type = item.attributeValue(JRSTLexer.DIRECTIVE_TYPE); + JRSTDirective directive = getDirective(type); + if (directive == null) { + directive = getDefaultDirective(type); + } + if (directive != null) { + result = directive.parse(item); + } else { + log.warn("Unknow directive type '" + type + "' in: " + item); + } + return result; + } + + + /** + * @param lexer + * @param item + * @return + */ + private Element composeSubstitutionDefinition(Element item) { + Element result = item; + Element child = (Element)item.selectSingleNode("*"); + Node newChild = composeDirective(child); + result.remove(child); // remove old after composeDirective, because directive can be used this parent + result.add(newChild); + return result; + } + + + /** * @param lexer * @param item * @return @@ -462,7 +663,7 @@ lexer.remove(); Element bullet = result.addElement(LIST_ITEM); copyLevel(item, bullet); - bullet.addElement(PARAGRAPH).appendContent(inline(item.getText())); + bullet.addElement(PARAGRAPH).addAttribute("inline", "true").setText(item.getText()); composeBody(lexer, bullet); item = lexer.peekBulletList(); @@ -485,7 +686,7 @@ lexer.remove(); Element e = result.addElement(LIST_ITEM); copyLevel(item, e); - e.addElement(PARAGRAPH).appendContent(inline(item.getText())); + e.addElement(PARAGRAPH).addAttribute("inline", "true").setText(item.getText()); composeBody(lexer, e); item = lexer.peekEnumeratedList(); @@ -504,17 +705,17 @@ Element term = def.addElement(TERM); copyLevel(item, term); - term.appendContent(inline(item.attributeValue("term"))); + term.addAttribute("inline", "true").setText(item.attributeValue("term")); String [] classifiers = StringUtil.split(item.attributeValue("classifiers"), " : "); for (String classifierText : classifiers) { Element classifier = def.addElement("classifier"); copyLevel(item, classifier); - classifier.appendContent(inline(classifierText)); + classifier.addAttribute("inline", "true").setText(classifierText); } Element defintion = def.addElement(DEFINITION); - defintion.addElement(PARAGRAPH).appendContent(inline(item.getText())); + defintion.addElement(PARAGRAPH).addAttribute("inline", "true").setText(item.getText()); copyLevel(item, defintion); composeBody(lexer, defintion); @@ -544,9 +745,9 @@ copyLevel(item, field); Element fieldName = field.addElement(FIELD_NAME); copyLevel(item, fieldName); - fieldName.appendContent(inline(item.attributeValue("name"))); + fieldName.addAttribute("inline", "true").setText(item.attributeValue("name")); Element fieldBody = field.addElement(FIELD_BODY); - fieldBody.addElement(PARAGRAPH).appendContent(inline(item.getText())); + fieldBody.addElement(PARAGRAPH).addAttribute("inline", "true").setText(item.getText()); copyLevel(item, fieldBody); composeBody(lexer, fieldBody); @@ -579,7 +780,7 @@ copyLevel(item, result); copyLevel(item, title); - title.appendContent(inline(item.getText())); + title.addAttribute("inline", "true").setText(item.getText()); } // le contenu de la section @@ -680,30 +881,73 @@ return result; } - private Element inline(String text) throws DocumentException { + /** + * Parse text in element and replace text with parse result + * @param text + * @return + * @throws DocumentException + */ + private void inline(Element e) throws DocumentException { + String text = e.getText(); text = StringEscapeUtils.escapeXml(text); + // search all LITERAL and replace it with special mark + // this prevent substitution in literal, example **something** must not + // change in literal + ArrayList literals = new ArrayList(); + Matcher matcher = REGEX_LITERAL.matcher(text); + while(matcher.find()) { + int start = matcher.start(); + int end = matcher.end(); + String literal = matcher.group(1); + literals.add(literal); + + text = text.substring(0, start) + "``" + (literals.size() - 1) + "``" + text.substring(end); + } + + // do all substitution inline text = REGEX_EMAIL.matcher(text).replaceAll("$1<"+REFERENCE+" refuri='mailto:$2'>$2$3"); text = REGEX_STRONG.matcher(text).replaceAll("<"+STRONG+">$1"); text = REGEX_EMPHASIS.matcher(text).replaceAll("<"+EMPHASIS+">$1"); text = REGEX_REFERENCE.matcher(text).replaceAll("<"+REFERENCE+" refuri='$1'>$1$2"); + // substitution reference + matcher = REGEX_SUBSTITUTION_REFERENCE.matcher(text); + int begin = 0; + while(matcher.find(begin)) { + String start = text.substring(0, matcher.start()); + String end = text.substring(matcher.end()); + String ref = matcher.group(1); + + Node subst = e.selectSingleNode( + "//"+SUBSTITUTION_DEFINITION+"[@name='"+ref+"']/child::node()"); + + if (subst == null) { + text = start + "|" + ref + "|"; + } else { + text = start + subst.asXML(); + } + + begin = text.length(); + text += end; + matcher = REGEX_SUBSTITUTION_REFERENCE.matcher(text); + } + // undo substitution in LITERAL - Matcher matcher = REGEX_LITERAL.matcher(text); + matcher = REGEX_LITERAL.matcher(text); while(matcher.find()) { String start = text.substring(0, matcher.start()); String end = text.substring(matcher.end()); - String literal = matcher.group(1); - literal = literal.replaceAll("]*>", "**"); - literal = literal.replaceAll("]*>", "**"); - text = start + "<"+LITERAL+">" + literal + "" + end; + int literalIndex = Integer.parseInt(matcher.group(1)); + text = start + "<"+LITERAL+">" + literals.get(literalIndex) + "" + end; } Element result = DocumentHelper.parseText(""+text+"").getRootElement(); - return result; + e.setText(""); + e.appendContent(result); } } Index: jrst2/src/java/org/codelutin/jrst/ReStructuredText.java diff -u jrst2/src/java/org/codelutin/jrst/ReStructuredText.java:1.1.1.1 jrst2/src/java/org/codelutin/jrst/ReStructuredText.java:1.2 --- jrst2/src/java/org/codelutin/jrst/ReStructuredText.java:1.1.1.1 Mon Oct 30 19:34:05 2006 +++ jrst2/src/java/org/codelutin/jrst/ReStructuredText.java Mon Nov 6 15:08:47 2006 @@ -23,9 +23,9 @@ * Created: 27 oct. 06 11:10:30 * * @author poussin - * @version $Revision: 1.1.1.1 $ + * @version $Revision: 1.2 $ * - * Last update: $Date: 2006/10/30 19:34:05 $ + * Last update: $Date: 2006/11/06 15:08:47 $ * by : $Author: bpoussin $ */ @@ -184,8 +184,8 @@ public static final Pattern REGEX_REFERENCE = Pattern.compile("(http://[-/%#&\\._\\w]+)([^-/%#&\\._\\w]|$)"); public static final Pattern REGEX_EMAIL = Pattern.compile("(^|[^_\\w])([-\\._\\w]+@[-\\._\\w]+)([^-\\._\\w]|$)"); public static final Pattern REGEX_FOOTNOTE_REFERENCE = Pattern.compile("\\[([0-9]+?|#)\\]"); - public static final Pattern REGEX_CITATION_REFERENCE = Pattern.compile("\\[([a-zA-Z_]+?)\\]"); - public static final Pattern REGEX_SUBSTITUTION_REFERENCE = Pattern.compile("\\|([a-zA-Z_]+?)\\|"); + public static final Pattern REGEX_CITATION_REFERENCE = Pattern.compile("\\[([^\\]]+?)\\]"); + public static final Pattern REGEX_SUBSTITUTION_REFERENCE = Pattern.compile("\\|([^|]+?)\\|"); public static final Pattern REGEX_ABBREVIATION = Pattern.compile("(.*?)"); public static final Pattern REGEX_ACRONYM = Pattern.compile("(.*?)"); public static final Pattern REGEX_SUPERSCRIPT = Pattern.compile("(.*?)"); Index: jrst2/src/java/org/codelutin/jrst/JRST.java diff -u /dev/null jrst2/src/java/org/codelutin/jrst/JRST.java:1.1 --- /dev/null Mon Nov 6 15:08:52 2006 +++ jrst2/src/java/org/codelutin/jrst/JRST.java Mon Nov 6 15:08:46 2006 @@ -0,0 +1,162 @@ +/* *##% + * Copyright (C) 2006 + * Code Lutin, Cédric Pineau, Benjamin Poussin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *##%*/ + +/* * + * JRST.java + * + * Created: 3 nov. 06 20:56:00 + * + * @author poussin + * @version $Revision: 1.1 $ + * + * Last update: $Date: 2006/11/06 15:08:46 $ + * by : $Author: bpoussin $ + */ + +package org.codelutin.jrst; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; + +import javax.xml.transform.TransformerException; + +import org.codelutin.util.StringUtil; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.OutputFormat; +import org.dom4j.io.XMLWriter; + +import uk.co.flamingpenguin.jewel.cli.ArgumentValidationException; +import uk.co.flamingpenguin.jewel.cli.Cli; +import uk.co.flamingpenguin.jewel.cli.CliFactory; +import uk.co.flamingpenguin.jewel.cli.CommandLineInterface; +import uk.co.flamingpenguin.jewel.cli.Option; +import uk.co.flamingpenguin.jewel.cli.Unparsed; + + +/** + * @author poussin + */ +@CommandLineInterface(application="JRST") +public class JRST { + + static final private String docbook = "/xsl/dn2dbk.xsl"; + static final private String html = "/xsl/rst2xhtml.xsl"; + + static final private String walshDir = "/docbook-xsl-nwalsh"; + + static final private String xhtml = walshDir + "/xhtml/docbook.xsl"; + static final private String javahelp = walshDir + "/javahelp/javahelp.xsl"; +// static final private String dbkx2html = walshDir + "/html/onechunk.xsl"; + static final private String htmlhelp = walshDir + "/htmlhelp/htmlhelp.xsl"; + + static final private String rst2xdoc = "/xsl/rst2xdoc.xsl"; + + static private Map stylesheets = null; + + static { + stylesheets = new HashMap(); + stylesheets.put("docbook", docbook); + stylesheets.put("html", html); + stylesheets.put("xhtml", docbook+","+xhtml); + stylesheets.put("javahelp", docbook+","+javahelp); + stylesheets.put("htmlhelp", docbook+","+htmlhelp); + stylesheets.put("xdoc", rst2xdoc); + } + + static public void main(String [] args) throws ArgumentValidationException, IOException, TransformerException, DocumentException { + JRSTOption option = CliFactory.parseArguments(JRSTOption.class, args); + + if (option.isHelp()) { + Cli cli = CliFactory.createCli(JRSTOption.class); + System.out.println(cli.getHelpMessage()); + return; + } + + // prepare the output flux + XMLWriter out = null; + if (option.isOutFile()) { + out = new XMLWriter(new FileWriter(option.getFile()), new OutputFormat(" ", true)); + } else { + out = new XMLWriter(System.out, new OutputFormat(" ", true)); + } + + // search xsl file list to apply + String xslList = null; + if (option.isXslFile()) { + xslList = option.getXslFile(); + } else { + xslList = stylesheets.get(option.getOutType()); + } + + // parse rst file + URL url = option.getFile().toURL(); + Reader in = new InputStreamReader(url.openStream()); + JRSTReader jrst = new JRSTReader(); + Document doc = jrst.read(in); + + // apply xsl on rst xml document + String [] xsls = StringUtil.split(xslList, ","); + for (String xsl : xsls) { + URL stylesheet = null; + File file = new File(xsl); + if (file.exists()) { + stylesheet = file.toURL(); + } else { + stylesheet = JRSTReaderTest.class.getResource(xsl); + } + JRSTGenerator gen = new JRSTGenerator(); + doc = gen.transform(doc, stylesheet); + } + + // write generated document + out.write(doc); + } + + public static interface JRSTOption { + + @Option(description="display this help and exit") + boolean isHelp(); + + @Option(shortName="x", description = "XSL file list to apply, comma separated") + public String getXslFile(); + public boolean isXslFile(); + + @Option(shortName="t", pattern = "xhtml|docbook|xml|html|xdoc", // TODO |pdf|rst|odt|rtf", + description = "Output type") + public String getOutType(); + public boolean isOutType(); + + @Option(shortName="o", description = "Output file") + public File getOutFile(); + public boolean isOutFile(); + + @Unparsed(name = "FILE") + public File getFile(); + } + +} + + Index: jrst2/src/java/org/codelutin/jrst/JRSTDirective.java diff -u /dev/null jrst2/src/java/org/codelutin/jrst/JRSTDirective.java:1.1 --- /dev/null Mon Nov 6 15:08:52 2006 +++ jrst2/src/java/org/codelutin/jrst/JRSTDirective.java Mon Nov 6 15:08:46 2006 @@ -0,0 +1,55 @@ +/* *##% + * Copyright (C) 2006 + * Code Lutin, Cédric Pineau, Benjamin Poussin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + *##%*/ + +/* * + * JRSTDirective.java + * + * Created: 4 nov. 06 12:49:39 + * + * @author poussin + * @version $Revision: 1.1 $ + * + * Last update: $Date: 2006/11/06 15:08:46 $ + * by : $Author: bpoussin $ + */ + +package org.codelutin.jrst; + +import org.dom4j.Element; +import org.dom4j.Node; + + +/** + * @author poussin + * + */ + +public interface JRSTDirective { + + /** + * parse directive block element and return element to put in final + * XML + * + * @param e + * @return + */ + public Node parse(Element e); +} + +