public class XhtmlBaseParser extends AbstractXmlParser implements HtmlMarkup
AbstractXmlParser.CachedFileEntityResolver
Modifier and Type | Field and Description |
---|---|
private SinkEventAttributeSet |
decoration
Decoration properties, e.g. for texts.
|
(package private) boolean |
hasDefinitionListItem
Used to wrap the definedTerm with its definition, even when one is omitted.
|
private boolean |
inFigure
Used to recognize the case of img inside figure.
|
private boolean |
inVerbatim
Verbatim flag, true whenever we are inside a <pre> tag.
|
private boolean |
isAnchor
Used to distinguish <a href=""> from <a name="">.
|
private boolean |
isLink
Used to distinguish <a href=""> from <a name="">.
|
private int |
orderedListDepth
Used for nested lists.
|
private boolean |
scriptBlock
True if a <script></script> or <style></style> block is read.
|
private int |
sectionLevel
Counts section level.
|
private java.util.Map<java.lang.String,java.util.Set<java.lang.String>> |
warnMessages
Map of warn messages with a String as key to describe the error type and a Set as value.
|
A, ABBR, ACRONYM, ADDRESS, APPLET, AREA, B, BASE, BASEFONT, BDO, BIG, BLOCKQUOTE, BODY, BR, BUTTON, CAPTION, CDATA_TYPE, CENTER, CITE, CODE, COL, COLGROUP, DD, DEL, DFN, DIR, DIV, DL, DT, EM, ENTITY_TYPE, FIELDSET, FONT, FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, HTML, I, IFRAME, IMG, INPUT, INS, ISINDEX, KBD, LABEL, LEGEND, LI, LINK, MAP, MENU, META, NOFRAMES, NOSCRIPT, OBJECT, OL, OPTGROUP, OPTION, P, PARAM, PRE, Q, S, SAMP, SCRIPT, SELECT, SMALL, SPAN, STRIKE, STRONG, STYLE, SUB, SUP, TABLE, TAG_TYPE_END, TAG_TYPE_SIMPLE, TAG_TYPE_START, TBODY, TD, TEXTAREA, TFOOT, TH, THEAD, TITLE, TR, TT, U, UL, VAR
BANG, CDATA, DOCTYPE_START, ENTITY_START, XML_NAMESPACE
COLON, EOL, EQUAL, GREATER_THAN, LEFT_CURLY_BRACKET, LEFT_SQUARE_BRACKET, LESS_THAN, MINUS, PLUS, QUOTE, RIGHT_CURLY_BRACKET, RIGHT_SQUARE_BRACKET, SEMICOLON, SLASH, SPACE, STAR
ROLE, TXT_TYPE, UNKNOWN_TYPE, XML_TYPE
Constructor and Description |
---|
XhtmlBaseParser() |
Modifier and Type | Method and Description |
---|---|
protected boolean |
baseEndTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Goes through a common list of possible html end tags.
|
protected boolean |
baseStartTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Goes through a common list of possible html start tags.
|
private void |
closeOpenSections(int newLevel,
Sink sink)
Close open sections.
|
protected void |
consecutiveSections(int newLevel,
Sink sink)
Make sure sections are nested consecutively.
|
protected int |
getSectionLevel()
Return the current section level.
|
private void |
handleAEnd(Sink sink) |
private void |
handleAStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink,
SinkEventAttributeSet attribs) |
protected void |
handleCdsect(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Handles CDATA sections.
|
protected void |
handleComment(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Handles comments.
|
private boolean |
handleDivStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
SinkEventAttributeSet attribs,
Sink sink) |
protected void |
handleEndTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Goes through the possible end tags.
|
private void |
handleFigureCaptionEnd(Sink sink) |
private void |
handleFigureCaptionStart(Sink sink,
SinkEventAttributeSet attribs) |
private void |
handleImgStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink,
SinkEventAttributeSet attribs) |
private void |
handleLIStart(Sink sink,
SinkEventAttributeSet attribs) |
private void |
handleListItemEnd(Sink sink) |
private void |
handleOLStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink,
SinkEventAttributeSet attribs) |
private void |
handlePreStart(SinkEventAttributeSet attribs,
Sink sink) |
private void |
handlePStart(Sink sink,
SinkEventAttributeSet attribs) |
private void |
handleSectionStart(Sink sink,
int level,
SinkEventAttributeSet attribs) |
protected void |
handleStartTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Goes through the possible start tags.
|
private void |
handleTableStart(Sink sink,
SinkEventAttributeSet attribs,
org.codehaus.plexus.util.xml.pull.XmlPullParser parser) |
protected void |
handleText(org.codehaus.plexus.util.xml.pull.XmlPullParser parser,
Sink sink)
Handles text events.
|
protected void |
init()
Initialize the parser.
|
protected void |
initXmlParser(org.codehaus.plexus.util.xml.pull.XmlPullParser parser)
Initializes the parser with custom entities or other options.
|
protected boolean |
isScriptBlock()
Checks if we are currently inside a <script> tag.
|
protected boolean |
isVerbatim()
Checks if we are currently inside a <pre> tag.
|
private void |
logMessage(java.lang.String key,
java.lang.String msg)
If debug mode is enabled, log the msg as is; otherwise add the unique msg to warnMessages. |
private void |
logWarnings() |
private void |
openMissingSections(int newLevel,
Sink sink)
Open missing sections.
|
void |
parse(java.io.Reader source,
Sink sink)
Parses the given source model and emits Doxia events into the given sink.
|
protected void |
setSectionLevel(int newLevel)
Set the current section level.
|
protected java.lang.String |
validAnchor(java.lang.String id)
Checks if the given id is a valid Doxia id and if not, returns a transformed one.
|
protected void |
verbatim_()
Stop verbatim mode.
|
protected void |
verbatim()
Start verbatim mode.
|
getAttributesFromParser, getLocalEntities, getText, getType, handleEntity, handleUnknown, isCollapsibleWhitespace, isIgnorableWhitespace, isTrimmableWhitespace, isValidate, parse, setCollapsibleWhitespace, setIgnorableWhitespace, setTrimmableWhitespace, setValidate
doxiaVersion, enableLogging, executeMacro, getBasedir, getLog, getMacroManager, isEmitComments, isSecondParsing, parse, setEmitComments, setSecondParsing
private boolean scriptBlock
private boolean isLink
private boolean isAnchor
private int orderedListDepth
private int sectionLevel
private boolean inVerbatim
private boolean inFigure
boolean hasDefinitionListItem
private final SinkEventAttributeSet decoration
private java.util.Map<java.lang.String,java.util.Set<java.lang.String>> warnMessages
public void parse(java.io.Reader source, Sink sink) throws ParseException
parse
in interface Parser
parse
in class AbstractXmlParser
source
- not null reader that provides the source document.
You could use the newReader methods from ReaderFactory.
sink
- A sink that consumes the Doxia events.
ParseException
- if the model could not be parsed.
protected void initXmlParser(org.codehaus.plexus.util.xml.pull.XmlPullParser parser) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException
initXmlParser
in class AbstractXmlParser
parser
- A parser, not null.
org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem initializing the parser
protected boolean baseStartTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink)
Goes through a common list of possible html start tags. These include only tags that can go into the body of an xhtml document and so should be re-usable by different xhtml-based parsers.
The currently handled tags are:
<h2>, <h3>, <h4>, <h5>, <h6>, <p>, <pre>,
<ul>, <ol>, <li>, <dl>, <dt>, <dd>, <b>, <strong>,
<i>, <em>, <code>, <samp>, <tt>, <a>, <table>, <tr>,
<th>, <td>, <caption>, <br/>, <hr/>, <img/>.
parser
- A parser.
sink
- the sink to receive the events.
protected boolean baseEndTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink)
Goes through a common list of possible html end tags.
These should be re-usable by different xhtml-based parsers.
The tags handled here are the same as for baseStartTag(XmlPullParser,Sink), except for the empty elements (<br/>, <hr/>, <img/>).
parser
- A parser.sink
- the sink to receive the events.protected void handleStartTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException, MacroExecutionException
baseStartTag(XmlPullParser,Sink)
, this should be
overridden by implementing parsers to include additional tags.handleStartTag
in class AbstractXmlParser
parser
- A parser, not null.sink
- the sink to receive the events.org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem parsing the modelMacroExecutionException
- if there's a problem executing a macroprotected void handleEndTag(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException, MacroExecutionException
baseEndTag(XmlPullParser,Sink)
, this should be
overridden by implementing parsers to include additional tags.handleEndTag
in class AbstractXmlParser
parser
- A parser, not null.sink
- the sink to receive the events.org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem parsing the modelMacroExecutionException
- if there's a problem executing a macroprotected void handleText(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException
This is a default implementation: if the parser points to a non-empty text element, it is emitted as a text event into the specified sink.
handleText
in class AbstractXmlParser
parser
- A parser, not null.sink
- the sink to receive the events. Not null.org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem parsing the modelprotected void handleComment(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException
This is a default implementation, all data are emitted as comment events into the specified sink.
handleComment
in class AbstractXmlParser
parser
- A parser, not null.sink
- the sink to receive the events. Not null.org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem parsing the modelprotected void handleCdsect(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink) throws org.codehaus.plexus.util.xml.pull.XmlPullParserException
This is a default implementation, all data are emitted as text events into the specified sink.
handleCdsect
in class AbstractXmlParser
parser
- A parser, not null.sink
- the sink to receive the events. Not null.org.codehaus.plexus.util.xml.pull.XmlPullParserException
- if there's a problem parsing the modelprotected void consecutiveSections(int newLevel, Sink sink)
HTML doesn't have any sections, only sectionTitles (<h2> etc), which means we have to open or close any sections that are missing in between.
For instance, if the following sequence is parsed:
<h3></h3> <h6></h6>
we have to insert two section starts before we open the <h6>.
In the following sequence
<h6></h6> <h3></h3>
we have to close two sections before we open the <h3>.
The current level is set to newLevel afterwards.
newLevel
- the new section level, all upper levels have to be closed.sink
- the sink to receive the events.private void closeOpenSections(int newLevel, Sink sink)
newLevel
- the new section level, all upper levels have to be closed.sink
- the sink to receive the events.private void openMissingSections(int newLevel, Sink sink)
newLevel
- the new section level, all lower levels have to be opened.sink
- the sink to receive the events.protected int getSectionLevel()
protected void setSectionLevel(int newLevel)
newLevel
- the new section level.protected void verbatim_()
protected void verbatim()
protected boolean isVerbatim()
protected boolean isScriptBlock()
Checks if we are currently inside <script> tags.
protected java.lang.String validAnchor(java.lang.String id)
id
- The id to validate.DoxiaUtils.encodeId(String)
protected void init()
Parser.parse(java.io.Reader, org.apache.maven.doxia.sink.Sink)
and can be used
to set the parser into a clear state so it can be re-used.init
in class AbstractParser
private void handleAEnd(Sink sink)
private void handleAStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs)
private boolean handleDivStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, SinkEventAttributeSet attribs, Sink sink)
private void handleFigureCaptionEnd(Sink sink)
private void handleFigureCaptionStart(Sink sink, SinkEventAttributeSet attribs)
private void handleImgStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs)
private void handleLIStart(Sink sink, SinkEventAttributeSet attribs)
private void handleListItemEnd(Sink sink)
private void handleOLStart(org.codehaus.plexus.util.xml.pull.XmlPullParser parser, Sink sink, SinkEventAttributeSet attribs)
private void handlePStart(Sink sink, SinkEventAttributeSet attribs)
private void handlePreStart(SinkEventAttributeSet attribs, Sink sink)
private void handleSectionStart(Sink sink, int level, SinkEventAttributeSet attribs)
private void handleTableStart(Sink sink, SinkEventAttributeSet attribs, org.codehaus.plexus.util.xml.pull.XmlPullParser parser)
private void logMessage(java.lang.String key, java.lang.String msg)
If debug mode is enabled, log the msg as is; otherwise add the unique msg to warnMessages.
key
- not null
msg
- not null
parse(Reader, Sink)
private void logWarnings()