Class DocumentParser

java.lang.Object
com.vladsch.flexmark.parser.internal.DocumentParser
All Implemented Interfaces:
BlockParserTracker, ParserState, BlockTracker

public class DocumentParser extends Object implements ParserState
  • Field Details

    • INLINE_PARSER_FACTORY

      public static final InlineParserFactory INLINE_PARSER_FACTORY
    • CORE_FACTORIES_DATA_KEYS

      private static final HashMap<CustomBlockParserFactory,DataKey<Boolean>> CORE_FACTORIES_DATA_KEYS
    • CORE_PARAGRAPH_PRE_PROCESSORS

      private static final HashMap<DataKey<Boolean>,ParagraphPreProcessorFactory> CORE_PARAGRAPH_PRE_PROCESSORS
    • line

      private BasedSequence line
    • lineWithEOL

      private BasedSequence lineWithEOL
    • lineNumber

      private int lineNumber
      current line number in the input
    • lineStart

      private int lineStart
      current start of line offset in the input
    • lineEOLIndex

      private int lineEOLIndex
      current line's EOL sequence
    • lineEndIndex

      private int lineEndIndex
      current end of line offset in the input including EOL
    • index

      private int index
      current index (offset) in input line (0-based)
    • column

      private int column
      current column of input line (tab causes column to go to next 4-space tab stop) (0-based)
    • columnIsInTab

      private boolean columnIsInTab
      if the current column is within a tab character (partially consumed tab)
    • nextNonSpace

      private int nextNonSpace
    • nextNonSpaceColumn

      private int nextNonSpaceColumn
    • indent

      private int indent
    • blank

      private boolean blank
    • isBlankLine

      private boolean isBlankLine
    • blockParserFactories

      private final List<BlockParserFactory> blockParserFactories
    • paragraphPreProcessorDependencies

      private final List<List<ParagraphPreProcessorFactory>> paragraphPreProcessorDependencies
    • blockPreProcessorDependencies

      private final List<List<BlockPreProcessorFactory>> blockPreProcessorDependencies
    • inlineParser

      private final InlineParser inlineParser
    • documentBlockParser

      private final DocumentBlockParser documentBlockParser
    • blankLinesInAst

      private final boolean blankLinesInAst
    • trackDocumentLines

      private final boolean trackDocumentLines
    • lineSegments

      private final List<BasedSequence> lineSegments
    • activeBlockParsers

      private final List<BlockParser> activeBlockParsers
    • blockTracker

      private final ClassifyingBlockTracker blockTracker
    • lastLineBlank

      private final Map<Node,Boolean> lastLineBlank
    • options

      private final DataHolder options
    • currentPhase

      private ParserPhase currentPhase
    • myParsing

      private final Parsing myParsing
  • Constructor Details

  • Method Details

    • getLineSegments

      public List<BasedSequence> getLineSegments()
      Description copied from interface: ParserState
      Returns a list of document lines encountered thus far in the parsing process
      Specified by:
      getLineSegments in interface ParserState
      Returns:
      list of line sequences (including EOLs)
    • blockParserAdded

      public void blockParserAdded(BlockParser blockParser)
      Specified by:
      blockParserAdded in interface BlockParserTracker
    • blockParserRemoved

      public void blockParserRemoved(BlockParser blockParser)
      Specified by:
      blockParserRemoved in interface BlockParserTracker
    • blockAdded

      public void blockAdded(@NotNull Block node)
      Specified by:
      blockAdded in interface BlockTracker
    • blockAddedWithChildren

      public void blockAddedWithChildren(@NotNull Block node)
      Specified by:
      blockAddedWithChildren in interface BlockTracker
    • blockAddedWithDescendants

      public void blockAddedWithDescendants(@NotNull Block node)
      Specified by:
      blockAddedWithDescendants in interface BlockTracker
    • blockRemoved

      public void blockRemoved(@NotNull Block node)
      Specified by:
      blockRemoved in interface BlockTracker
    • blockRemovedWithChildren

      public void blockRemovedWithChildren(@NotNull Block node)
      Specified by:
      blockRemovedWithChildren in interface BlockTracker
    • blockRemovedWithDescendants

      public void blockRemovedWithDescendants(@NotNull Block node)
      Specified by:
      blockRemovedWithDescendants in interface BlockTracker
    • getParserPhase

      public ParserPhase getParserPhase()
      Description copied from interface: ParserState
      Get the current parser phase
      Specified by:
      getParserPhase in interface ParserState
      Returns:
      the current parser phase ParserPhase
    • getParsing

      public Parsing getParsing()
      Specified by:
      getParsing in interface ParserState
      Returns:
      strings and patterns class adjusted for options Parsing
    • getProperties

      public MutableDataHolder getProperties()
      Specified by:
      getProperties in interface ParserState
      Returns:
      document properties of the document being parsed
    • calculateBlockParserFactories

      public static List<CustomBlockParserFactory> calculateBlockParserFactories(DataHolder options, List<CustomBlockParserFactory> customBlockParserFactories)
    • calculateParagraphPreProcessors

      public static List<List<ParagraphPreProcessorFactory>> calculateParagraphPreProcessors(DataHolder options, List<ParagraphPreProcessorFactory> blockPreProcessors, InlineParserFactory inlineParserFactory)
    • calculateBlockPreProcessors

      public static List<List<BlockPreProcessorFactory>> calculateBlockPreProcessors(DataHolder options, List<BlockPreProcessorFactory> blockPreProcessors)
    • getInlineParser

      public InlineParser getInlineParser()
      Specified by:
      getInlineParser in interface ParserState
      Returns:
      inline parser instance for the parser state
    • parse

      public Document parse(CharSequence source)
      The main parsing function. Returns a parsed document AST.
      Parameters:
      source - source sequence to parse
      Returns:
      Document node of the resulting AST
    • parse

      public Document parse(Reader input) throws IOException
      Throws:
      IOException
    • getLineNumber

      public int getLineNumber()
      Specified by:
      getLineNumber in interface ParserState
      Returns:
      The 0 based current line number within the input
    • getLineStart

      public int getLineStart()
      Specified by:
      getLineStart in interface ParserState
      Returns:
      the start of line offset into the input stream corresponding to current index into the line
    • getLineEndIndex

      public int getLineEndIndex()
      Specified by:
      getLineEndIndex in interface ParserState
      Returns:
      the end of line offset into the input stream corresponding to current index into the line, including the EOL
    • getLine

      public BasedSequence getLine()
      Specified by:
      getLine in interface ParserState
      Returns:
      the current line
    • getLineWithEOL

      public BasedSequence getLineWithEOL()
      Specified by:
      getLineWithEOL in interface ParserState
      Returns:
      the current line with EOL
    • getLineEolLength

      public int getLineEolLength()
      Specified by:
      getLineEolLength in interface ParserState
      Returns:
      the EOL offset into the input stream corresponding to current index into the line
    • getIndex

      public int getIndex()
      Specified by:
      getIndex in interface ParserState
      Returns:
      the current index within the line (0-based)
    • getNextNonSpaceIndex

      public int getNextNonSpaceIndex()
      Specified by:
      getNextNonSpaceIndex in interface ParserState
      Returns:
      the index of the next non-space character starting from ParserState.getIndex() (may be the same) (0-based)
    • getColumn

      public int getColumn()
      Description copied from interface: ParserState
      The column is the position within the line after tab characters have been processed as 4-space tab stops. If the line doesn't contain any tabs, it's the same as the ParserState.getIndex(). If the line starts with a tab, followed by text, then the column for the first character of the text is 4 (the index is 1).
      Specified by:
      getColumn in interface ParserState
      Returns:
      the current column within the line (0-based)
    • getIndent

      public int getIndent()
      Specified by:
      getIndent in interface ParserState
      Returns:
      the indentation in columns (either by spaces or tab stop of 4), starting from ParserState.getColumn()
    • isBlank

      public boolean isBlank()
      Specified by:
      isBlank in interface ParserState
      Returns:
      true if the current line is blank starting from the index
    • isBlankLine

      public boolean isBlankLine()
      Specified by:
      isBlankLine in interface ParserState
      Returns:
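      true if the current line is blank (presumably the whole line, as opposed to isBlank(), which tests from the current index; confirm against the ParserState contract)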
    • getActiveBlockParser

      public BlockParser getActiveBlockParser()
      Specified by:
      getActiveBlockParser in interface ParserState
      Returns:
      the deepest open block parser
    • getActiveBlockParser

      public BlockParser getActiveBlockParser(Block node)
      Specified by:
      getActiveBlockParser in interface ParserState
      Parameters:
      node - block node for which to get the active block parser
      Returns:
      an active block parser for the node or null if not found or the block is already closed.
    • getActiveBlockParsers

      public List<BlockParser> getActiveBlockParsers()
      Specified by:
      getActiveBlockParsers in interface ParserState
      Returns:
      the current list of active block parsers, deepest is last
    • incorporateLine

      private void incorporateLine(BasedSequence ln)
      Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each line of input, then finalizing the document.
      Parameters:
      ln - sequence of the current line
    • findNextNonSpace

      private void findNextNonSpace()
    • setNewIndex

      private void setNewIndex(int newIndex)
    • setNewColumn

      private void setNewColumn(int newColumn)
    • advance

      private void advance()
    • addLine

      private void addLine()
      Add line content to the active block parser. We assume it can accept lines -- that check should be done before calling this.
    • findBlockStart

      private BlockStartImpl findBlockStart(BlockParser blockParser)
    • finalize

      private void finalize(BlockParser blockParser)
      Finalize a block. Close it and do any necessary postprocessing, e.g. creating string_content from strings, setting the 'tight' or 'loose' status of a list, and parsing the beginnings of paragraphs for reference definitions.
      Parameters:
      blockParser - block parser instance to finalize
    • processInlines

      private void processInlines()
      Walk through a block and its children recursively, parsing string content into inline content where appropriate.
    • endsWithBlankLine

      public boolean endsWithBlankLine(Node block)
      Description copied from interface: ParserState
      Test the block to see if it ends in a blank line. The blank line can be in the block or its last child.
      Specified by:
      endsWithBlankLine in interface ParserState
      Parameters:
      block - block to be tested
      Returns:
      true if the block ends in a blank line
    • breakOutOfLists

      private void breakOutOfLists(List<BlockParser> blockParsers)
      Break out of all containing lists, resetting the tip of the document to the parent of the highest list, and finalizing all the lists. (This is used to implement the "two blank lines break out of all lists" feature.)
      Parameters:
      blockParsers - list of block parsers to break out on double blank line
    • addChild

      private <T extends BlockParser> T addChild(T blockParser)
      Add block parser of type T as a child of the currently active parsers. If the tip can't accept children, close and finalize it and try its parent, and so on until we find a block that can accept children.
      Type Parameters:
      T - block parser type
      Parameters:
      blockParser - new block parser to add as a child
      Returns:
      block parser instance added as a child.
    • activateBlockParser

      private void activateBlockParser(BlockParser blockParser)
    • deactivateBlockParser

      private void deactivateBlockParser()
    • removeActiveBlockParser

      private void removeActiveBlockParser()
    • propagateLastLineBlank

      private void propagateLastLineBlank(BlockParser blockParser, BlockParser lastMatchedBlockParser)
    • setLastLineBlank

      private void setLastLineBlank(Node node, boolean value)
    • isLastLineBlank

      public boolean isLastLineBlank(Node node)
      Description copied from interface: ParserState
      Test a block to see if the last line of the block is blank. Children not tested.
      Specified by:
      isLastLineBlank in interface ParserState
      Parameters:
      node - block instance to test
      Returns:
      true if the block's last line is blank
    • finalizeBlocks

      private boolean finalizeBlocks(List<BlockParser> blockParsers)
      Finalize blocks of previous line.
      Returns:
      true.
    • preProcessParagraph

      private void preProcessParagraph(Paragraph block, List<ParagraphPreProcessorFactory> stage, DocumentParser.ParagraphPreProcessorCache processorMap)
      pre-process a paragraph block
      Parameters:
      block - paragraph block to pre-process
      stage - paragraph pre-processor dependency stage
      processorMap - paragraph pre-processor cache
    • preProcessParagraphs

      private void preProcessParagraphs()
    • preProcessBlocks

      private void preProcessBlocks()
    • finalizeAndProcess

      private Document finalizeAndProcess()