org.apache.pdfbox.pdfparser.PDFStreamParser Java code examples

/**
 * Tokenizes the contents of an appearance stream.
 *
 * @param appearanceStream the appearance stream whose contents are handed to the parser.
 * @return the tokens the parser collected while parsing the stream.
 * @throws IOException if the stream cannot be read or parsed.
 */
private List<Object> tokenize(PDAppearanceStream appearanceStream) throws IOException
{
  PDFStreamParser streamParser = new PDFStreamParser(appearanceStream.getContents());
  // parse() fills the parser's token list; getTokens() then exposes it.
  streamParser.parse();
  List<Object> tokens = streamParser.getTokens();
  return tokens;
}

skipSpaces();
int nextByte = seqSource.peek();
if( ((byte)nextByte) == -1 )
      retval = parseCOSDictionary();
      retval = parseCOSString();
    retval = parseCOSArray();
    break;
    retval = parseCOSString();
    break;
  case '/':
    retval = parseCOSName();
    break;
  case 'n':   
    String nullString = readString();
    if( nullString.equals( "null") )
    String next = readString();
    if( next.equals( "true" ) )
    String line = readString();
    if( line.equals( "R" ) )
    String next = readString();

skipSpaces();
while(
  nextChar != -1 && // EOF
  !isWhitespace(nextChar) &&
  !isClosing(nextChar) &&
  nextChar != '[' &&
  nextChar != '<' &&

/**
 * Walks the tokens of the given content and dispatches every operator together with the
 * operands that preceded it.
 *
 * @param content the raw content stream bytes to parse.
 * @throws IOException if there is an error reading or parsing the content stream.
 */
private void processAppearanceStringOperators(byte[] content) throws IOException
{
  PDFStreamParser streamParser = new PDFStreamParser(content);
  List<COSBase> operands = new ArrayList<>();
  for (Object current = streamParser.parseNextToken();
      current != null;
      current = streamParser.parseNextToken())
  {
    if (current instanceof COSObject)
    {
      // Store the object the COSObject wraps, not the wrapper itself.
      operands.add(((COSObject) current).getObject());
      continue;
    }
    if (current instanceof Operator)
    {
      processOperator((Operator) current, operands);
      // Start a fresh list instead of clearing: the list just passed on is left untouched.
      operands = new ArrayList<>();
      continue;
    }
    // Anything else is collected as an operand for the next operator.
    operands.add((COSBase) current);
  }
}

 // Build a stream parser over the page, parse every token, then fetch the collected tokens.
 PDFStreamParser parser = new PDFStreamParser(page);
// parse() must run before getTokens(): it is what fills the token list.
parser.parse();
List<Object> pageTokens = parser.getTokens();

/**
 * Reads tokens with {@code parseNextToken()} until it returns {@code null} and appends each
 * one to the parsed-token list. The collected tokens are then available through
 * {@link #getTokens() getTokens()}.
 *
 * @throws IOException If there is an error while parsing the stream.
 */
public void parse() throws IOException
{
  for (Object token = parseNextToken(); token != null; token = parseNextToken())
  {
    streamObjects.add( token );
  }
}

  /**
   * Peeks at the next char of the source and tests whether it is a space or a return.
   *
   * @return true if the next char is a space or a return
   * @throws IOException if something went wrong
   */
  private boolean hasNextSpaceOrReturn() throws IOException
  {
    // peek() only inspects the upcoming value; nothing is read from the source here.
    int upcoming = seqSource.peek();
    return isSpaceOrReturn( upcoming );
  }
}

/**
 * Determines the glyph width from a type3 charproc stream by handing the first operator, and
 * the operands collected before it, to {@code parseWidth}.
 *
 * @return the glyph width.
 * @throws IOException if the stream could not be read, or did not have d0 or d1 as first
 * operator, or if their first argument was not a number, or if the stream ends before any
 * operator is found.
 */
public float getWidth() throws IOException
{
  PDFStreamParser streamParser = new PDFStreamParser(getContents());
  List<COSBase> operands = new ArrayList<>();
  for (Object current = streamParser.parseNextToken();
      current != null;
      current = streamParser.parseNextToken())
  {
    if (current instanceof COSObject)
    {
      // Store the object the COSObject wraps, not the wrapper itself.
      operands.add(((COSObject) current).getObject());
      continue;
    }
    if (current instanceof Operator)
    {
      // Only the first operator matters; stop scanning as soon as it is seen.
      return parseWidth((Operator) current, operands);
    }
    operands.add((COSBase) current);
  }
  throw new IOException("Unexpected end of stream");
}

// Fetch the page's content stream, parse it into tokens, and set up the state used while
// scanning those tokens for text objects.
PDStream contents = page.getContents();
PDFStreamParser parser = new PDFStreamParser(contents.getStream());
parser.parse();  
List tokens = parser.getTokens();  
boolean parsingTextObject = false; //boolean to check whether the token being parsed is part of a TextObject
// NOTE(review): presumably holds the text object currently being assembled; PDFTextObject
// is not defined in this excerpt, so confirm against its declaration.
PDFTextObject textobj = new PDFTextObject();

/**
 * Reads tokens with {@code parseNextToken()} until it returns {@code null} and appends each
 * one to the parsed-token list. The collected tokens are then available through
 * {@link #getTokens() getTokens()}.
 *
 * @throws IOException If there is an error while parsing the stream.
 */
public void parse() throws IOException
{
  for (Object token = parseNextToken(); token != null; token = parseNextToken())
  {
    streamObjects.add( token );
  }
}

  /**
   * Peeks at the next char of the source and tests whether it is a space or a return.
   *
   * @return true if the next char is a space or a return
   * @throws IOException if something went wrong
   */
  private boolean hasNextSpaceOrReturn() throws IOException
  {
    // peek() only inspects the upcoming value; nothing is read from the source here.
    int upcoming = seqSource.peek();
    return isSpaceOrReturn( upcoming );
  }
}

/**
 * Parses the given content stream and renders each of its tokens into a styled document.
 *
 * @param inputStream the content stream data to read and parse.
 * @return the document holding the rendered tokens, or {@code null} if reading or parsing
 * the stream failed.
 */
private StyledDocument getContentStreamDocument(InputStream inputStream)
{
  PDFStreamParser streamParser;
  try
  {
    byte[] rawContent = IOUtils.toByteArray(inputStream);
    streamParser = new PDFStreamParser(rawContent);
    streamParser.parse();
  }
  catch (IOException e)
  {
    // A read or parse failure is reported to the caller as "no document".
    return null;
  }

  StyledDocument document = new DefaultStyledDocument();
  for (Object token : streamParser.getTokens())
  {
    writeToken(token, document);
  }
  return document;
}

skipSpaces();
int nextByte = seqSource.peek();
if( ((byte)nextByte) == -1 )
      retval = parseCOSDictionary();
      retval = parseCOSString();
    retval = parseCOSArray();
    break;
    retval = parseCOSString();
    break;
  case '/':
    retval = parseCOSName();
    break;
  case 'n':   
    String nullString = readString();
    if( nullString.equals( "null") )
    String next = readString();
    if( next.equals( "true" ) )
    String line = readString();
    if( line.equals( "R" ) )
    String next = readString();

/**
 * Walks the tokens of the given content stream and dispatches every operator together with
 * the operands that preceded it.
 *
 * @param contentStream the content stream to parse.
 * @throws IOException if there is an error reading or parsing the content stream.
 */
private void processStreamOperators(PDContentStream contentStream) throws IOException
{
  PDFStreamParser streamParser = new PDFStreamParser(contentStream.getContents());
  List<COSBase> operands = new ArrayList<>();
  for (Object current = streamParser.parseNextToken();
      current != null;
      current = streamParser.parseNextToken())
  {
    if (current instanceof COSObject)
    {
      // Store the object the COSObject wraps, not the wrapper itself.
      operands.add(((COSObject) current).getObject());
      continue;
    }
    if (current instanceof Operator)
    {
      processOperator((Operator) current, operands);
      // Start a fresh list instead of clearing: the list just passed on is left untouched.
      operands = new ArrayList<>();
      continue;
    }
    // Anything else is collected as an operand for the next operator.
    operands.add((COSBase) current);
  }
}

for (PDPage page : pages) {
  PDFStreamParser parser = new PDFStreamParser(page);
  parser.parse();
  List tokens = parser.getTokens();
  for (int j = 0; j < tokens.size(); j++) {
    Object next = tokens.get(j);

skipSpaces();
while(
  nextChar != -1 && // EOF
  !isWhitespace(nextChar) &&
  !isClosing(nextChar) &&
  nextChar != '[' &&
  nextChar != '<' &&

/**
 * Reads tokens with {@code parseNextToken()} until it returns {@code null} and appends each
 * one to the parsed-token list. The collected tokens are then available through
 * {@link #getTokens() getTokens()}.
 *
 * @throws IOException If there is an error while parsing the stream.
 */
public void parse() throws IOException
{
  for (Object token = parseNextToken(); token != null; token = parseNextToken())
  {
    streamObjects.add( token );
  }
}

  /**
   * Peeks at the next char of the source and tests whether it is a space or a return.
   *
   * @return true if the next char is a space or a return
   * @throws IOException if something went wrong
   */
  private boolean hasNextSpaceOrReturn() throws IOException
  {
    // peek() only inspects the upcoming value; nothing is read from the source here.
    int upcoming = seqSource.peek();
    return isSpaceOrReturn( upcoming );
  }
}

private PDDocument removeText(PDPage page) throws IOException {
  PDFStreamParser parser = new PDFStreamParser(page);
  parser.parse();
  List<Object> tokens = parser.getTokens();
  List<Object> newTokens = new ArrayList<>();
  for (Object token : tokens) {

skipSpaces();
int nextByte = seqSource.peek();
if( ((byte)nextByte) == -1 )
      retval = parseCOSDictionary();
      retval = parseCOSString();
    retval = parseCOSArray();
    break;
    retval = parseCOSString();
    break;
  case '/':
    retval = parseCOSName();
    break;
  case 'n':   
    String nullString = readString();
    if( nullString.equals( "null") )
    String next = readString();
    if( next.equals( "true" ) )
    String line = readString();
    if( line.equals( "R" ) )
    String next = readString();

Javadoc

This will parse a PDF byte stream and extract operands and such.

Most used methods

<init>
Constructor.
getTokens
This will get the tokens that were parsed from the stream by the #parse() method.
parse
This will parse all the tokens in the stream. This will close the stream when it is finished parsing
parseNextToken
This will parse the next token in the stream.
hasNextSpaceOrReturn
Checks if the next char is a space or a return.
hasNoFollowingBinData
Looks up an amount of bytes if they contain only ASCII characters (no control sequences etc.), and t
isClosing
isSpaceOrReturn
isWhitespace
parseCOSArray
parseCOSDictionary
parseCOSName

Popular in Java

Running tasks concurrently on multiple threads
onRequestPermissionsResult (Fragment)
addToBackStack (FragmentTransaction)
onCreateOptionsMenu (Activity)
Socket (java.net)
Provides a client-side TCP socket.
URL (java.net)
A Uniform Resource Locator that identifies the location of an Internet resource as specified by RFC
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
Dictionary (java.util)
Note: Do not use this class since it is obsolete. Please use the Map interface for new implementatio
NoSuchElementException (java.util)
Thrown when trying to retrieve an element past the end of an Enumeration or Iterator.
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
Top Sublime Text plugins

How to use PDFStreamParser in org.apache.pdfbox.pdfparser

Best Java code snippets using org.apache.pdfbox.pdfparser.PDFStreamParser (Showing top 20 results out of 315)

How to use
PDFStreamParser
in
org.apache.pdfbox.pdfparser