writer2latex.office
Class OfficeReader

java.lang.Object
  extended by writer2latex.office.OfficeReader

public class OfficeReader
extends java.lang.Object

This class reads and collects global information about an OOo document. This includes styles, forms, information about indexes and references etc.


Field Summary
private  java.util.HashSet bookmarkRef
           
private  boolean bOpenDocument
           
private  boolean bPresentation
           
private  boolean bSpreadsheet
           
private  boolean bText
           
private  OfficeStyleFamily cell
           
private  OfficeStyleFamily column
           
private  org.w3c.dom.Element content
           
private  OfficeStyleFamily drawingPage
           
private  java.util.HashSet endnoteRef
           
private  PropertySet endnotes
           
private  java.util.HashSet figureSequenceNames
           
private  MasterPage firstMasterPage
           
private  OfficeStyleFamily font
           
private  java.util.HashSet footnoteRef
           
private  PropertySet footnotes
           
private  FormsReader forms
           
private  OfficeStyleFamily frame
           
private  StyleWithProperties[] heading
           
private  java.util.Hashtable indexes
           
private  java.util.HashSet indexSourceStyles
           
private  java.util.HashSet links
           
private  OfficeStyleFamily list
           
private  OfficeStyleFamily masterPage
           
private  OfficeDocument oooDoc
           
private  ListStyle outline
           
private  OfficeStyleFamily pageLayout
           
private  OfficeStyleFamily par
           
private  OfficeStyleFamily presentation
           
private  java.util.HashSet referenceRef
           
private  OfficeStyleFamily row
           
private  java.lang.String sAutoFigureSequenceName
           
private  java.lang.String sAutoTableSequenceName
           
private  OfficeStyleFamily section
           
private  java.util.Hashtable seqrefNames
           
private  java.util.Hashtable sequenceNames
           
private  java.util.HashSet sequenceRef
           
private  OfficeStyleFamily table
           
private  java.util.HashSet tableSequenceNames
           
private  OfficeStyleFamily text
           
 
Constructor Summary
OfficeReader(OfficeDocument oooDoc, boolean bAllParagraphsAreSoft)
          Constructor; read a document
 
Method Summary
 void addFigureSequenceName(java.lang.String sName)
          Add a sequence name for figure captions.
 void addTableSequenceName(java.lang.String sName)
          Add a sequence name for table captions.
private  void collectRefName(java.util.HashSet ref, org.w3c.dom.Element node)
           
 StyleWithProperties getCellStyle(java.lang.String sName)
           
 OfficeStyleFamily getCellStyles()
           
static int getCharacterCount(org.w3c.dom.Node node)
          Counts the number of characters (text nodes) in this element excluding footnotes etc.
 StyleWithProperties getColumnStyle(java.lang.String sName)
           
 OfficeStyleFamily getColumnStyles()
           
 org.w3c.dom.Element getContent()
          Get the content element. In the old file format this means the office:body element; in the OpenDocument format this means an office:text, office:spreadsheet or office:presentation element.
 StyleWithProperties getDefaultCellStyle()
           
 StyleWithProperties getDefaultDrawingPageStyle()
           
 StyleWithProperties getDefaultFrameStyle()
           
 StyleWithProperties getDefaultParStyle()
           
 StyleWithProperties getDefaultPresentationStyle()
           
 StyleWithProperties getDrawingPageStyle(java.lang.String sName)
           
 OfficeStyleFamily getDrawingPageStyles()
           
 PropertySet getEndnotesConfiguration()
           
 MasterPage getFirstMasterPage()
          Returns the first master page used in the document.
 FontDeclaration getFontDeclaration(java.lang.String sName)
          Get a specific font declaration
 OfficeStyleFamily getFontDeclarations()
          Get the collection of all font declarations.
 PropertySet getFootnotesConfiguration()
           
 FormsReader getForms()
          Get the forms belonging to this document.
 StyleWithProperties getFrameStyle(java.lang.String sName)
           
 OfficeStyleFamily getFrameStyles()
           
 StyleWithProperties getHeadingStyle(int nLevel)
          Returns the paragraph style associated with headings of a specific level.
 ListStyle getListStyle(java.lang.String sName)
           
 OfficeStyleFamily getListStyles()
           
 MasterPage getMasterPage(java.lang.String sName)
           
 OfficeStyleFamily getMasterPages()
           
 ListStyle getOutlineStyle()
           
 PageLayout getPageLayout(java.lang.String sName)
           
 OfficeStyleFamily getPageLayouts()
           
private  org.w3c.dom.Element getParagraph(org.w3c.dom.Element node)
           
 StyleWithProperties getParStyle(java.lang.String sName)
           
 OfficeStyleFamily getParStyles()
           
 StyleWithProperties getPresentationStyle(java.lang.String sName)
           
 OfficeStyleFamily getPresentationStyles()
           
 StyleWithProperties getRowStyle(java.lang.String sName)
           
 OfficeStyleFamily getRowStyles()
           
 StyleWithProperties getSectionStyle(java.lang.String sName)
           
 OfficeStyleFamily getSectionStyles()
           
 java.lang.String getSequenceFromRef(java.lang.String sRefName)
          Get the sequence name associated with a reference name
 java.lang.String getSequenceName(org.w3c.dom.Element par)
          Get the sequence name associated with a paragraph
 TableReader getTableReader(org.w3c.dom.Element node)
          Read a table from a table:table node
 StyleWithProperties getTableStyle(java.lang.String sName)
           
 OfficeStyleFamily getTableStyles()
           
 java.lang.String getTextContent(org.w3c.dom.Node node)
           
 StyleWithProperties getTextStyle(java.lang.String sName)
           
 OfficeStyleFamily getTextStyles()
           
 TocReader getTocReader(org.w3c.dom.Element onode)
          Returns a reader for a specific toc
 boolean hasBookmarkRefTo(java.lang.String sName)
          Is there a reference to this bookmark?
 boolean hasEndnoteRefTo(java.lang.String sId)
          Is there a reference to this endnote?
 boolean hasFootnoteRefTo(java.lang.String sId)
          Is there a reference to this footnote id?
 boolean hasLinkTo(java.lang.String sName)
          Is there a link to this sequence anchor name?
 boolean hasReferenceRefTo(java.lang.String sName)
          Is there a reference to this reference mark?
 boolean hasSequenceRefTo(java.lang.String sId)
          Is there a reference to this sequence field?
static boolean isDrawElement(org.w3c.dom.Node node)
          Checks, if a node is an element in the draw namespace
 boolean isFigureSequenceName(java.lang.String sName)
          Does this sequence name belong to a list of figures (lof)?
 boolean isIndexSourceStyle(java.lang.String sStyleName)
          Is this style used in some toc as an index source style?
 boolean isInPackage(java.lang.String sUrl)
          Checks whether this url is internal to the package
static boolean isNoteElement(org.w3c.dom.Node node)
          Checks, if a node is an element representing a note (footnote/endnote)
 boolean isOpenDocument()
          Is this an OASIS OpenDocument or an OOo 1.0 document?
 boolean isPackageFormat()
          Checks whether or not this document is in package format
 boolean isPresentation()
          Is this a presentation document?
static boolean isSingleParagraph(org.w3c.dom.Node node)
          Checks, if this node contains at most one element, and that this is a paragraph.
 boolean isSpreadsheet()
          Is this a spreadsheet document?
static boolean isTableElement(org.w3c.dom.Node node)
          Checks, if a node is an element in the table namespace
 boolean isTableSequenceName(java.lang.String sName)
          Does this sequence name belong to a list of tables (lot)?
 boolean isText()
          Is this a text document?
static boolean isTextElement(org.w3c.dom.Node node)
          Checks, if a node is an element in the text namespace
static boolean isWhitespace(java.lang.String s)
          Checks, if this text is whitespace
static boolean isWhitespaceContent(org.w3c.dom.Node node)
          Checks, if the only text content of this node is whitespace
private  void loadContentFromDOM(org.w3c.dom.Document contentDOM)
           
private  void loadStylesFromDOM(org.w3c.dom.Document stylesDOM, org.w3c.dom.Document contentDOM, boolean bAllParagraphsAreSoft)
           
private  void loadStylesFromDOM(org.w3c.dom.Node node, boolean bAllParagraphsAreSoft)
           
private  void traverseContent(org.w3c.dom.Element node)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

oooDoc

private OfficeDocument oooDoc

font

private OfficeStyleFamily font

text

private OfficeStyleFamily text

par

private OfficeStyleFamily par

section

private OfficeStyleFamily section

table

private OfficeStyleFamily table

column

private OfficeStyleFamily column

row

private OfficeStyleFamily row

cell

private OfficeStyleFamily cell

frame

private OfficeStyleFamily frame

presentation

private OfficeStyleFamily presentation

drawingPage

private OfficeStyleFamily drawingPage

list

private OfficeStyleFamily list

pageLayout

private OfficeStyleFamily pageLayout

masterPage

private OfficeStyleFamily masterPage

outline

private ListStyle outline

footnotes

private PropertySet footnotes

endnotes

private PropertySet endnotes

heading

private StyleWithProperties[] heading

firstMasterPage

private MasterPage firstMasterPage

indexes

private java.util.Hashtable indexes

indexSourceStyles

private java.util.HashSet indexSourceStyles

figureSequenceNames

private java.util.HashSet figureSequenceNames

tableSequenceNames

private java.util.HashSet tableSequenceNames

sAutoFigureSequenceName

private java.lang.String sAutoFigureSequenceName

sAutoTableSequenceName

private java.lang.String sAutoTableSequenceName

sequenceNames

private java.util.Hashtable sequenceNames

seqrefNames

private java.util.Hashtable seqrefNames

footnoteRef

private java.util.HashSet footnoteRef

endnoteRef

private java.util.HashSet endnoteRef

referenceRef

private java.util.HashSet referenceRef

bookmarkRef

private java.util.HashSet bookmarkRef

sequenceRef

private java.util.HashSet sequenceRef

links

private java.util.HashSet links

forms

private FormsReader forms

content

private org.w3c.dom.Element content

bOpenDocument

private boolean bOpenDocument

bText

private boolean bText

bSpreadsheet

private boolean bSpreadsheet

bPresentation

private boolean bPresentation
Constructor Detail

OfficeReader

public OfficeReader(OfficeDocument oooDoc,
                    boolean bAllParagraphsAreSoft)
Constructor; read a document

Method Detail

isTextElement

public static boolean isTextElement(org.w3c.dom.Node node)
Checks, if a node is an element in the text namespace

Parameters:
node - the node to check
Returns:
true if this is a text element

isTableElement

public static boolean isTableElement(org.w3c.dom.Node node)
Checks, if a node is an element in the table namespace

Parameters:
node - the node to check
Returns:
true if this is a table element

isDrawElement

public static boolean isDrawElement(org.w3c.dom.Node node)
Checks, if a node is an element in the draw namespace

Parameters:
node - the node to check
Returns:
true if this is a draw element

isNoteElement

public static boolean isNoteElement(org.w3c.dom.Node node)
Checks, if a node is an element representing a note (footnote/endnote)

Parameters:
node - the node to check
Returns:
true if this is a note element

isSingleParagraph

public static boolean isSingleParagraph(org.w3c.dom.Node node)
Checks, if this node contains at most one element, and that this is a paragraph.

Parameters:
node - the node to check
Returns:
true if the node contains a single paragraph or nothing

isWhitespaceContent

public static boolean isWhitespaceContent(org.w3c.dom.Node node)

Checks, if the only text content of this node is whitespace

Parameters:
node - the node to check (should be a paragraph node or a child of a paragraph node)
Returns:
true if the node contains whitespace only

isWhitespace

public static boolean isWhitespace(java.lang.String s)

Checks, if this text is whitespace

Parameters:
s - the String to check
Returns:
true if the String contains whitespace only

getCharacterCount

public static int getCharacterCount(org.w3c.dom.Node node)
Counts the number of characters (text nodes) in this element excluding footnotes etc.

Parameters:
node - the node to count in
Returns:
the number of characters

getTextContent

public java.lang.String getTextContent(org.w3c.dom.Node node)

isPackageFormat

public boolean isPackageFormat()
Checks whether or not this document is in package format

Returns:
true if it's in package format

isInPackage

public boolean isInPackage(java.lang.String sUrl)
Checks whether this url is internal to the package

Parameters:
sUrl - the url to check
Returns:
true if the url is internal to the package

getFontDeclarations

public OfficeStyleFamily getFontDeclarations()

Get the collection of all font declarations.

Returns:
the OfficeStyleFamily of font declarations

getFontDeclaration

public FontDeclaration getFontDeclaration(java.lang.String sName)

Get a specific font declaration

Parameters:
sName - the name of the font declaration
Returns:
a FontDeclaration representing the font

getTextStyles

public OfficeStyleFamily getTextStyles()

getTextStyle

public StyleWithProperties getTextStyle(java.lang.String sName)

getParStyles

public OfficeStyleFamily getParStyles()

getParStyle

public StyleWithProperties getParStyle(java.lang.String sName)

getDefaultParStyle

public StyleWithProperties getDefaultParStyle()

getSectionStyles

public OfficeStyleFamily getSectionStyles()

getSectionStyle

public StyleWithProperties getSectionStyle(java.lang.String sName)

getTableStyles

public OfficeStyleFamily getTableStyles()

getTableStyle

public StyleWithProperties getTableStyle(java.lang.String sName)

getColumnStyles

public OfficeStyleFamily getColumnStyles()

getColumnStyle

public StyleWithProperties getColumnStyle(java.lang.String sName)

getRowStyles

public OfficeStyleFamily getRowStyles()

getRowStyle

public StyleWithProperties getRowStyle(java.lang.String sName)

getCellStyles

public OfficeStyleFamily getCellStyles()

getCellStyle

public StyleWithProperties getCellStyle(java.lang.String sName)

getDefaultCellStyle

public StyleWithProperties getDefaultCellStyle()

getFrameStyles

public OfficeStyleFamily getFrameStyles()

getFrameStyle

public StyleWithProperties getFrameStyle(java.lang.String sName)

getDefaultFrameStyle

public StyleWithProperties getDefaultFrameStyle()

getPresentationStyles

public OfficeStyleFamily getPresentationStyles()

getPresentationStyle

public StyleWithProperties getPresentationStyle(java.lang.String sName)

getDefaultPresentationStyle

public StyleWithProperties getDefaultPresentationStyle()

getDrawingPageStyles

public OfficeStyleFamily getDrawingPageStyles()

getDrawingPageStyle

public StyleWithProperties getDrawingPageStyle(java.lang.String sName)

getDefaultDrawingPageStyle

public StyleWithProperties getDefaultDrawingPageStyle()

getListStyles

public OfficeStyleFamily getListStyles()

getListStyle

public ListStyle getListStyle(java.lang.String sName)

getPageLayouts

public OfficeStyleFamily getPageLayouts()

getPageLayout

public PageLayout getPageLayout(java.lang.String sName)

getMasterPages

public OfficeStyleFamily getMasterPages()

getMasterPage

public MasterPage getMasterPage(java.lang.String sName)

getOutlineStyle

public ListStyle getOutlineStyle()

getFootnotesConfiguration

public PropertySet getFootnotesConfiguration()

getEndnotesConfiguration

public PropertySet getEndnotesConfiguration()

getHeadingStyle

public StyleWithProperties getHeadingStyle(int nLevel)

Returns the paragraph style associated with headings of a specific level. Returns null if no such style is known.

In principle, different styles can be used for each heading; in practice, the same (soft) style is used for all headings of a specific level.

Parameters:
nLevel - the level of the heading
Returns:
a StyleWithProperties object representing the style

getFirstMasterPage

public MasterPage getFirstMasterPage()

Returns the first master page used in the document. If no master page is used explicitly, the first master page found in the styles is returned. Returns null if no master page exists.

Returns:
a MasterPage object representing the master page

getTocReader

public TocReader getTocReader(org.w3c.dom.Element onode)

Returns a reader for a specific toc

Parameters:
onode - the text:table-of-content node
Returns:
the reader, or null

isIndexSourceStyle

public boolean isIndexSourceStyle(java.lang.String sStyleName)

Is this style used in some toc as an index source style?

Parameters:
sStyleName - the name of the style
Returns:
true if this is an index source style

isFigureSequenceName

public boolean isFigureSequenceName(java.lang.String sName)

Does this sequence name belong to a list of figures (lof)?

Parameters:
sName - the name of the sequence
Returns:
true if it belongs to an index

isTableSequenceName

public boolean isTableSequenceName(java.lang.String sName)

Does this sequence name belong to a list of tables (lot)?

Parameters:
sName - the name of the sequence
Returns:
true if it belongs to an index

addTableSequenceName

public void addTableSequenceName(java.lang.String sName)

Add a sequence name for table captions.

OpenDocument has a very weak notion of table captions: A caption is a paragraph containing a text:sequence element. Moreover, the only source to identify which sequence number to use is the list(s) of tables. If there's no list of tables, captions cannot be identified. Thus this method lets the user add a sequence name to identify the table captions.

Parameters:
sName - the name to add

addFigureSequenceName

public void addFigureSequenceName(java.lang.String sName)

Add a sequence name for figure captions.

OpenDocument has a very weak notion of figure captions: A caption is a paragraph containing a text:sequence element. Moreover, the only source to identify which sequence number to use is the list(s) of figures. If there's no list of figures, captions cannot be identified. Thus this method lets the user add a sequence name to identify the figure captions.

Parameters:
sName - the name to add

getSequenceName

public java.lang.String getSequenceName(org.w3c.dom.Element par)

Get the sequence name associated with a paragraph

Parameters:
par - the paragraph to look up
Returns:
the sequence name or null

getSequenceFromRef

public java.lang.String getSequenceFromRef(java.lang.String sRefName)

Get the sequence name associated with a reference name

Parameters:
sRefName - the reference name to use
Returns:
the sequence name or null

hasFootnoteRefTo

public boolean hasFootnoteRefTo(java.lang.String sId)

Is there a reference to this footnote id?

Parameters:
sId - the id of the footnote
Returns:
true if there is a reference

hasEndnoteRefTo

public boolean hasEndnoteRefTo(java.lang.String sId)

Is there a reference to this endnote?

Parameters:
sId - the id of the endnote
Returns:
true if there is a reference

hasReferenceRefTo

public boolean hasReferenceRefTo(java.lang.String sName)

Is there a reference to this reference mark?

Parameters:
sName - the name of the reference mark
Returns:
true if there is a reference

hasBookmarkRefTo

public boolean hasBookmarkRefTo(java.lang.String sName)

Is there a reference to this bookmark?

Parameters:
sName - the name of the bookmark
Returns:
true if there is a reference

hasSequenceRefTo

public boolean hasSequenceRefTo(java.lang.String sId)

Is there a reference to this sequence field?

Parameters:
sId - the id of the sequence field
Returns:
true if there is a reference

hasLinkTo

public boolean hasLinkTo(java.lang.String sName)

Is there a link to this sequence anchor name?

Parameters:
sName - the name of the anchor
Returns:
true if there is a link

isOpenDocument

public boolean isOpenDocument()

Is this an OASIS OpenDocument or an OOo 1.0 document?

Returns:
true if it's an OASIS OpenDocument

isText

public boolean isText()

Is this a text document?

Returns:
true if it's a text document

isSpreadsheet

public boolean isSpreadsheet()

Is this a spreadsheet document?

Returns:
true if it's a spreadsheet document

isPresentation

public boolean isPresentation()

Is this a presentation document?

Returns:
true if it's a presentation document

getContent

public org.w3c.dom.Element getContent()

Get the content element.

In the old file format this means the office:body element.

In the OpenDocument format this means an office:text, office:spreadsheet or office:presentation element.

Returns:
the content Element

getForms

public FormsReader getForms()

Get the forms belonging to this document.

Returns:
a FormsReader representing the forms

getTableReader

public TableReader getTableReader(org.w3c.dom.Element node)

Read a table from a table:table node

Parameters:
node - the table:table Element node
Returns:
a TableReader object representing the table

loadStylesFromDOM

private void loadStylesFromDOM(org.w3c.dom.Node node,
                               boolean bAllParagraphsAreSoft)

loadStylesFromDOM

private void loadStylesFromDOM(org.w3c.dom.Document stylesDOM,
                               org.w3c.dom.Document contentDOM,
                               boolean bAllParagraphsAreSoft)

loadContentFromDOM

private void loadContentFromDOM(org.w3c.dom.Document contentDOM)

getParagraph

private org.w3c.dom.Element getParagraph(org.w3c.dom.Element node)

traverseContent

private void traverseContent(org.w3c.dom.Element node)

collectRefName

private void collectRefName(java.util.HashSet ref,
                            org.w3c.dom.Element node)