/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package org.apache.html.dom;
|
|
|
|
import java.io.Serializable;
|
|
|
|
import org.w3c.dom.Element;
|
|
import org.w3c.dom.Node;
|
|
import org.w3c.dom.html.HTMLAnchorElement;
|
|
import org.w3c.dom.html.HTMLAppletElement;
|
|
import org.w3c.dom.html.HTMLAreaElement;
|
|
import org.w3c.dom.html.HTMLCollection;
|
|
import org.w3c.dom.html.HTMLElement;
|
|
import org.w3c.dom.html.HTMLFormElement;
|
|
import org.w3c.dom.html.HTMLImageElement;
|
|
import org.w3c.dom.html.HTMLObjectElement;
|
|
import org.w3c.dom.html.HTMLOptionElement;
|
|
import org.w3c.dom.html.HTMLTableCellElement;
|
|
import org.w3c.dom.html.HTMLTableRowElement;
|
|
import org.w3c.dom.html.HTMLTableSectionElement;
|
|
|
|
/**
|
|
* Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
|
|
* elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to
|
|
* look for is identified in the constructor by code. This collection is not
|
|
* optimized for traversing large trees.
|
|
* <p>
|
|
* The collection has to meet two requirements: it has to be live, and it has
|
|
* to traverse depth first and always return results in that order. As such,
|
|
* using an object container (such as {@link java.util.Vector}) is expensive on
|
|
* insert/remove operations. Instead, the collection has been implemented using
|
|
* three traversing functions. As a result, operations on large documents will
|
|
* result in traversal of the entire document tree and consume a considerable
|
|
* amount of time.
|
|
* <p>
|
|
* Note that synchronization on the traversed document cannot be achieved.
|
|
* The document itself cannot be locked, and locking each traversed node is
|
|
* likely to lead to a dead lock condition. Therefore, there is a chance of the
|
|
* document being changed as results are fetched; in all likelihood, the results
|
|
* might be out dated, but not erroneous.
|
|
*
|
|
* @xerces.internal
|
|
*
|
|
* @version $Revision$ $Date$
|
|
* @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
|
|
* @see org.w3c.dom.html.HTMLCollection
|
|
*/
|
|
class HTMLCollectionImpl
|
|
implements HTMLCollection, Serializable
|
|
{
|
|
|
|
private static final long serialVersionUID = 9112122196669185082L;
|
|
|
|
/**
|
|
* Request collection of all anchors in document: <A> elements that
|
|
* have a <code>name</code> attribute.
|
|
*/
|
|
static final short ANCHOR = 1;
|
|
|
|
|
|
/**
|
|
* Request collection of all forms in document: <FORM> elements.
|
|
*/
|
|
static final short FORM = 2;
|
|
|
|
|
|
/**
|
|
* Request collection of all images in document: <IMG> elements.
|
|
*/
|
|
static final short IMAGE = 3;
|
|
|
|
|
|
/**
|
|
* Request collection of all Applets in document: <APPLET> and
|
|
* <OBJECT> elements (<OBJECT> must contain an Applet).
|
|
*/
|
|
static final short APPLET = 4;
|
|
|
|
|
|
/**
|
|
* Request collection of all links in document: <A> and <AREA>
|
|
* elements (must have a <code>href</code> attribute).
|
|
*/
|
|
static final short LINK = 5;
|
|
|
|
|
|
/**
|
|
* Request collection of all options in selection: <OPTION> elements in
|
|
* <SELECT> or <OPTGROUP>.
|
|
*/
|
|
static final short OPTION = 6;
|
|
|
|
|
|
/**
|
|
* Request collection of all rows in table: <TR> elements in table or
|
|
* table section.
|
|
*/
|
|
static final short ROW = 7;
|
|
|
|
|
|
/**
|
|
* Request collection of all form elements: <INPUT>, <BUTTON>,
|
|
* <SELECT>, and <TEXTAREA> elements inside form <FORM>.
|
|
*/
|
|
static final short ELEMENT = 8;
|
|
|
|
|
|
/**
|
|
* Request collection of all areas in map: <AREA> element in <MAP>
|
|
* (non recursive).
|
|
*/
|
|
static final short AREA = -1;
|
|
|
|
|
|
/**
|
|
* Request collection of all table bodies in table: <TBODY> element in
|
|
* table <TABLE> (non recursive).
|
|
*/
|
|
static final short TBODY = -2;
|
|
|
|
|
|
/**
|
|
* Request collection of all cells in row: <TD> and <TH>
|
|
* elements in <TR> (non recursive).
|
|
*/
|
|
static final short CELL = -3;
|
|
|
|
|
|
/**
|
|
* Indicates what this collection is looking for. Holds one of the enumerated
|
|
* values and used by {@link #collectionMatch}. Set by the constructor and
|
|
* determine the collection's use for its life time.
|
|
*/
|
|
private short _lookingFor;
|
|
|
|
|
|
/**
|
|
* This is the top level element underneath which the collection exists.
|
|
*/
|
|
private Element _topLevel;
|
|
|
|
|
|
/**
|
|
* Construct a new collection that retrieves element of the specific type
|
|
* (<code>lookingFor</code>) from the specific document portion
|
|
* (<code>topLevel</code>).
|
|
*
|
|
* @param topLevel The element underneath which the collection exists
|
|
* @param lookingFor Code indicating what elements to look for
|
|
*/
|
|
HTMLCollectionImpl( HTMLElement topLevel, short lookingFor )
|
|
{
|
|
if ( topLevel == null )
|
|
throw new NullPointerException( "HTM011 Argument 'topLevel' is null." );
|
|
_topLevel = topLevel;
|
|
_lookingFor = lookingFor;
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns the length of the collection. This method might traverse the
|
|
* entire document tree.
|
|
*
|
|
* @return Length of the collection
|
|
*/
|
|
public final int getLength()
|
|
{
|
|
// Call recursive function on top-level element.
|
|
return getLength( _topLevel );
|
|
}
|
|
|
|
|
|
/**
|
|
* Retrieves the indexed node from the collection. Nodes are numbered in
|
|
* tree order - depth-first traversal order. This method might traverse
|
|
* the entire document tree.
|
|
*
|
|
* @param index The index of the node to return
|
|
* @return The specified node or null if no such node found
|
|
*/
|
|
public final Node item( int index )
|
|
{
|
|
if ( index < 0 )
|
|
throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." );
|
|
// Call recursive function on top-level element.
|
|
return item( _topLevel, new CollectionIndex( index ) );
|
|
}
|
|
|
|
|
|
/**
|
|
* Retrieves the named node from the collection. The name is matched case
|
|
* sensitive against the <TT>id</TT> attribute of each element in the
|
|
* collection, returning the first match. The tree is traversed in
|
|
* depth-first order. This method might traverse the entire document tree.
|
|
*
|
|
* @param name The name of the node to return
|
|
* @return The specified node or null if no such node found
|
|
*/
|
|
public final Node namedItem( String name )
|
|
{
|
|
if ( name == null )
|
|
throw new NullPointerException( "HTM013 Argument 'name' is null." );
|
|
// Call recursive function on top-level element.
|
|
return namedItem( _topLevel, name );
|
|
}
|
|
|
|
|
|
/**
|
|
* Recursive function returns the number of elements of a particular type
|
|
* that exist under the top level element. This is a recursive function
|
|
* and the top level element is passed along.
|
|
*
|
|
* @param topLevel Top level element from which to scan
|
|
* @return Number of elements
|
|
*/
|
|
private int getLength( Element topLevel )
|
|
{
|
|
int length;
|
|
Node node;
|
|
|
|
synchronized ( topLevel )
|
|
{
|
|
// Always count from zero and traverse all the childs of the
|
|
// current element in the order they appear.
|
|
length = 0;
|
|
node = topLevel.getFirstChild();
|
|
while ( node != null )
|
|
{
|
|
// If a particular node is an element (could be HTML or XML),
|
|
// do two things: if it's the one we're looking for, count
|
|
// another matched element; at any rate, traverse it's
|
|
// children as well.
|
|
if ( node instanceof Element )
|
|
{
|
|
if ( collectionMatch( (Element) node, null ) )
|
|
++ length;
|
|
else if ( recurse() )
|
|
length += getLength( (Element) node );
|
|
}
|
|
node = node.getNextSibling();
|
|
}
|
|
}
|
|
return length;
|
|
}
|
|
|
|
|
|
/**
|
|
* Recursive function returns the numbered element of a particular type
|
|
* that exist under the top level element. This is a recursive function
|
|
* and the top level element is passed along.
|
|
* <p>
|
|
* Note that this function must call itself with an index and get back both
|
|
* the element (if one was found) and the new index which is decremeneted
|
|
* for any like element found. Since integers are only passed by value,
|
|
* this function makes use of a separate class ({@link CollectionIndex})
|
|
* to hold that index.
|
|
*
|
|
* @param topLevel Top level element from which to scan
|
|
* @param index The index of the item to retreive
|
|
* @return Number of elements
|
|
* @see CollectionIndex
|
|
*/
|
|
private Node item( Element topLevel, CollectionIndex index )
|
|
{
|
|
Node node;
|
|
Node result;
|
|
|
|
synchronized ( topLevel )
|
|
{
|
|
// Traverse all the childs of the current element in the order
|
|
// they appear. Count from the index backwards until you reach
|
|
// matching element with an index of zero. Return that element.
|
|
node = topLevel.getFirstChild();
|
|
while ( node != null )
|
|
{
|
|
// If a particular node is an element (could be HTML or XML),
|
|
// do two things: if it's the one we're looking for, decrease
|
|
// the index and if zero, return this node; at any rate,
|
|
// traverse it's children as well.
|
|
if ( node instanceof Element )
|
|
{
|
|
if ( collectionMatch( (Element) node, null ) )
|
|
{
|
|
if ( index.isZero() )
|
|
return node;
|
|
index.decrement();
|
|
} else if ( recurse() )
|
|
{
|
|
result = item( (Element) node, index );
|
|
if ( result != null )
|
|
return result;
|
|
}
|
|
}
|
|
node = node.getNextSibling();
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
|
|
/**
|
|
* Recursive function returns an element of a particular type with the
|
|
* specified name (<TT>id</TT> attribute).
|
|
*
|
|
* @param topLevel Top level element from which to scan
|
|
* @param name The named element to look for
|
|
* @return The first named element found
|
|
*/
|
|
private Node namedItem( Element topLevel, String name )
|
|
{
|
|
Node node;
|
|
Node result;
|
|
|
|
synchronized ( topLevel )
|
|
{
|
|
// Traverse all the childs of the current element in the order
|
|
// they appear.
|
|
node = topLevel.getFirstChild();
|
|
while ( node != null )
|
|
{
|
|
// If a particular node is an element (could be HTML or XML),
|
|
// do two things: if it's the one we're looking for, and the
|
|
// name (id attribute) attribute is the one we're looking for,
|
|
// return this element; otherwise, traverse it's children.
|
|
if ( node instanceof Element )
|
|
{
|
|
if ( collectionMatch( (Element) node, name ) )
|
|
return node;
|
|
else if ( recurse() )
|
|
{
|
|
result = namedItem( (Element) node, name );
|
|
if ( result != null )
|
|
return result;
|
|
}
|
|
}
|
|
node = node.getNextSibling();
|
|
}
|
|
return node;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns true if scanning methods should iterate through the collection.
|
|
* When looking for elements in the document, recursing is needed to traverse
|
|
* the full document tree. When looking inside a specific element (e.g. for a
|
|
* cell inside a row), recursing can lead to erroneous results.
|
|
*
|
|
* @return True if methods should recurse to traverse entire tree
|
|
*/
|
|
protected boolean recurse()
|
|
{
|
|
return _lookingFor > 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Determines if current element matches based on what we're looking for.
|
|
* The element is passed along with an optional identifier name. If the
|
|
* element is the one we're looking for, return true. If the name is also
|
|
* specified, the name must match the <code>id</code> attribute
|
|
* (match <code>name</code> first for anchors).
|
|
*
|
|
* @param elem The current element
|
|
* @param name The identifier name or null
|
|
* @return The element matches what we're looking for
|
|
*/
|
|
protected boolean collectionMatch( Element elem, String name )
|
|
{
|
|
boolean match;
|
|
|
|
synchronized ( elem )
|
|
{
|
|
// Begin with no matching. Depending on what we're looking for,
|
|
// attempt to match based on the element type. This is the quickest
|
|
// way to match involving only a cast. Do the expensive string
|
|
// comparison later on.
|
|
match = false;
|
|
switch ( _lookingFor )
|
|
{
|
|
case ANCHOR:
|
|
// Anchor is an <A> element with a 'name' attribute. Otherwise, it's
|
|
// just a link.
|
|
match = ( elem instanceof HTMLAnchorElement ) &&
|
|
elem.getAttribute( "name" ).length() > 0;
|
|
break;
|
|
case FORM:
|
|
// Any <FORM> element.
|
|
match = ( elem instanceof HTMLFormElement );
|
|
break;
|
|
case IMAGE:
|
|
// Any <IMG> element. <OBJECT> elements with images are not returned.
|
|
match = ( elem instanceof HTMLImageElement );
|
|
break;
|
|
case APPLET:
|
|
// Any <APPLET> element, and any <OBJECT> element which represents an
|
|
// Applet. This is determined by 'codetype' attribute being
|
|
// 'application/java' or 'classid' attribute starting with 'java:'.
|
|
match = ( elem instanceof HTMLAppletElement ) ||
|
|
( elem instanceof HTMLObjectElement &&
|
|
( "application/java".equals( elem.getAttribute( "codetype" ) ) ||
|
|
elem.getAttribute( "classid" ).startsWith( "java:" ) ) );
|
|
break;
|
|
case ELEMENT:
|
|
// All form elements implement HTMLFormControl for easy identification.
|
|
match = ( elem instanceof HTMLFormControl );
|
|
break;
|
|
case LINK:
|
|
// Any <A> element, and any <AREA> elements with an 'href' attribute.
|
|
match = ( ( elem instanceof HTMLAnchorElement ||
|
|
elem instanceof HTMLAreaElement ) &&
|
|
elem.getAttribute( "href" ).length() > 0 );
|
|
break;
|
|
case AREA:
|
|
// Any <AREA> element.
|
|
match = ( elem instanceof HTMLAreaElement );
|
|
break;
|
|
case OPTION:
|
|
// Any <OPTION> element.
|
|
match = ( elem instanceof HTMLOptionElement );
|
|
break;
|
|
case ROW:
|
|
// Any <TR> element.
|
|
match = ( elem instanceof HTMLTableRowElement );
|
|
break;
|
|
case TBODY:
|
|
// Any <TBODY> element (one of three table section types).
|
|
match = ( elem instanceof HTMLTableSectionElement &&
|
|
elem.getTagName().equals( "TBODY" ) );
|
|
break;
|
|
case CELL:
|
|
// Any <TD> or <TH> element.
|
|
match = ( elem instanceof HTMLTableCellElement );
|
|
break;
|
|
}
|
|
|
|
// If element type was matched and a name was specified, must also match
|
|
// the name against either the 'id' or the 'name' attribute. The 'name'
|
|
// attribute is relevant only for <A> elements for backward compatibility.
|
|
if ( match && name != null )
|
|
{
|
|
// If an anchor and 'name' attribute matches, return true. Otherwise,
|
|
// try 'id' attribute.
|
|
if ( elem instanceof HTMLAnchorElement &&
|
|
name.equals( elem.getAttribute( "name" ) ) )
|
|
return true;
|
|
match = name.equals( elem.getAttribute( "id" ) );
|
|
}
|
|
}
|
|
return match;
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
/**
 * Mutable countdown shared by the recursive lookup behind
 * {@link HTMLCollectionImpl#item(int)}. That lookup walks down the tree and
 * lowers the counter once for every matching element it passes, stopping at
 * the match found while the counter reports zero. Since an <code>int</code>
 * argument is passed by value, the counter is wrapped in this class so that
 * every level of the recursion works on the same value.
 *
 * @xerces.internal
 *
 * @see HTMLCollectionImpl#item(int)
 */
class CollectionIndex {

    /**
     * The wrapped counter; this is the value effectively passed by
     * reference.
     */
    private int _index;

    /**
     * Creates a counter starting at the given value. Callers are expected to
     * decrement it until {@link #isZero} reports true.
     *
     * @param index The initial value
     */
    CollectionIndex( int index ) {
        this._index = index;
    }

    /**
     * Reads the counter.
     *
     * @return Current index
     */
    int getIndex() {
        return this._index;
    }

    /**
     * Lowers the counter by one. There is no lower bound, so the counter may
     * become negative.
     */
    void decrement() {
        this._index -= 1;
    }

    /**
     * Reports whether the countdown has finished, that is, whether the
     * counter is zero or has dropped below zero.
     *
     * @return True if index is zero or negative
     */
    boolean isZero() {
        return !( this._index > 0 );
    }

}
|