/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.html.dom; import java.io.Serializable; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.html.HTMLAnchorElement; import org.w3c.dom.html.HTMLAppletElement; import org.w3c.dom.html.HTMLAreaElement; import org.w3c.dom.html.HTMLCollection; import org.w3c.dom.html.HTMLElement; import org.w3c.dom.html.HTMLFormElement; import org.w3c.dom.html.HTMLImageElement; import org.w3c.dom.html.HTMLObjectElement; import org.w3c.dom.html.HTMLOptionElement; import org.w3c.dom.html.HTMLTableCellElement; import org.w3c.dom.html.HTMLTableRowElement; import org.w3c.dom.html.HTMLTableSectionElement; /** * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to * look for is identified in the constructor by code. This collection is not * optimized for traversing large trees. *

* The collection has to meet two requirements: it has to be live, and it has * to traverse depth first and always return results in that order. As such, * using an object container (such as {@link java.util.Vector}) is expensive on * insert/remove operations. Instead, the collection has been implemented using * three traversing functions. As a result, operations on large documents will * result in traversal of the entire document tree and consume a considerable * amount of time. *

* Note that synchronization on the traversed document cannot be achieved. * The document itself cannot be locked, and locking each traversed node is * likely to lead to a dead lock condition. Therefore, there is a chance of the * document being changed as results are fetched; in all likelihood, the results * might be out dated, but not erroneous. * * @xerces.internal * * @version $Revision$ $Date$ * @author Assaf Arkin * @see org.w3c.dom.html.HTMLCollection */ class HTMLCollectionImpl implements HTMLCollection, Serializable { private static final long serialVersionUID = 9112122196669185082L; /** * Request collection of all anchors in document: <A> elements that * have a name attribute. */ static final short ANCHOR = 1; /** * Request collection of all forms in document: <FORM> elements. */ static final short FORM = 2; /** * Request collection of all images in document: <IMG> elements. */ static final short IMAGE = 3; /** * Request collection of all Applets in document: <APPLET> and * <OBJECT> elements (<OBJECT> must contain an Applet). */ static final short APPLET = 4; /** * Request collection of all links in document: <A> and <AREA> * elements (must have a href attribute). */ static final short LINK = 5; /** * Request collection of all options in selection: <OPTION> elements in * <SELECT> or <OPTGROUP>. */ static final short OPTION = 6; /** * Request collection of all rows in table: <TR> elements in table or * table section. */ static final short ROW = 7; /** * Request collection of all form elements: <INPUT>, <BUTTON>, * <SELECT>, and <TEXTAREA> elements inside form <FORM>. */ static final short ELEMENT = 8; /** * Request collection of all areas in map: <AREA> element in <MAP> * (non recursive). */ static final short AREA = -1; /** * Request collection of all table bodies in table: <TBODY> element in * table <TABLE> (non recursive). */ static final short TBODY = -2; /** * Request collection of all cells in row: <TD> and <TH> * elements in <TR> (non recursive). */ static final short CELL = -3; /** * Indicates what this collection is looking for. Holds one of the enumerated * values and used by {@link #collectionMatch}. Set by the constructor and * determine the collection's use for its life time. */ private short _lookingFor; /** * This is the top level element underneath which the collection exists. */ private Element _topLevel; /** * Construct a new collection that retrieves element of the specific type * (lookingFor) from the specific document portion * (topLevel). * * @param topLevel The element underneath which the collection exists * @param lookingFor Code indicating what elements to look for */ HTMLCollectionImpl( HTMLElement topLevel, short lookingFor ) { if ( topLevel == null ) throw new NullPointerException( "HTM011 Argument 'topLevel' is null." ); _topLevel = topLevel; _lookingFor = lookingFor; } /** * Returns the length of the collection. This method might traverse the * entire document tree. * * @return Length of the collection */ public final int getLength() { // Call recursive function on top-level element. return getLength( _topLevel ); } /** * Retrieves the indexed node from the collection. Nodes are numbered in * tree order - depth-first traversal order. This method might traverse * the entire document tree. * * @param index The index of the node to return * @return The specified node or null if no such node found */ public final Node item( int index ) { if ( index < 0 ) throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." ); // Call recursive function on top-level element. return item( _topLevel, new CollectionIndex( index ) ); } /** * Retrieves the named node from the collection. The name is matched case * sensitive against the id attribute of each element in the * collection, returning the first match. The tree is traversed in * depth-first order. This method might traverse the entire document tree. * * @param name The name of the node to return * @return The specified node or null if no such node found */ public final Node namedItem( String name ) { if ( name == null ) throw new NullPointerException( "HTM013 Argument 'name' is null." ); // Call recursive function on top-level element. return namedItem( _topLevel, name ); } /** * Recursive function returns the number of elements of a particular type * that exist under the top level element. This is a recursive function * and the top level element is passed along. * * @param topLevel Top level element from which to scan * @return Number of elements */ private int getLength( Element topLevel ) { int length; Node node; synchronized ( topLevel ) { // Always count from zero and traverse all the childs of the // current element in the order they appear. length = 0; node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, count // another matched element; at any rate, traverse it's // children as well. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, null ) ) ++ length; else if ( recurse() ) length += getLength( (Element) node ); } node = node.getNextSibling(); } } return length; } /** * Recursive function returns the numbered element of a particular type * that exist under the top level element. This is a recursive function * and the top level element is passed along. *

* Note that this function must call itself with an index and get back both * the element (if one was found) and the new index which is decremeneted * for any like element found. Since integers are only passed by value, * this function makes use of a separate class ({@link CollectionIndex}) * to hold that index. * * @param topLevel Top level element from which to scan * @param index The index of the item to retreive * @return Number of elements * @see CollectionIndex */ private Node item( Element topLevel, CollectionIndex index ) { Node node; Node result; synchronized ( topLevel ) { // Traverse all the childs of the current element in the order // they appear. Count from the index backwards until you reach // matching element with an index of zero. Return that element. node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, decrease // the index and if zero, return this node; at any rate, // traverse it's children as well. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, null ) ) { if ( index.isZero() ) return node; index.decrement(); } else if ( recurse() ) { result = item( (Element) node, index ); if ( result != null ) return result; } } node = node.getNextSibling(); } } return null; } /** * Recursive function returns an element of a particular type with the * specified name (id attribute). * * @param topLevel Top level element from which to scan * @param name The named element to look for * @return The first named element found */ private Node namedItem( Element topLevel, String name ) { Node node; Node result; synchronized ( topLevel ) { // Traverse all the childs of the current element in the order // they appear. node = topLevel.getFirstChild(); while ( node != null ) { // If a particular node is an element (could be HTML or XML), // do two things: if it's the one we're looking for, and the // name (id attribute) attribute is the one we're looking for, // return this element; otherwise, traverse it's children. if ( node instanceof Element ) { if ( collectionMatch( (Element) node, name ) ) return node; else if ( recurse() ) { result = namedItem( (Element) node, name ); if ( result != null ) return result; } } node = node.getNextSibling(); } return node; } } /** * Returns true if scanning methods should iterate through the collection. * When looking for elements in the document, recursing is needed to traverse * the full document tree. When looking inside a specific element (e.g. for a * cell inside a row), recursing can lead to erroneous results. * * @return True if methods should recurse to traverse entire tree */ protected boolean recurse() { return _lookingFor > 0; } /** * Determines if current element matches based on what we're looking for. * The element is passed along with an optional identifier name. If the * element is the one we're looking for, return true. If the name is also * specified, the name must match the id attribute * (match name first for anchors). * * @param elem The current element * @param name The identifier name or null * @return The element matches what we're looking for */ protected boolean collectionMatch( Element elem, String name ) { boolean match; synchronized ( elem ) { // Begin with no matching. Depending on what we're looking for, // attempt to match based on the element type. This is the quickest // way to match involving only a cast. Do the expensive string // comparison later on. match = false; switch ( _lookingFor ) { case ANCHOR: // Anchor is an element with a 'name' attribute. Otherwise, it's // just a link. match = ( elem instanceof HTMLAnchorElement ) && elem.getAttribute( "name" ).length() > 0; break; case FORM: // Any

element. match = ( elem instanceof HTMLFormElement ); break; case IMAGE: // Any element. elements with images are not returned. match = ( elem instanceof HTMLImageElement ); break; case APPLET: // Any element, and any element which represents an // Applet. This is determined by 'codetype' attribute being // 'application/java' or 'classid' attribute starting with 'java:'. match = ( elem instanceof HTMLAppletElement ) || ( elem instanceof HTMLObjectElement && ( "application/java".equals( elem.getAttribute( "codetype" ) ) || elem.getAttribute( "classid" ).startsWith( "java:" ) ) ); break; case ELEMENT: // All form elements implement HTMLFormControl for easy identification. match = ( elem instanceof HTMLFormControl ); break; case LINK: // Any element, and any elements with an 'href' attribute. match = ( ( elem instanceof HTMLAnchorElement || elem instanceof HTMLAreaElement ) && elem.getAttribute( "href" ).length() > 0 ); break; case AREA: // Any element. match = ( elem instanceof HTMLAreaElement ); break; case OPTION: // Any elements for backward compatibility. if ( match && name != null ) { // If an anchor and 'name' attribute matches, return true. Otherwise, // try 'id' attribute. if ( elem instanceof HTMLAnchorElement && name.equals( elem.getAttribute( "name" ) ) ) return true; match = name.equals( elem.getAttribute( "id" ) ); } } return match; } } /** * {@link CollectionImpl#item} must traverse down the tree and decrement the * index until it matches an element who's index is zero. Since integers are * passed by value, this class servers to pass the index into each recursion * by reference. It encompasses all the operations that need be performed on * the index, although direct access is possible. * * @xerces.internal * * @see CollectionImpl#item */ class CollectionIndex { /** * Returns the current index. * * @return Current index */ int getIndex() { return _index; } /** * Decrements the index by one. */ void decrement() { -- _index; } /** * Returns true if index is zero (or negative). * * @return True if index is zero */ boolean isZero() { return _index <= 0; } /** * Constructs a new index with the specified initial value. The index will * then be decremeneted until it reaches zero. * * @param index The initial value */ CollectionIndex( int index ) { _index = index; } /** * Holds the actual value that is passed by reference using this class. */ private int _index; }