package edu.vt.marian.Document;

import java.io.*;
import java.net.*;
import java.util.*;

import edu.vt.marian.common.*;

/**
 * Utility routines for dealing with XML documents.
 *
 * <p>The parser here is deliberately small: a "name" (tag or attribute) is a
 * run of letters, digits and underscores, and an attribute value is either a
 * single- or double-quoted string or an unquoted run of name characters.
 *
 * @author Robert France
 */
public class XmlDoc {

    /**
     * Reads the next tag from the stream, skipping any white space in front
     * of it.
     *
     * @param in the reader to consume characters from
     * @return the bindings produced by {@link #acceptPreppedTag}, or
     *         {@code null} if the first non-white-space character is not
     *         {@code '<'} (that character has been consumed) or if the tag
     *         itself could not be parsed
     * @throws EOFException if the stream ends before a non-white-space
     *         character is seen, or in the middle of the tag
     * @throws IOException if the underlying reader fails
     */
    public static Vector acceptTag(BufferedReader in) throws IOException {
        int c = in.read();
        // Eat any white space in front of the tag.
        while (c != -1 && Character.isWhitespace((char) c)) {
            c = in.read();
        }
        if (c == -1) {
            throw new EOFException();
        }
        if (c != '<') {
            return null;
        }
        return acceptPreppedTag(in);
    }

    /**
     * Parses a tag whose opening {@code '<'} has already been consumed.
     *
     * <p>The returned vector holds {@code String}s: element 0 is the tag name
     * (with a leading {@code '/'} for a closing tag), followed by alternating
     * attribute names and attribute values in the order they appear. Parsing
     * stops after the closing {@code '>'} has been consumed.
     *
     * <p>White space is allowed on either side of the {@code '='} between an
     * attribute name and its value.
     *
     * @param in the reader positioned just after the {@code '<'}
     * @return the tag name and attribute bindings, or {@code null} if there is
     *         no tag name or an attribute name is not followed by
     *         {@code '='}; in the {@code null} case the characters examined
     *         so far have been consumed from {@code in}
     * @throws EOFException if the stream ends before the closing {@code '>'},
     *         including inside a quoted attribute value
     * @throws IOException if the underlying reader fails
     */
    public static Vector acceptPreppedTag(BufferedReader in) throws IOException {
        Vector bindings = new Vector();
        // Growable, so names and values of any length are accepted.
        // StringBuffer (not StringBuilder) keeps this file free of newer APIs.
        StringBuffer token = new StringBuffer();

        // Get tag name; add as first binding.
        int c = in.read();
        if (c == '/') {
            token.append('/');
            c = in.read();
        }
        while (isNameChar(c)) {
            token.append((char) c);
            c = in.read();
        }
        if (token.length() == 0) {
            return null; // No tag name.
        }
        bindings.addElement(token.toString());

        while (true) {
            // Skip separators up to the next attribute name; '>' ends the tag.
            while (!isNameChar(c)) {
                if (c == -1) {
                    throw new EOFException();
                }
                if (c == '>') {
                    return bindings;
                }
                c = in.read();
            }

            // Attribute name.
            token.setLength(0);
            while (isNameChar(c)) {
                token.append((char) c);
                c = in.read();
            }
            bindings.addElement(token.toString());

            // XML permits white space before '=' as well as after it.
            while (c != -1 && Character.isWhitespace((char) c)) {
                c = in.read();
            }
            if (c != '=') {
                return null;
            }

            // Skip anything between '=' and the start of the value.
            c = in.read();
            while (!isNameChar(c) && c != '\'' && c != '"') {
                if (c == -1) {
                    throw new EOFException();
                }
                c = in.read();
            }

            // Attribute value: quoted with either quote style, or a bare name.
            token.setLength(0);
            if (c == '\'' || c == '"') {
                readQuoted(in, c, token);
                c = in.read();
            } else {
                while (isNameChar(c)) {
                    token.append((char) c);
                    c = in.read();
                }
            }
            bindings.addElement(token.toString());

            // Eat any white space before next attribute or close character.
            while (c != -1 && Character.isWhitespace((char) c)) {
                c = in.read();
            }
        }
    }

    /** Tells whether {@code c} is a letter, digit or underscore (never true at end of stream). */
    private static boolean isNameChar(int c) {
        return c != -1 && (Character.isLetterOrDigit((char) c) || c == '_');
    }

    /**
     * Appends to {@code into} every character up to, but not including, the
     * next occurrence of {@code quote}, which is consumed.
     *
     * @throws EOFException if the stream ends before the closing quote
     */
    private static void readQuoted(BufferedReader in, int quote, StringBuffer into)
            throws IOException {
        int c;
        while ((c = in.read()) != quote) {
            if (c == -1) {
                throw new EOFException();
            }
            into.append((char) c);
        }
    }
}