package edu.vt.marian.Document; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.common.*; import edu.vt.marian.Document.SOIFException; import gnu.regexp.*; /** * A set of data encapsulated in a SOIF */ public class RFC1807Document implements Document { /** this string contains all the information of this document the format of the string is in SGML */ private String rfc1807String = null; /** * The URL in the SOIF */ private URL URL; /** * Property list for the SOIF */ private Hashtable attributeValues; /** * Allows us to pull things out of the property list in the order they * were inserted. */ private Vector keyList; /** * Common delimitter that separates SOIF attributes and values */ static final String soifDelimitter = ":: "; Debug debug; /** Build a new SOIF object */ public RFC1807Document(Debug debug) { this.debug = debug; attributeValues = new Hashtable(); keyList = new Vector(); } /** * Build a new SOIF object from an input stream * @param soifInput input stream from which the SOIF should be built. */ public RFC1807Document(String rfc1807Input, Debug debug) throws SOIFException { this(debug); rfc1807String = rfc1807Input; //*** ParseRFC1807Buffer(rfc1807String); } /** * Return a string array of the attribute names in this SOIF * @return An array of String that are the attribute (property) * names for the SOIF instance */ public String[] GetAttributeNames() { Enumeration elements = attributeValues.keys(); String attributeNames[] = new String[attributeValues.size()]; int i = 0; while (elements.hasMoreElements()) { attributeNames[i++] = (String) elements.nextElement(); } return(attributeNames); } /** * Return a count of the number of attributes in this SOIF */ public int GetNumAttributes() { return(attributeValues.size()); } /** * Return the value of a specified attribute in this SOIF * @param attribute name of the attribute to get the value of. */ public String GetAttributeValue(String attribute) { if (!attributeValues.containsKey(attribute)) { return null; } return((String) attributeValues.get(attribute)); } /** * Set an attribute/value pair for this SOIF * @param attribute name of the attribute to set. * @param value value to set the attribute to. */ public void SetAttributeValue(String attribute, String value) { attributeValues.put(attribute, value); keyList.addElement(attribute); } /** * Return a StringBuffer which is a marshalled version of this SOIF * @param close A boolean value determining whether the soif should be closed * (I.E. whether the closing brace should be appended to the soif). */ public StringBuffer SOIFToBuffer(boolean close) { StringBuffer output = new StringBuffer(); Enumeration keys = attributeValues.keys(); String attribute, value; //*** output.append("@" + GetTemplateType() + "{"); for (int i = 0; i < keyList.size(); i++) { value = (String) attributeValues.get(keyList.elementAt(i)); output.append(UnparseSOIFElement((String) keyList.elementAt(i), value)); } if (close) { output.append(SOIFClose()); } return(output); } /** * Turn a attribute/value pair into a strintg. * @param attribute The attribute * @param value The attribute's value. */ public static String UnparseSOIFElement(String attribute, String value) { StringBuffer output = new StringBuffer(); output.append(attribute); output.append("{" + String.valueOf(value.length()) + "}"); output.append(soifDelimitter + value); output.append("\n"); return output.toString(); } /** * Return the SOIF close symbol (the brace) * */ public String SOIFClose() { return("}"); } /** * Parse a marshalled SOIF object extracting all its attribute/value * pairs * @param soifString the string to parse. */ public void ParseSOIFBuffer(String soifString) throws SOIFException { String soifBody, couldBeURL; RE reg; REMatch result; int pos; soifString.trim(); // separate the template type and the body (inside {}) if (!soifString.startsWith("@")) { throw new SOIFException("Illegal SOIF file format"); } if ((pos = soifString.indexOf("{")) < 0) { throw new SOIFException("Illegal SOIF file format"); } //*** templateType = soifString.substring(1, pos).trim(); soifBody = soifString.substring(pos + 1).trim(); if (!soifBody.endsWith("}")) { throw new SOIFException("Illegal SOIF file format"); } soifBody = soifBody.substring(0, soifBody.length() - 1); // first token in the body may be a url StringTokenizer st = new StringTokenizer(soifBody); try { couldBeURL = st.nextToken(); URL = new URL(couldBeURL); soifBody = soifBody.substring(couldBeURL.length()).trim(); } catch (MalformedURLException e) { // we don't care if there is no URL, It is optional and if there is none, then the // body is just attribute/value pairs. } catch (NoSuchElementException e1) { throw new SOIFException("Illegal SOIF file format"); } ParseSOIFBody(soifBody); } /** * Parse the body of the SOIF extracting the attribute/value pairs * @param body the body to parse */ public void ParseSOIFBody(String body) throws SOIFException { int curPos = 0, bodyLength = body.length(); attributeValues = new Hashtable(); String attribute, attributeName, value; int valueLength,startValue, endValue; RE reg; REMatch result, nextResult; // pattern for an attribute (alpha-num string followed by :: - e.g. AUTHOR::) try { reg = new RE("\\s*(\\S+)\\::\\t"); //{throw new gnu.regexp.REException();} body = body.trim(); while (curPos < body.length()) { if ((result = reg.getMatch(body, curPos)) == null) { throw new SOIFException("Illegal SOIF file format - can't find attribute name"); } curPos = result.getStartIndex(); attributeName = result.toString(); attributeName = attributeName.substring(1); nextResult = reg.getMatch(body,result.getEndIndex() ); startValue = result.getEndIndex(); if(nextResult != null) { attribute = nextResult.toString(); endValue = nextResult.getStartIndex(); } else endValue = body.length(); valueLength = endValue - startValue -1; try { value = (body.substring(startValue , endValue)); } catch (StringIndexOutOfBoundsException e) { throw new SOIFException("Illegal RFC1807 file format"); } SetAttributeValue(attributeName, value); curPos += (endValue - curPos); } } catch (gnu.regexp.REException e){} } /** Return a short description (probably only one sentence) of the document this represents. @param markupType -- specifies the charater set type need to be returned @return the short description of this object as a string */ public String presentShort(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentShort(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public int presentShort(int markupType, BufferedWriter out) throws IOException { int Err; String AttributeValue,AUTHOR, TITLE; String [] AttributeNames; AttributeNames = this.GetAttributeNames(); String shortPresentation; shortPresentation = ""; AUTHOR= null; TITLE = null; switch ( markupType ) { default: // only ASCII and XML are supported now. ASCII produces the sort // of single-line description used in results lists. XML produces // an OAMS description of the document. debug.dumpTrace("MarcDocument.presentShort(): unsupported markup type: using ASCII."); // FALL THROUGH: case DigInfObj.ASCII: boolean seenAuthor = false; for(int i=0; i < this.GetNumAttributes(); i++) { if(AttributeNames[i].indexOf("AUTHOR")!=-1) { seenAuthor = true; AUTHOR = this.GetAttributeValue(AttributeNames[i]); shortPresentation = shortPresentation + AUTHOR; } //Main title if(AttributeNames[i].indexOf("TITLE")!=-1) { TITLE = this.GetAttributeValue(AttributeNames[i]); if (seenAuthor) { // add separator shortPresentation = shortPresentation + ": "; } shortPresentation = shortPresentation + TITLE; } } // end -- for(...) shortPresentation = shortPresentation + "\n"; out.write(shortPresentation); } // end -- switch return( ReturnCodes.OK ); } public String presentLong(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentLong(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public int presentLong(int markupType, BufferedWriter out) throws IOException { String lineBreak; String paraBreak; int Err; String AttributeValue; String presentLong = null; String [] AttributeNames = this.GetAttributeNames(); switch ( markupType ) { case DigInfObj.HTML: lineBreak = new String("
" + System.getProperty("line.separator")); paraBreak = new String("

" + System.getProperty("line.separator")); // out.write("Call Number: "); break; default: debug.dumpTrace("MarcDocument.presentLong(): unsupported markup type: using ASCII."); // FALL THROUGH. case DigInfObj.ASCII: case DigInfObj.ANSEL: lineBreak = System.getProperty("line.separator"); paraBreak = new String(System.getProperty("line.separator") + System.getProperty("line.separator")); break; } //this.presentShort(markupType, out); presentLong= rfc1807String; out.write(presentLong); return( ReturnCodes.OK ); } public boolean isValid() { return( true ); } public DigInfObj copy() { return null; } public String presentFull(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentFull(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public Vector attributes() { return null; } public Vector attributes(int markupType) { return null; } public Vector presentAttributes(int markupType) { return null; } public Object presentAttribute(int attrID, int markupType) { return null; } public int presentFull(int markupType, BufferedWriter out) throws IOException { return( ReturnCodes.NOT_YET_IMPLEMENTED ); } /** Default "short" presentation for any variable field: just use the 'a' subfield. @param markupType see edu.vt.marian.common.DigInfObj @param out A BufferedWriter (presumably String or OutputStream) to present on. @return OK -- everything jake.
IO_ERROR or PARSE_ERROR -- problems. */ /* public int presentShortField(int markupType, BufferedWriter out) throws IOException { int Err; switch ( markupType ) { default: debug.dumpTrace("MarcVarField.presentShort(): Unexpected markup type " + markupType + ": treating as ASCII."); // Fall through: case DigInfObj.XML: case DigInfObj.SGML: case DigInfObj.HTML: case DigInfObj.ASCII: case DigInfObj.ANSEL: Enumeration subfld = subfields.elements(); try { while ( true ) { //MarcSubField sf = (MarcSubField) subfld.nextElement(); //if ( sf.getLabel() == 'a' ) if ( (Err = sf.present(markupType, out)) != ReturnCodes.OK ) return( Err ); if ( subfld.hasMoreElements() ) out.write(' '); } } catch( NoSuchElementException e) {}; } return( ReturnCodes.OK ); } */ }