package edu.vt.marian.Document; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.common.*; //import edu.vt.marian.Document.*; import gnu.regexp.*; /** * A set of data encapsulated in a SOIF */ public class SOIFDocument implements Document { /** this string contains all the information of this document the format of the string is in SGML */ public String soifString = null; /** * The template type of the SOIF */ private String templateType; /** * The URL in the SOIF */ private URL URL; /** * Property list for the SOIF */ private Hashtable attributeValues; /** * Allows us to pull things out of the property list in the order they * were inserted. */ private Vector keyList; /** * Common delimitter that separates SOIF attributes and values */ static final String soifDelimitter = ": "; Debug debug; /** Build a new SOIF object */ public SOIFDocument(Debug debug) { this.debug = debug; attributeValues = new Hashtable(); keyList = new Vector(); } /* * Build a new SOIF object with a set template type * @param templateType the template type of the SOIF */ /* public SOIFObject(String templateType) { this(); this.templateType = templateType; } */ /** * Build a new SOIF object from an input stream * @param soifInput input stream from which the SOIF should be built. */ public SOIFDocument(String soifInput, Debug debug) throws SOIFException { this(debug); soifString = soifInput; ParseSOIFBuffer(soifString); } /** * Return a string array of the attribute names in this SOIF * @return An array of String that are the attribute (property) * names for the SOIF instance */ public String[] GetAttributeNames() { Enumeration elements = attributeValues.keys(); String attributeNames[] = new String[attributeValues.size()]; int i = 0; while (elements.hasMoreElements()) { attributeNames[i++] = (String) elements.nextElement(); } return(attributeNames); } /** * Return a count of the number of attributes in this SOIF */ public int GetNumAttributes() { return(attributeValues.size()); } /** * Return the value of a specified attribute in this SOIF * @param attribute name of the attribute to get the value of. */ public String GetAttributeValue(String attribute) { if (!attributeValues.containsKey(attribute)) { return null; } return((String) attributeValues.get(attribute)); } /** * Set an attribute/value pair for this SOIF * @param attribute name of the attribute to set. * @param value value to set the attribute to. */ public void SetAttributeValue(String attribute, String value) { attributeValues.put(attribute, value); keyList.addElement(attribute); } /** * Return the template type of this SOIF object. */ public String GetTemplateType() { return(templateType); } /** * Return the URL of this SOIF object */ public URL GetURL() { return(URL); } /** * Return a StringBuffer which is a marshalled version of this SOIF * @param close A boolean value determining whether the soif should be closed * (I.E. whether the closing brace should be appended to the soif). */ public StringBuffer SOIFToBuffer(boolean close) { StringBuffer output = new StringBuffer(); Enumeration keys = attributeValues.keys(); String attribute, value; output.append("@" + GetTemplateType() + "{"); for (int i = 0; i < keyList.size(); i++) { value = (String) attributeValues.get(keyList.elementAt(i)); output.append(UnparseSOIFElement((String) keyList.elementAt(i), value)); } if (close) { output.append(SOIFClose()); } return(output); } /** * Turn a attribute/value pair into a strintg. * @param attribute The attribute * @param value The attribute's value. */ public static String UnparseSOIFElement(String attribute, String value) { StringBuffer output = new StringBuffer(); output.append(attribute); output.append("{" + String.valueOf(value.length()) + "}"); output.append(soifDelimitter + value); output.append("\n"); return output.toString(); } /** * Return the SOIF close symbol (the brace) * */ public String SOIFClose() { return("}"); } /** * Parse a marshalled SOIF object extracting all its attribute/value * pairs * @param soifString the string to parse. */ public void ParseSOIFBuffer(String soifString) throws SOIFException { String soifBody, couldBeURL; RE reg; REMatch result; int pos; soifString.trim(); // separate the template type and the body (inside {}) if (!soifString.startsWith("@")) { throw new SOIFException("Illegal SOIF file format (no '@')"); } if ((pos = soifString.indexOf("{")) < 0) { throw new SOIFException("Illegal SOIF file format (no '{')"); } templateType = soifString.substring(1, pos).trim(); soifBody = soifString.substring(pos + 1).trim(); if (!soifBody.endsWith("}")) { throw new SOIFException("Illegal SOIF file format (no '}')"); } soifBody = soifBody.substring(0, soifBody.length() - 1); // first token in the body may be a url StringTokenizer st = new StringTokenizer(soifBody); try { couldBeURL = st.nextToken(); URL = new URL(couldBeURL); soifBody = soifBody.substring(couldBeURL.length()).trim(); } catch (MalformedURLException e) { // we don't care if there is no URL, It is optional and if there is none, then the // body is just attribute/value pairs. } catch (NoSuchElementException e1) { throw new SOIFException("Illegal SOIF file format (no URL)"); } ParseSOIFBody(soifBody); } /** * Parse the body of the SOIF extracting the attribute/value pairs * @param body the body to parse */ public void ParseSOIFBody(String body) throws SOIFException { int curPos = 0, bodyLength = body.length(); attributeValues = new Hashtable(); String attribute, attributeName, value=null; int valueLength,startValue, endValue; RE reg; REMatch result, nextResult; // pattern for an attribute (alpha-num string followed by count - e.g. foo{10}) try { reg = new RE("\\s*(\\S+)\\{(\\d+)\\}:"); // VERSION with reg = new RE("\\s*(\\S+)\\{(\\d+)\\}:\\t"); // {throw new gnu.regexp.REException();} body = body.trim(); while (curPos < body.length()) { if ( (result = reg.getMatch(body, curPos)) == null) { throw new SOIFException("Illegal SOIF file format - can't find attribute name"); } curPos = result.getStartIndex(); attributeName = result.toString(); attributeName = attributeName.substring(0); nextResult = reg.getMatch(body,result.getEndIndex() ); startValue = result.getEndIndex(); if(nextResult != null) { attribute = nextResult.toString(); endValue = nextResult.getStartIndex(); } else endValue = body.length(); valueLength = endValue - startValue -1; try { value = (body.substring(startValue , endValue)); } catch (StringIndexOutOfBoundsException e) { // throw new SOIFException("Illegal SOIF file format - value too short"); } SetAttributeValue(attributeName, value); curPos += (endValue - curPos); } } catch (gnu.regexp.REException e){} } /** Return a short description (probably only one sentence) of the document this represents. @param markupType -- specifies the charater set type need to be returned @return the short description of this object as a string */ public String presentShort(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentShort(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public int presentShort(int markupType, BufferedWriter out) throws IOException { int Err; String AttributeValue,DC_Creator, DC_Title; String [] AttributeNames; AttributeNames = this.GetAttributeNames(); String shortPresentation; shortPresentation = ""; DC_Creator= ""; DC_Title = ""; switch ( markupType ) { default: // only ASCII and XML are supported now. ASCII produces the sort // of single-line description used in results lists. XML produces // an OAMS description of the document. debug.dumpTrace("MarcDocument.presentShort(): unsupported markup type: using ASCII."); // FALL THROUGH: case DigInfObj.ASCII: boolean seenAuthor = false; for(int i=0; i < this.GetNumAttributes(); i++) { if(AttributeNames[i].indexOf("dc.creator")!=-1 || AttributeNames[i].indexOf("author")!=-1) { boolean DCfound = false; seenAuthor=true; AttributeValue = this.GetAttributeValue(AttributeNames[i]); if(AttributeNames[i].indexOf("dc.creator")!=-1) { DCfound = true; DC_Creator = AttributeValue; } if(!DCfound) DC_Creator = AttributeValue; } //Main title if(AttributeNames[i].indexOf("dc.title")!=-1 || AttributeNames[i].indexOf("title")!=-1) { AttributeValue = this.GetAttributeValue(AttributeNames[i]); boolean DCfound = false; if(AttributeNames[i].indexOf("dc.title")!=-1) { DCfound = true; DC_Title = AttributeValue; } if(!DCfound) DC_Title = AttributeValue; } //Short description of the object /* if(AttributeNames[i].indexOf("dc.description")!=-1 || AttributeNames[i].indexOf("Description")!=-1) { AttributeValue = this.GetAttributeValue(AttributeNames[i]); /*if ( (Err = presentShortField(markupType, out)) != ReturnCodes.OK ) return( Err ); out.write(AttributeValue); out.write(". "); }*/ } // end -- for(...) if(seenAuthor) shortPresentation = DC_Creator + "," + DC_Title + "\n"; else shortPresentation = DC_Title + "\n"; System.out.println(shortPresentation); out.write(shortPresentation); } // end -- switch return( ReturnCodes.OK ); } public String presentLong(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentLong(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public int presentLong(int markupType, BufferedWriter out) throws IOException { String lineBreak; String paraBreak; int Err; String AttributeValue; String presentLong = null; String [] AttributeNames = this.GetAttributeNames(); boolean seenAuthor = false; String Author = ""; String Title= ""; String Description=""; String url = URL.toString(); for(int i=0; i < this.GetNumAttributes(); i++) { if(AttributeNames[i].indexOf("dc.creator")!=-1 || AttributeNames[i].indexOf("author")!=-1) { AttributeValue = this.GetAttributeValue(AttributeNames[i]); boolean DCAuthorfound = false; if(AttributeNames[i].indexOf("dc.creator")!=-1) { DCAuthorfound = true; Author = AttributeValue; } if(!DCAuthorfound) Author = AttributeValue; } //Main title if(AttributeNames[i].indexOf("dc.title")!=-1 || AttributeNames[i].indexOf("title")!=-1) { AttributeValue = this.GetAttributeValue(AttributeNames[i]); boolean DCfound = false; if(AttributeNames[i].indexOf("dc.title")!=-1) { DCfound = true; Title = AttributeValue; } if(!DCfound) Title = AttributeValue; // Title = Title.replace('\n',' '); } //Short description of the object if(AttributeNames[i].indexOf("dc.description")!=-1 || AttributeNames[i].indexOf("Description")!=-1) { boolean DCfound = false; if(AttributeNames[i].indexOf("dc.description")!=-1) { DCfound = true; Description = this.GetAttributeValue(AttributeNames[i]); } if(!DCfound) Description = this.GetAttributeValue(AttributeNames[i]); Description.replace('\n',' '); //Description = Description.substring(5); /*System.out.println("Description: " + Description);*/ } } // end -- for(...) switch ( markupType ) { case DigInfObj.HTML: lineBreak = new String("
" + System.getProperty("line.separator")); paraBreak = new String("

" + System.getProperty("line.separator")); System.out.println("Title2:" + Title); out.write("

" + Title + "\n"); out.write("

" + Author + "\n"); out.write("

" + Description + "\n"); out.write("

" + "" + url + "\n"); // out.write("Call Number: "); break; default: debug.dumpTrace("MarcDocument.presentLong(): unsupported markup type: using ASCII."); // FALL THROUGH. case DigInfObj.ASCII: presentLong = Title + "\n\n" + Author + "\n\n" + Description + "\n\n" + url; out.write(presentLong); break; case DigInfObj.ANSEL: lineBreak = System.getProperty("line.separator"); paraBreak = new String(System.getProperty("line.separator") + System.getProperty("line.separator")); break; } // System.out.println(presentLong); /* String presentLong = "blublu"; */ return( ReturnCodes.OK ); } public boolean isValid() { return( true ); } public DigInfObj copy() { return null; } public String presentFull(int markupType) { StringWriter sw = new StringWriter(); BufferedWriter out = new BufferedWriter( sw ); int Err; try { if ( (Err = presentFull(markupType, out)) != ReturnCodes.OK ) return( null ); out.flush(); } catch( Exception e ) { return( null ); } return(sw.toString()); } public Vector attributes() { return null; } public Vector attributes(int markupType) { return null; } public Vector presentAttributes(int markupType) { return null; } public Object presentAttribute(int attrID, int markupType) { return null; } public int presentFull(int markupType, BufferedWriter out) throws IOException { return( ReturnCodes.NOT_YET_IMPLEMENTED ); } /** Default "short" presentation for any variable field: just use the 'a' subfield. @param markupType see edu.vt.marian.common.DigInfObj @param out A BufferedWriter (presumably String or OutputStream) to present on. @return OK -- everything jake.
IO_ERROR or PARSE_ERROR -- problems. */ /* public int presentShortField(int markupType, BufferedWriter out) throws IOException { int Err; switch ( markupType ) { default: debug.dumpTrace("MarcVarField.presentShort(): Unexpected markup type " + markupType + ": treating as ASCII."); // Fall through: case DigInfObj.XML: case DigInfObj.SGML: case DigInfObj.HTML: case DigInfObj.ASCII: case DigInfObj.ANSEL: Enumeration subfld = subfields.elements(); try { while ( true ) { //MarcSubField sf = (MarcSubField) subfld.nextElement(); //if ( sf.getLabel() == 'a' ) if ( (Err = sf.present(markupType, out)) != ReturnCodes.OK ) return( Err ); if ( subfld.hasMoreElements() ) out.write(' '); } } catch( NoSuchElementException e) {}; } return( ReturnCodes.OK ); } */ }