package edu.vt.marian.common; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.Document.*; /** * A Weighted Object that actually includes the full object! Unlike the class * WtdObj, which only includes a FullID, a WtdEntireObj also includes * a Java Object which is the actual instance selected by the FullID. *

 * We actually use WtdEntireObjs in two ways.  In what might be thought
 * of as the paradigmatic use, the Object is a digital information
 * object and implements some sub-interface of DigInfObj.  At several
 * levels of MARIAN, however, this is overkill, and we simply need to
 * move the raw object around, in the form of an unanalyzed
 * string or stream, without creating a DigInfObj instance from it.
 *
 * @author Robert France

implementer(s): Jianxin Zhao (jxzhao@csgrad.cs.vt.edu), Robert France

finished time:

known bugs:

JDK version: 1.1.5

side effects: */ public class WtdEntireObj extends WtdObj { /** either an object of class classID or a string from which such an object can be extracted. */ protected Object obj = null; /** whether obj is a raw string, or has been "extracted" and converted into a real DigInfObj. */ protected boolean extracted = false; /** return values of methods of this class */ public final static int OK = 0; public final static int UNKNOWN_CLASS = 1; public final static int NO_CAN_DO = -10; /** method description: this constructor will create a WtdEntireObj object from the specified stream @param br the stream to read out this document @param debug used for debugging */ public WtdEntireObj(BufferedReader br, Debug debug) { super(br, debug); if (br == null) { debug.dumpTrace("WtdEntireObj.[constructor 1]: br is null"); return; } // br is not null, read out raw document from it int numLines = 0; try { numLines = Integer.parseInt(br.readLine()); } catch (Exception e0) { debug.dumpTrace("WtdEntireObj.[constructor 1]: error reading number of lines"); } int i; StringBuffer buffer = new StringBuffer(numLines*80); // Well, it's a try // better guess { // than 16 (the for (i = 0; i < numLines; i++) // default size for { // StringBuffers). if (i != 0) { // not the first line, so add "\n" between them buffer.append( "\n" ); } buffer.append( br.readLine() ); } } catch (IOException e1) { debug.dumpTrace("WtdEntireObj.[constructor 1]: error reading sgml string"); } obj = new String(buffer); extracted = false; } /** method description: this constructor will create a WtdEntireObj from the specified id, weight and document string

uses the services of class(es): @param id this will be the id of this document @param w this will be the weight at which this document corresponds to a query @param o the Object that id stands for. @param debug used for debugging */ public WtdEntireObj(FullID id, Weight w, Document o, Debug debug) { super(id, w, debug); if (o == null) { debug.dumpTrace("WtdEntireObj.[constructor 2]: object is null"); return; } obj = o.copy(); extracted = true; } /** method description: this constructor will create a WtdEntireObj from the specified id, weight and document string

uses the services of class(es): @param id this will be the id of this document @param w this will be the weight at which this document corresponds to a query @param o the document that id stands for in raw (String) form. @param debug used for debugging */ public WtdEntireObj(FullID id, Weight w, String rawObj, Debug debug) { super(id, w, debug); debug.dumpTrace("wtdEntireObj(): id is " + id.toString() + "; weight is " + w.toString() + ". String is " + rawObj + "."); ////DEBUG if (rawObj == null) { debug.dumpTrace("WtdEntireObj.[constructor w]: string is null"); return; } obj = new String(rawObj); extracted = false; } /** print the contents of this object to the specified stream. @param pw the stream to write this object @return OK -- this object has been written to the stream correctly
NULL_STREAM -- the parameter stream is null */ public int toStream(PrintWriter pw) { if (pw == null) { debug.dumpTrace("WtdEntireObj.to_stream(): parameter stream is null"); return NULL_STREAM; } // stream is not null, first write id and weight int err; if ( (err = super.toStream(pw)) != OK ) { debug.dumpTrace("WtdEntireObj.to_stream(): error happened in writing id"); return err; } // now write raw document if (obj instanceof String) { // used to count lines LinedString ms = new LinedString(debug); pw.println(ms.count_lines( (String) obj)); pw.println( (String) obj); } else { String s = obj.toString(); pw.println( s ); //** This is what we really want: obj.toStream(pw); } return OK; } /** return the raw string of the document this object represents. @return this document as a string */ public String getRawObj() { if (obj instanceof String) return( (String) obj ); else return( obj.toString() ); //** Eventually, we may constraint obj to be a Document, upon // which we can say: // return( ((Document) obj).presentFull(DigInfObj.ASCII) ); } /** set this to a (non-extracted) String representation of the entire object. @param objectAsString a String from which the entire object can be built. @return OK -- the new raw marc record has been set correctly
BAD_PARAMS -- the parameter is null

NOTE: What if this is of class String? Then you should use setEntireObj() instead of setRaw() to avoid confusion. */ public int setRawObj(String objectAsString) { if (objectAsString == null) { debug.dumpTrace("WtdEntireObj.setRwObj(): parameter is null"); return BAD_PARAMS; } // document string is not null obj = new String(objectAsString); extracted = false; return OK; } /** set the entire object to (a clone of) an extracted object. @param o the object corresponding to this.id. @return OK -- the new object has been set correctly
BAD_PARAMS -- the parameter is null */ public int setDocument(Document o) { if (o == null) { debug.dumpTrace("WtdEntireObj.setDocument(): parameter is null"); return BAD_PARAMS; } // document string is not null obj = o.copy(); extracted = true; return OK; } /** return the (extracted) Document object. @return (a reference to) an object implementing Document. */ public Document getDocument() { if (obj == null) { debug.dumpTrace("WtdEntireObj.getDocument(): object is null"); return null; } if (! extracted) { if (extract() != OK) return( null ); } // document string is not null, and has been extracted properly. return (Document) obj; } private int extract() { int Err; switch (classID) { case ClassIDs.CLASS_NLM_SGML_DOC: SgmlDocument extractedSgmlDoc = new SgmlDocument((String) obj, debug); if (extractedSgmlDoc.isValid() ) { extracted = true; obj = extractedSgmlDoc; return( OK ); } else { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + "."); return( NO_CAN_DO ); } case ClassIDs.CLASS_PHYSDIS_ETD: SOIFDocument extractedPhysDisDoc; try { extractedPhysDisDoc = new SOIFDocument((String) obj, debug); } catch (SOIFException e) { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + " (" + (String) obj + "): " + e.getMessage() + "."); return( NO_CAN_DO ); } if (extractedPhysDisDoc.isValid() ) { extracted = true; obj = extractedPhysDisDoc; return( OK ); } else { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + "."); return( NO_CAN_DO ); } case ClassIDs.CLASS_VT_ETD_OAMS: OAMSDocument extractedOamsDoc; extractedOamsDoc = new OAMSDocument((String) obj, debug); if (extractedOamsDoc.isValid() ) { extracted = true; obj = extractedOamsDoc; return( OK ); } else { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + "."); return( NO_CAN_DO ); } case ClassIDs.CLASS_MIT_ETD: RFC1807Document extractedRfcDoc; try { extractedRfcDoc = new RFC1807Document((String) obj, debug); } 
catch (SOIFException e) { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + ":" + e.getMessage() + "."); return( NO_CAN_DO ); } if (extractedRfcDoc.isValid() ) { extracted = true; obj = extractedRfcDoc; return( OK ); } else { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + "."); return( NO_CAN_DO ); } case ClassIDs.CLASS_VT_MARC: MarcDocument extractedMarc = new MarcDocument(new EntityMap(debug), debug); try { StringReader sr = new StringReader((String) obj); BufferedReader bsr = new BufferedReader(sr); if ( (Err = extractedMarc.setFromTapeFormat(bsr)) != ReturnCodes.OK ) return( Err ); if (extractedMarc.isValid() ) { extracted = true; obj = extractedMarc; return( OK ); } } catch (Exception e) { debug.dumpTrace("WtdEntireObj.extract(): cannot extract " + super.toString() + "."); return( NO_CAN_DO ); } default: debug.dumpTrace("WtdEntireObj.extract(): unknown class ID in " + super.toString() + "."); return( UNKNOWN_CLASS ); } } }