package edu.vt.marian.common; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.Document.*; /** * A Weighted Object that actually includes the full object! Unlike the class * WtdObj, which only includes a FullID, a WtdEntireObj also includes * a Java Object which is the actual instance selected by the FullID. *
* We actually use WtEntireObjs in two ways. In what might be thought * of as the paradigmatic use, the Object is a digital information * object and implements some sub-interface of DigInfObj. At several * levels of MARIAN, however, this is overkill, and we simply need to * more the raw object around, in the form of an unanalyzed * string or stream, without creating a DigInfObj instance from it. @author Robert France
implementator(s): Jianxin Zhao (jxzhao@csgrad.cs.vt.edu), Robert France
finished time:
known bugs:
JDK version: 1.1.5
side effects: */ public class WtdEntireObj extends WtdObj { /** either an object of class classID or a string from which such an object can be extracted. */ protected Object obj = null; /** whether obj is a raw string, or has been "extracted" and converted into a real DigInfObj. */ protected boolean extracted = false; /** return values of methods of this class */ public final static int OK = 0; public final static int UNKNOWN_CLASS = 1; public final static int NO_CAN_DO = -10; /** method description: this constructor will create a WtdEntireObj object from the specified stream @param br the stream to read out this document @param debug used for debugging */ public WtdEntireObj(BufferedReader br, Debug debug) { super(br, debug); if (br == null) { debug.dumpTrace("WtdEntireObj.[constructor 1]: br is null"); return; } // br is not null, read out raw document from it int numLines = 0; try { numLines = Integer.parseInt(br.readLine()); } catch (Exception e0) { debug.dumpTrace("WtdEntireObj.[constructor 1]: error reading number of lines"); } int i; StringBuffer buffer = new StringBuffer(numLines*80); // Well, it's a try // better guess { // than 16 (the for (i = 0; i < numLines; i++) // default size for { // StringBuffers). if (i != 0) { // not the first line, so add "\n" between them buffer.append( "\n" ); } buffer.append( br.readLine() ); } } catch (IOException e1) { debug.dumpTrace("WtdEntireObj.[constructor 1]: error reading sgml string"); } obj = new String(buffer); extracted = false; } /** method description: this constructor will create a WtdEntireObj from the specified id, weight and document string
uses the services of class(es): @param id this will be the id of this document @param w this will be the weight at which this document corresponds to a query @param o the Object that id stands for. @param debug used for debugging */ public WtdEntireObj(FullID id, Weight w, Document o, Debug debug) { super(id, w, debug); if (o == null) { debug.dumpTrace("WtdEntireObj.[constructor 2]: object is null"); return; } obj = o.copy(); extracted = true; } /** method description: this constructor will create a WtdEntireObj from the specified id, weight and document string
uses the services of class(es):
@param id this will be the id of this document
@param w this will be the weight at which this document corresponds to a
query
@param o the document that id stands for in raw (String) form.
@param debug used for debugging
*/
public WtdEntireObj(FullID id, Weight w, String rawObj, Debug debug)
{
    super(id, w, debug);

    // Trace the arguments.  Plain string concatenation is used instead of
    // explicit toString() calls so that a null id or weight is traced as
    // "null" rather than raising a NullPointerException here.
    debug.dumpTrace("wtdEntireObj(): id is " + id + "; weight is " + w + ". String is " + rawObj + "."); ////DEBUG

    if (rawObj == null)
    {
        // nothing to store: obj stays null and extracted stays false
        debug.dumpTrace("WtdEntireObj.[constructor 3]: string is null");
        return;
    }

    // keep the raw text; it has not been turned into a Document yet
    obj = new String(rawObj);
    extracted = false;
}
/**
 * print the contents of this object to the specified stream.  The
 * superclass part is written first via super.toStream(); the entire object
 * follows.  A raw String is written as a line holding its line count
 * followed by the text itself; any other object is written as its
 * toString() text only.
 *
 * @param pw the stream to write this object
 * @return OK -- this object has been written to the stream correctly
 *         NULL_STREAM -- the parameter stream is null
 *         NO_CAN_DO -- this object holds no entire object (obj is null);
 *                      nothing is written in that case
 *         any other value -- the error code returned by super.toStream()
 */
public int toStream(PrintWriter pw)
{
    if (pw == null)
    {
        debug.dumpTrace("WtdEntireObj.to_stream(): parameter stream is null");
        return NULL_STREAM;
    }

    // obj stays null when a constructor was handed a null stream, Document
    // or String; refuse before anything is written instead of failing with
    // a NullPointerException halfway through the record.
    if (obj == null)
    {
        debug.dumpTrace("WtdEntireObj.to_stream(): object is null");
        return NO_CAN_DO;
    }

    // stream is not null, first write id and weight
    int err = super.toStream(pw);
    if (err != OK)
    {
        debug.dumpTrace("WtdEntireObj.to_stream(): error happened in writing id");
        return err;
    }

    // now write raw document
    if (obj instanceof String)
    {
        String raw = (String) obj;
        // used to count lines
        LinedString ms = new LinedString(debug);
        pw.println(ms.count_lines(raw));
        pw.println(raw);
    }
    else
    {
        // NOTE(review): no line count is written on this path, although the
        // stream constructor of this class reads a line count first --
        // confirm which format readers of extracted objects expect.
        pw.println(obj.toString());
        //** This is what we really want: obj.toStream(pw);
    }
    return OK;
}
/**
 * return the raw string of the document this object represents.  A stored
 * String is returned as is; any other stored object is rendered through
 * its toString() method.
 *
 * @return this document as a string, or null (after writing a trace) when
 *         this object holds no entire object
 */
public String getRawObj()
{
    // obj is null when construction was given a null source; report it the
    // same way getDocument() does instead of throwing NullPointerException
    if (obj == null)
    {
        debug.dumpTrace("WtdEntireObj.getRawObj(): object is null");
        return null;
    }

    if (obj instanceof String)
    {
        return (String) obj;
    }
    return obj.toString();
    //** Eventually, we may constraint obj to be a Document, upon
    //   which we can say:
    //       return( ((Document) obj).presentFull(DigInfObj.ASCII) );
}
/**
 * set this to a (non-extracted) String representation of the entire object.
 * Any previously held object, raw or extracted, is replaced.
 *
 * @param objectAsString a String from which the entire object can be
 *                       built.
 * @return OK -- the new raw object has been set correctly
 *         BAD_PARAMS -- the parameter is null (nothing is changed)
 *
 * NOTE: What if the entire object itself is of class String?  The original
 * note advised using "setEntireObj()" instead of "setRaw()" to avoid
 * confusion.
 * NOTE(review): neither name exists in this class as seen here; presumably
 * setDocument() and setRawObj() are meant -- confirm.
 */
public int setRawObj(String objectAsString)
{
    if (objectAsString == null)
    {
        // trace label corrected: it used to read "setRwObj()"
        debug.dumpTrace("WtdEntireObj.setRawObj(): parameter is null");
        return BAD_PARAMS;
    }

    // document string is not null: store it and mark it as not yet extracted
    obj = new String(objectAsString);
    extracted = false;
    return OK;
}
/**
 * set the entire object to (a copy of) an extracted object.  The value
 * stored is whatever o.copy() returns, and this object is then marked as
 * extracted.
 *
 * @param o the object corresponding to this.id.
 * @return OK -- the new object has been set correctly
 *         BAD_PARAMS -- the parameter is null (nothing is changed)
 */
public int setDocument(Document o)
{
    if (o != null)
    {
        // keep our own copy rather than the caller's instance
        obj = o.copy();
        extracted = true;
        return OK;
    }

    debug.dumpTrace("WtdEntireObj.setDocument(): parameter is null");
    return BAD_PARAMS;
}
/**
 * return the (extracted) Document object.  When only the raw form is held,
 * extract() is run first to build the Document from it.
 *
 * @return (a reference to) an object implementing Document, or null when
 *         no object is held or the raw form could not be extracted.
 */
public Document getDocument()
{
    if (obj == null)
    {
        debug.dumpTrace("WtdEntireObj.getDocument(): object is null");
        return null;
    }

    // extract() is attempted only while obj is still in raw form
    boolean usable = extracted || (extract() == OK);

    // on success obj holds the extracted instance, not the raw string
    return usable ? (Document) obj : null;
}
/**
 * Build a document object from the raw String held in obj, choosing the
 * document class by classID.  On success obj is replaced by the new
 * document and extracted is set to true; on failure both are left
 * untouched and a trace is written.
 *
 * @return OK -- obj now holds the extracted document
 *         NO_CAN_DO -- the raw string could not be turned into a valid
 *                      document of the selected class
 *         UNKNOWN_CLASS -- classID is not one of the classes handled here
 *         any other value -- the code returned by
 *                      MarcDocument.setFromTapeFormat() when it is not
 *                      ReturnCodes.OK
 */
private int extract()
{
    switch (classID)
    {
        case ClassIDs.CLASS_NLM_SGML_DOC:
        {
            SgmlDocument sgmlDoc = new SgmlDocument((String) obj, debug);
            return adoptExtracted(sgmlDoc, sgmlDoc.isValid());
        }

        case ClassIDs.CLASS_PHYSDIS_ETD:
        {
            SOIFDocument physDisDoc;
            try
            {
                physDisDoc = new SOIFDocument((String) obj, debug);
            }
            catch (SOIFException e)
            {
                // this trace also carries the raw text that failed to parse
                return extractionFailed(" (" + (String) obj + "): " + e.getMessage());
            }
            return adoptExtracted(physDisDoc, physDisDoc.isValid());
        }

        case ClassIDs.CLASS_VT_ETD_OAMS:
        {
            OAMSDocument oamsDoc = new OAMSDocument((String) obj, debug);
            return adoptExtracted(oamsDoc, oamsDoc.isValid());
        }

        case ClassIDs.CLASS_MIT_ETD:
        {
            RFC1807Document rfcDoc;
            try
            {
                rfcDoc = new RFC1807Document((String) obj, debug);
            }
            catch (SOIFException e)
            {
                return extractionFailed(":" + e.getMessage());
            }
            return adoptExtracted(rfcDoc, rfcDoc.isValid());
        }

        case ClassIDs.CLASS_VT_MARC:
        {
            MarcDocument marcDoc = new MarcDocument(new EntityMap(debug), debug);
            boolean valid;
            try
            {
                BufferedReader tape = new BufferedReader(new StringReader((String) obj));
                int status = marcDoc.setFromTapeFormat(tape);
                if (status != ReturnCodes.OK)
                {
                    // pass the parser's own error code through unchanged
                    return status;
                }
                valid = marcDoc.isValid();
            }
            catch (Exception e)
            {
                return extractionFailed("");
            }
            // An invalid MARC document used to fall through to the default
            // label and be reported as an unknown class ID; it is now
            // reported as NO_CAN_DO like the other document classes.
            return adoptExtracted(marcDoc, valid);
        }

        default:
            debug.dumpTrace("WtdEntireObj.extract(): unknown class ID in " +
                super.toString() + ".");
            return( UNKNOWN_CLASS );
    }
}

/** Store candidate as the extracted object when valid; otherwise trace the failure. */
private int adoptExtracted(Object candidate, boolean valid)
{
    if (!valid)
    {
        return extractionFailed("");
    }
    extracted = true;
    obj = candidate;
    return OK;
}

/** Trace a "cannot extract" message with optional detail and return NO_CAN_DO. */
private int extractionFailed(String detail)
{
    debug.dumpTrace("WtdEntireObj.extract(): cannot extract " +
        super.toString() + detail + ".");
    return NO_CAN_DO;
}
}