package edu.vt.marian.Document; import java.io.*; import java.net.*; import java.util.*; import edu.vt.marian.common.*; /** A USMARC record interpreted as a MARIAN Document. */ public class MarcDocument extends MarcRecord implements Document { /** ID of this MARC object in MARIAN. Set by extractCharacteristics(). */ private int classID; private FullID id; /** Is this a conference proceedings? Set by extractCharacteristics(). */ private boolean isConference = false; /** Language (USMARC 3-letter code). Set by extractCharacteristics(). */ private String language; /** Publication year (more or less: see comments in extractCharacteristics()). */ private int pubYear; /** Designators for sections in the "long" presentation. */ private final static int authorSection = 0; private final static int titleSection = 1; private final static int noteSection = 2; private final static int subjectSection = 3; /** Set instance variables to default values.
NOTE: Called in constructors and in setFrom*(). Note call to
superclass to initialize MarcRecord variables.
*/
protected void init()
{
    // The superclass initializes the MarcRecord variables first.
    super.init();

    // Placeholder values for this class's own state: invalid, but never null.
    this.id = new FullID(debug);
    this.pubYear = 0;
    this.language = new String();
    this.isConference = false;
}
/** Variable-field IDs that belong to the author section of the long
presentation; hasSectionType() checks against this table for authorSection.
*/
private final static int[] author_ids = {100, 110, 111, 400, 410, 411,
700, 710, 711, 800, 810, 811};
/** Variable-field IDs that belong to the title section of the long
presentation; hasSectionType() checks against this table for titleSection.
*/
private final static int[] title_ids = {130, 240, 241, 242, 243, 245,
246, 440, 730, 740, 830, 840};
/** Variable-field IDs that belong to the subject section of the long
presentation; hasSectionType() checks against this table for subjectSection.
*/
private final static int[] subject_ids = {600, 610, 611, 630, 650, 651,
653, 690, 691, 693};
/** Variable-field IDs that belong to the notes section of the long
presentation; hasSectionType() checks against this table for noteSection.
*/
private final static int[] note_ids = {500, 501, 502, 504, 508, 511,
515, 533, 550, 580, 590, 705, 715};
/** Variable-field IDs used to get the long description of a
person name.
*/
private final static int[] person_name_ids = {100, 400, 600, 700, 800};
/** Variable-field IDs used to get the long description of a
conference name.
*/
private final static int[] conference_name_ids = {111, 411, 611,
711, 811};
/** Variable-field IDs used to get the long description of a
corporate name.
*/
private final static int[] corp_name_ids = {110, 410, 610, 710, 810};
protected boolean hasSectionType(MarcVarField mvf, int section)
{
int[] sectionIDs;
switch (section)
{
case authorSection:
sectionIDs = author_ids;
break;
case titleSection:
sectionIDs = title_ids;
break;
case noteSection:
sectionIDs = note_ids;
break;
case subjectSection:
sectionIDs = subject_ids;
break;
default:
return(false);
}
for (int i=0; i NOTE: At this point we are using String compare on the
raw strings to determine equality. This obviously leaves
something to be desired.
@param d the document used to compare with this object
@return true / false
*/
public boolean equals(MarcDocument d)
{
    // Equality is decided by the FullID alone, and only when both records
    // are instantiated; every other situation is logged and reported as
    // "not equal".
    if (d == null)
    {
        debug.dumpTrace("MarcDocument.equals(): d is null");
        return false;
    }

    final boolean bothInstantiated = isInstantiated && d.isInstantiated;
    if (!bothInstantiated)
    {
        debug.dumpTrace("MarcDocument.equals(): uninstantiated record.");
        return false;
    }

    return id.equals(d.id);
}
/**
Make a new MarcDocument just like this one.
" + System.getProperty("line.separator"));
out.write("Call Number: ");
break;
default:
debug.dumpTrace("MarcDocument.presentLong(): unsupported markup type: using ASCII.");
// FALL THROUGH.
case DigInfObj.ASCII:
case DigInfObj.ANSEL:
lineBreak = System.getProperty("line.separator");
paraBreak = new String(System.getProperty("line.separator") +
System.getProperty("line.separator"));
out.write("Call Number: ");
break;
}
if ( (Err = presentCallNumber(markupType, out)) != ReturnCodes.OK)
return( Err );
out.write(paraBreak);
if ( (Err = presentSection(authorSection, markupType, out))
!= ReturnCodes.OK)
return( Err );
out.write(lineBreak);
if ( (Err = presentSection(titleSection, markupType, out))
!= ReturnCodes.OK)
return( Err );
out.write(lineBreak);
if ( (Err = presentPublishingInfoSection(markupType, out))
!= ReturnCodes.OK)
return( Err );
out.write(lineBreak);
if ( (Err = presentNotesSection(markupType, out)) != ReturnCodes.OK)
return( Err );
out.write(lineBreak);
if ( (Err = presentSection(subjectSection, markupType, out))
!= ReturnCodes.OK)
return( Err );
return( ReturnCodes.OK );
}
/**
Return a full description of the document this object represents.
other --
*/
/**
Fill in isConference, language, pubYear and id from this record.
The first three come from fixed field 8, which must hold exactly 40
characters; the id is built from subfield 'a' of variable field 35, both
of which must occur exactly once.
@return ReturnCodes.OK on success; ReturnCodes.PARSE_ERROR if field 8 is
missing or malformed, if field 35 or its subfield 'a' is not unique, or if
no positive instance ID can be parsed.  A year that cannot be parsed is
not an error: pubYear is simply set to 0.
*/
private int extractCharacteristics()
{
    // ---- fixed field 8: conference flag, language, publication year ----
    final MarcFixField fixed8 = getFixFieldById(8);
    final String fixedData = (fixed8 == null) ? null : fixed8.getData();
    if (fixedData == null || fixedData.length() != 40)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): field 8 or its data is invalid.");
        return ReturnCodes.PARSE_ERROR;
    }

    isConference = (fixedData.charAt(29) == '1');
    language = fixedData.substring(35, 38);

    // The two four-character date slots; which one (or what blend of the
    // two) is used depends on the type code at position 6.
    final String firstDate = fixedData.substring(7, 11);
    final String secondDate = fixedData.substring(11, 15);
    int year = 0;
    try
    {
        final char dateType = fixedData.charAt(6);
        if (dateType == 'd' || dateType == 's')
        {
            year = Integer.parseInt(firstDate);
        }
        else if (dateType == 'r')
        {
            // Prefer the second date, falling back to the first when the
            // second is not positive.
            year = Integer.parseInt(secondDate);
            if (year <= 0)
            {
                year = Integer.parseInt(firstDate);
            }
        }
        else if (dateType == 'c' || dateType == 'p' || dateType == 'u')
        {
            year = Integer.parseInt(secondDate);
        }
        else if (dateType == 'i' || dateType == 'k' || dateType == 'm' || dateType == 'q')
        {
            // Two dates given: take the midpoint.
            year = (Integer.parseInt(firstDate) + Integer.parseInt(secondDate)) / 2;
        }
        else
        {
            // 'b', 'n' and anything unrecognized: no usable year.
            year = 0;
        }
    }
    catch (Exception e)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): parse year error");
        year = 0;
    }
    pubYear = Math.max(year, 0);

    // ---- variable field 35, subfield 'a': the instance ID ----
    final Vector idFields = getVarFieldsById(35);
    if (idFields.size() != 1)
    {
        debug.dumpTrace("MarcDocument.extractCharateristics(): variable field 35 is not uniq");
        return ReturnCodes.PARSE_ERROR;
    }

    final PresentableMarcVarField idField = (PresentableMarcVarField) idFields.elementAt(0);
    final Vector aSubfields = idField.getSubfieldsByLabel('a');
    if (aSubfields.size() != 1)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): subfield 'a' in var field 35 is not uniq");
        return ReturnCodes.PARSE_ERROR;
    }

    final String rawId = ((MarcSubField) aSubfields.elementAt(0)).getData();
    if (rawId == null || rawId.length() < 10)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): rawId is not valid");
        return ReturnCodes.PARSE_ERROR;
    }

    // The numeric ID is characters 0-3 and 5-9 of the raw ID; the
    // character at index 4 is skipped.
    int instID;
    try
    {
        instID = Integer.parseInt(rawId.substring(0, 4) + rawId.substring(5, 10));
    }
    catch (Exception e)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): error when parsing '" +
            rawId + "' for instance ID.");
        return ReturnCodes.PARSE_ERROR;
    }
    if (instID <= 0)
    {
        debug.dumpTrace("MarcDocument.extractCharacteristics(): instance ID '" +
            rawId + "' is invalid.");
        return ReturnCodes.PARSE_ERROR;
    }

    id = new FullID(classID, instID, debug);
    return ReturnCodes.OK;
}
/**
Write this record's call number to out using the field's long presentation.
@param markupType markup type handed through to the field's presentLong()
@param out writer that receives the call number
@return whatever the chosen field's presentLong() returns, or
ReturnCodes.NOT_FOUND when the record has no 099, 090 or 050 field
@throws IOException if writing to out fails
*/
protected int presentCallNumber(int markupType, BufferedWriter out)
    throws IOException
{
    // Preference order is 099, then 090, then 050.  Scanning from the end
    // of the vector is how that preference is obtained -- presumably the
    // fields are stored in ascending ID order (confirm against the loader).
    int idx = varFields.size();
    while (--idx >= 0)
    {
        final PresentableMarcVarField candidate =
            (PresentableMarcVarField) varFields.elementAt(idx);
        switch (candidate.getID())
        {
            case 99:
            case 90:
            case 50:
                return candidate.presentLong(markupType, out);
            default:
                break;
        }
    }

    // No call-number field in this record.
    return ReturnCodes.NOT_FOUND;
}
/**
format the publishing information of this MarcRecord
@param markupType specifies which markup type the result should be
only ASCII is supported now
@return the formated publish information of this object as a string
*/
protected int presentPublishingInfoSection(int markupType, BufferedWriter out)
throws IOException
{
int Err;
String lineBreak;
String indent;
switch ( markupType )
{
case DigInfObj.HTML:
lineBreak = new String("
" + System.getProperty("line.separator"));
indent = " ";
break;
default:
debug.dumpTrace("MarcDocument.presentLong(): unsupported markup type: using ASCII.");
// FALL THROUGH.
case DigInfObj.ASCII:
case DigInfObj.ANSEL:
lineBreak = System.getProperty("line.separator");
indent = " ";
break;
}
PresentableMarcVarField mvf;
PresentableMarcVarField editionField = null;
Enumeration varfld = varFields.elements();
try { while ( true )
{
mvf = (PresentableMarcVarField) varfld.nextElement();
switch (mvf.getID())
{
case 250: // Save for the end.
editionField = mvf;
break;
case 260:
if (markupType == HTML)
out.write("Imprint: ");
else
out.write("Imprint: ");
if ( (Err = mvf.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
break;
case 300:
out.write(indent);
out.write("Description: " );
if ( (Err = mvf.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
break;
case 310:
out.write(indent);
out.write("Frequency: ");
if ( (Err = mvf.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
break;
case 362:
out.write(indent);
out.write("Published: ");
if ( (Err = mvf.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
case 490:
out.write(indent);
out.write("Series: ");
if ( (Err = mvf.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
default:
}
} } catch(NoSuchElementException e) {}
if ( editionField != null )
{
out.write(indent);
out.write("Edition: ");
if ( (Err = editionField.presentLong(markupType, out))
!= ReturnCodes.OK )
return(Err);
out.write(lineBreak);
}
return( ReturnCodes.OK );
}
/**
Present the notes information of this object.
@param markupType specifies which markup type the result should be
@return the formated notes information of this object as a string
*/
protected int presentNotesSection(int markupType, BufferedWriter out) throws IOException
{
int Err;
// First, pull together all the undifferentiated note fields:
if ( (Err = presentSection(noteSection, markupType, out))
!= ReturnCodes.OK )
return( Err );
String lineBreak;
String indent;
switch ( markupType )
{
case DigInfObj.HTML:
lineBreak = new String("
" + System.getProperty("line.separator"));
indent = " ";
break;
default:
debug.dumpTrace("MarcDocument.presentLong(): unsupported markup type: using ASCII.");
// FALL THROUGH.
case DigInfObj.ASCII:
case DigInfObj.ANSEL:
lineBreak = System.getProperty("line.separator");
indent = " ";
break;
}
PresentableMarcVarField mvf = null;
for (int i=0; i
" + System.getProperty("line.separator"));
for (i=0; i
(Substitutes for public Object clone() until we straighten things out.)
@return another MarcDocument object just like this one.
*/
public DigInfObj copy()
{
    // Copies by round-tripping: serialize this record as XML into an
    // in-memory buffer, then parse that text into a fresh MarcDocument.
    // Returns null if either step reports an error or throws.
    try
    {
        StringWriter sw = new StringWriter();
        BufferedWriter out = new BufferedWriter( sw );
        if ( presentAsXml(out) != ReturnCodes.OK )
            return( null );

        // The serialized text lives in the StringWriter, and only once the
        // BufferedWriter has been flushed.  (The original read
        // out.toString(), which is just Object.toString() of the
        // BufferedWriter and never contains the XML.)
        out.flush();
        StringReader sr = new StringReader( sw.toString() );
        BufferedReader in = new BufferedReader( sr );

        MarcDocument retDoc = new MarcDocument(debug);
        if ( retDoc.setFromXml(in) != ReturnCodes.OK )
            return( null );
        return( retDoc );
    }
    catch (Exception e)
    {
        debug.dumpTrace("MarcDocument.copy(): expection " + e.toString() +
            " raised: bailing out.");
        return( null );
    }
}
/**
Return a short description (probably only one sentence) of the document
this object represents.
Delegates to presentShort(int, BufferedWriter) with an in-memory writer.
@param markupType specifies the markup / character set type to be returned
@return the short description of this object as a string, or null if the
writer-based presentShort() does not return ReturnCodes.OK or throws
*/
public String presentShort(int markupType)
{
    StringWriter sw = new StringWriter();
    BufferedWriter out = new BufferedWriter( sw );
    try
    {
        if ( presentShort(markupType, out) != ReturnCodes.OK )
            return( null );
        // Push buffered text through to the StringWriter before reading it.
        out.flush();
    }
    catch ( Exception e )
    {
        // Still a best-effort null, but leave a trace like copy() does
        // instead of swallowing the failure silently.
        debug.dumpTrace("MarcDocument.presentShort(): exception " + e.toString() +
            " raised: returning null.");
        return( null );
    }
    return( sw.toString() );
}
public int presentShort(int markupType, BufferedWriter out) throws IOException
{
PresentableMarcVarField mvf = null;
int Err;
switch ( markupType )
{
default:
// only ASCII and XML are supported now. ASCII produces the sort
// of single-line description used in results lists. XML produces
// an OAMS description of the document.
debug.dumpTrace("MarcDocument.presentShort(): unsupported markup type: using ASCII.");
// FALL THROUGH:
case DigInfObj.ASCII:
boolean seenAuthor = false;
for (int i = 0; i < getNumberVarFields(); i++)
{
mvf = (PresentableMarcVarField) getVarFieldByIndex(i);
switch (mvf.getID())
{
case 100:
case 110:
case 111:
if (seenAuthor)
{
// this is not the first author, so add
// separator between them
out.write(", ");
}
else
{
// this is the first author, no separator
// before it
seenAuthor = true;
}
if ( (Err = mvf.presentShort(markupType, out))
!= ReturnCodes.OK )
return( Err );
break;
case 245: // Main title.
if (seenAuthor)
{
// add separator
out.write(": ");
}
if ( (Err = mvf.presentShort(markupType, out))
!= ReturnCodes.OK )
return( Err );
mvf.presentShort(markupType, out);
out.write(". ");
break;
case 260: // Publication info.
if ( (Err = mvf.presentShort(markupType, out))
!= ReturnCodes.OK )
return( Err );
break;
default:
} // end -- switch
} // end -- for(...)
break;
case DigInfObj.XML: // Produce OAMS record.
out.write("
" + System.getProperty("line.separator"));
paraBreak = new String("
NOTE: At the risk of conflating two uses -- presentaion
of the complete object for tranport to another system and
presentation to a sophisticated data administrator (e.g.,
a library cataloger) -- we are using the transport
methods defined in MarcRecord to implement this.
@param markupType specifies the character set type need to be returned
@return the full description of this document as a string
*/
public String presentFull(int markupType)
{
    // String-returning convenience form: runs the writer-based
    // presentFull() against an in-memory buffer.  Returns null if that
    // call does not return ReturnCodes.OK or throws.
    StringWriter sw = new StringWriter();
    BufferedWriter out = new BufferedWriter( sw );
    try
    {
        if ( presentFull(markupType, out) != ReturnCodes.OK )
            return( null );
        // Push buffered text through to the StringWriter before reading it.
        out.flush();
    }
    catch ( Exception e )
    {
        // Still a best-effort null, but leave a trace like copy() does
        // instead of swallowing the failure silently.
        debug.dumpTrace("MarcDocument.presentFull(): exception " + e.toString() +
            " raised: returning null.");
        return( null );
    }
    return( sw.toString() );
}
/**
Write the full description of this document to out.
XML and SGML are produced by presentAsXml(); ASCII and ANSEL by
presentAsTapeFormat().  Any other markup type is logged and treated as
ASCII.
@param markupType one of the DigInfObj markup constants
@param out writer that receives the description
@return the code returned by presentAsXml() or presentAsTapeFormat()
@throws IOException if writing to out fails
*/
public int presentFull(int markupType, BufferedWriter out) throws IOException
{
    switch ( markupType )
    {
        case DigInfObj.XML:
        case DigInfObj.SGML:
            return( presentAsXml(out) );
        case DigInfObj.ASCII:
        case DigInfObj.ANSEL:
            break;
        default:
            // The original message ran the text and the number together
            // ("...markup type5: ..."); a separating space is added.
            debug.dumpTrace("MarcDocument.presentFull(): Unexpected markup type " +
                markupType + ": treating as ASCII.");
            break;
    }
    return( presentAsTapeFormat(out) );
}
/**
Return a Vector of metadata attributes for this document.
Not yet implemented: the method only logs that fact through
debug.dumpTrace() and returns null.
@param markupType how to mark up the string returned (e.g., HTML or ASCII).
@return intended to be a Vector of triples [attrName, attrType, attrValue];
currently always null.
*/
public Vector presentAttributes(int markupType)
{
debug.dumpTrace("MarcDocument.presentAttributes(): not yet implemented");
return null;
}
/**
Return an Object (almost certainly a String) in some markupType for the given
attribute.
Not yet implemented: the method only logs that fact and returns null.
@param attrID identifies the attribute to present.
@param markupType how to mark up the string returned (e.g., HTML or ASCII).
@return intended to be the presented attribute value; currently always null.
*/
public Object presentAttribute(int attrID, int markupType)
{
    // The trace used to name presentAttributes() (plural), a different method.
    debug.dumpTrace("MarcDocument.presentAttribute(): not yet implemented");
    return null;
}
/**
Return a Vector of metadata attributes for this document.
Not yet implemented: the method only logs that fact and returns null.
@return intended to be a Vector of triples [attrName, attrType, attrValue];
currently always null.
*/
public Vector attributes()
{
    // The trace used to read "SOIFDocument.presentAttributes()", naming
    // both the wrong class and the wrong method.
    debug.dumpTrace("MarcDocument.attributes(): not yet implemented");
    return null;
}
}