// DCBatch.java
//
// Date HTML Created: 27 Apr 2010

 
package edu.umn.dspace.batch_upload;

import java.io.File;
import java.io.FileWriter;
import java.io.StringWriter;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import jxl.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.regex.*;
import java.lang.Integer;
import org.dspace.core.ConfigurationManager;

public class DCBatch {
  // Spreadsheet to read; main() points this at IngestMetaData.xls inside the ingest directory.
  private static File inputFile = null;
  // Directory that receives the generated files; main() sets it to a fixed temp folder under BatchRootDir.
  private static File outputFolder = null;
  // Directory main() expects to exist; set to Assets_to_Ingest inside the ingest directory.
  private static File fileFolder = null;
  // Root directory for batch runs, read from the batch.ingest.dir configuration property.
  private static String BatchRootDir = ConfigurationManager.getProperty("batch.ingest.dir");

  // Lookup keyed by column header name (e.g. dc.title); main() parses each value's
  // toString() as the integer column index of that header in the sheet.
  public static HashMap headerLocations = new HashMap();
  
  // In-memory buffer that the XML text is written to before it is flushed to a file.
  public static StringWriter xml;
  // Writer used by main() to store the buffered text as config.xml.
  public static FileWriter xmlFile;
  // Name of the sub-directory of outputFolder in which config.xml is written.
  public static String DCTreeRoot;
  
  // Pattern that a dc.date.issued cell must match in full to be considered valid.
  // NOTE(review): judging by the name this is meant to accept ISO 8601 dates/times - confirm.
  public static final Pattern isoRegEx = Pattern.compile("^([\\+-]?\\d{4}(?!\\d{2}\\b))((-?)((0[1-9]|1[0-2])(\\3([12]\\d|0[1-9]|3[01]))?|W([0-4]\\d|5[0-2])-?[1-7]|(00[1-9]|0[1-9]\\d|[12]\\d{2}|3([0-5]\\d|6[1-6])))([T\\s](([01]\\d|2[0-3])((:?)[0-5]\\d)?|24\\:?00)?(\\15([0-5]\\d))?([zZ]|([\\+-])([01]\\d|2[0-3]):?([0-5]\\d)?)?)?)?$");

  /**
   * Command-line entry point: reads the ingest spreadsheet of one batch run and
   * writes XML metadata files for it.
   *
   * Visible steps:
   *   1. Resolve the output folder, the spreadsheet (IngestMetaData.xls) and the
   *      asset folder (Assets_to_Ingest) relative to BatchRootDir and args[0].
   *   2. Exit with status 1 if the asset folder is missing or is not a directory,
   *      or if the spreadsheet cannot be opened.
   *   3. Read the first sheet and report its column and row counts.
   *   4. Write the buffered text to config.xml under outputFolder/DCTreeRoot.
   *   5. For each data row, write the title, the issue date and the optional
   *      columns to the buffer, then store it as dublin_core.xml in the row's
   *      ingest directory.
   *
   * NOTE(review): the text of this method appears to be truncated in two places
   * (the header-location loop and the start of the per-row loop); the statements
   * that populate headerLocations, DCTreeRoot, eperson, collectionId, cell_x and
   * ingestDir are not visible here. The string literals passed to xml.write also
   * look as if markup was stripped from them. Restore from the original source
   * before relying on this copy.
   *
   * @param args args[0] is the name of the ingest directory below BatchRootDir
   */
  public static void main(String[] args) {
    // Name of the run's directory, resolved below relative to BatchRootDir.
    String NewIngestDir = args[0];
    // give the output file a name that is not likely to be chosen by users submitting a run
    outputFolder = new File(BatchRootDir + "/temp_output_dir_for_batch_aaa");
    inputFile = new File(BatchRootDir + "/" + NewIngestDir + "/IngestMetaData.xls");
    fileFolder = new File(BatchRootDir + "/" + NewIngestDir + "/Assets_to_Ingest");
    
    // NOTE(review): delete() is only attempted when the folder does NOT exist, so a
    // folder left over from an earlier run is kept; the condition looks inverted - confirm.
    if (!outputFolder.exists()) {
      outputFolder.delete();
    }
    outputFolder.mkdir();
    
    // The asset folder must exist and be a directory; otherwise stop with exit status 1.
    if (!fileFolder.exists()) {
      System.out.println("fileFolder: " + fileFolder);
      System.out.println("ERROR: fileFolder does not exist.");
      System.exit(1);
    }
    if (!fileFolder.isDirectory()) {
      System.out.println("ERROR: fileFolder is not a directory.");
      System.exit(1);
    }
    // Open the spreadsheet; any failure is reported and ends the run with exit status 1.
    Workbook workbook = null;
    try {
      workbook = Workbook.getWorkbook(inputFile);
    } catch (Exception err) {
      System.out.println("Error: The input file could not be read.");
      System.out.println(err.getMessage());
      System.exit(1);
    }
    
    // Only the first sheet of the workbook is processed.
    Sheet sheet = workbook.getSheet(0);
    Cell cell, headerCell;
    ProcessBuilder procBuild;
    Process proc;
    int rows, cols;
    
    cols = sheet.getColumns();
    System.out.println("Found " + cols + " columns.");
    rows = sheet.getRows();
    System.out.println("Found " + rows + " rows.");
    
    System.out.println("Populating header locations.");
    for (int c=0; c\n");
    xml.write("\n");
    xml.write("  " + eperson + "\n");
    xml.write("  " + collectionId + "\n");
    xml.write("\n");
    
    try {
      xmlFile = new FileWriter(outputFolder.getAbsolutePath() + "/" + DCTreeRoot + "/config.xml");
      xmlFile.write(xml.toString());
      xmlFile.close();
      xml = null;
    } catch (Exception err) {
      System.out.println("ERROR: " + err);
    }
    
    String filename, title, date, data, element, qualifier;
    File fileObj, ingestDir;
    FileWriter contentsFile, dublinCoreFile;
    boolean copySuccess;
    Matcher isoMatcher;
    String[] otherColumns = new String[10];
    int dots;
    
    for (int r=1; r\n");
      xml.write("\n");
      // Title: the column index comes from the header lookup; the value is read from row r.
      cell_x= Integer.parseInt(headerLocations.get("dc.title").toString());
      title = sheet.getCell(cell_x,r).getContents();
      
      // A blank title is only reported; the row is still processed.
      if (title.trim().equals("")) {
        System.out.println("ERROR: title of row " + r + " was null!");
      }
      xml.write("\n");
      xml.write(title + "\n");
      xml.write("\n");
      
      // Issue date: read the same way as the title.
      cell_x= Integer.parseInt(headerLocations.get("dc.date.issued").toString());
      date = sheet.getCell(cell_x,r).getContents();
      
      if (date.trim().equals("")) {
        System.out.println("ERROR: date of row " + r + " was null!");
      }
      
      // The date must match isoRegEx in full; a mismatch is reported but the value is still written.
      isoMatcher = isoRegEx.matcher(date);
      
      if (!isoMatcher.matches()) {
        System.out.println("ERROR: date of row " + r + " was not valid!");
      }
      
      
      xml.write("\n");
      xml.write(date + "\n");
      xml.write("\n");
      
      // Remaining columns that are copied to the output when the cell is not blank.
      otherColumns[0] = "dc.title.alternative";
      otherColumns[1] = "dc.contributor.author";
      otherColumns[2] = "dc.contributor.editor";
      otherColumns[3] = "dc.subject";
      otherColumns[4] = "dc.subject.other";
      otherColumns[5] = "dc.identifier.citation";
      otherColumns[6] = "dc.relation.ispartofseries";
      otherColumns[7] = "dc.description.abstract";
      otherColumns[8] = "dc.description";
      otherColumns[9] = "dc.identifier.govdoc";
      
      for (int i=0; i < otherColumns.length; i++) {
        String columnName = otherColumns[i];
        cell_x = Integer.parseInt(headerLocations.get(columnName).toString());
        data = sheet.getCell(cell_x ,r).getContents();
        element = "";
        qualifier = "none";
        
        // Split the dotted column name: with exactly one dot everything after it is the
        // element and the qualifier keeps its default; otherwise the part between the
        // first and last dot is the element and the part after the last dot the qualifier.
        dots = elementCount(columnName, ".");
        
        if (dots == 1) {
          element = columnName.substring(columnName.indexOf(".") + 1);
        } else {
          element = columnName.substring(columnName.indexOf(".") + 1, columnName.lastIndexOf("."));
          qualifier = columnName.substring(columnName.lastIndexOf(".") + 1);
        }
        
        // Blank cells produce no output for this column.
        if (data.trim().equals("")) {
          continue;
        }
        // A cell may hold several values separated by two consecutive semicolons;
        // each value is written separately.
        String delimiter = "\\;\\;";
        String[] separate_values = data.split(delimiter);
        // NOTE(review): element and qualifier are not used by the writes below in this
        // copy of the file - presumably they belonged to the stripped markup; confirm.
        for(int ii =0; ii < separate_values.length ; ii++){
           xml.write("\n");
           xml.write(separate_values[ii] + "\n");
           xml.write("\n");
        }
      }
      xml.write("");
      
      // Store the buffered text for this row; a failure is printed and the loop moves on.
      try {
        dublinCoreFile = new FileWriter(ingestDir.getAbsolutePath() + "/dublin_core.xml");
        dublinCoreFile.write(xml.toString());
        dublinCoreFile.close();
      } catch (Exception err) {
        System.out.println("ERROR: " + err);
      }
      
    }
  }
  
  /**
   * Copies a file when both ends are given as File objects.
   * Each argument is converted to its absolute path and the work is handed to
   * the overload that takes two path strings.
   *
   * @param srFile file to read from
   * @param dtFile file to write to
   * @return whatever the path-based overload returns
   */
  public static boolean copyfile(File srFile, File dtFile) {
    final String sourcePath = srFile.getAbsolutePath();
    final String targetPath = dtFile.getAbsolutePath();
    return copyfile(sourcePath, targetPath);
  }
  
  /**
   * Copies a file when the source is a File object and the destination a path.
   * The source is converted to its absolute path and the work is handed to the
   * overload that takes two path strings.
   *
   * @param srFile file to read from
   * @param dtFile path of the file to write to
   * @return whatever the path-based overload returns
   */
  public static boolean copyfile(File srFile, String dtFile) {
    final String sourcePath = srFile.getAbsolutePath();
    return copyfile(sourcePath, dtFile);
  }
  
  /**
   * Copies a file when the source is a path and the destination a File object.
   * The destination is converted to its absolute path and the work is handed to
   * the overload that takes two path strings.
   *
   * @param srFile path of the file to read from
   * @param dtFile file to write to
   * @return whatever the path-based overload returns
   */
  public static boolean copyfile(String srFile, File dtFile) {
    final String targetPath = dtFile.getAbsolutePath();
    return copyfile(srFile, targetPath);
  }
  
  /**
   * Copies the contents of one file to another, overwriting the destination.
   * Prints "File copied." to standard output after a successful copy.
   *
   * Both streams are released in a finally block, so no file handle is leaked
   * when opening the destination, reading or writing fails part-way.
   *
   * @param srFile path of the file to read from
   * @param dtFile path of the file to write to (created or truncated)
   * @return true if the whole file was copied; false if the source could not be
   *         opened or any I/O error occurred
   */
  public static boolean copyfile(String srFile, String dtFile) {
    InputStream in = null;
    OutputStream out = null;
    try {
      in = new FileInputStream(new File(srFile));
      // Opened without the append flag: an existing destination is overwritten.
      out = new FileOutputStream(new File(dtFile));

      byte[] buf = new byte[1024];
      int len;
      while ((len = in.read(buf)) != -1) {
        out.write(buf, 0, len);
      }

      // Close the destination inside the try so that a failing close is
      // reported as a failed copy instead of being ignored.
      out.close();
      out = null;
      System.out.println("File copied.");
      return true;
    } catch (java.io.IOException e) {
      // Also covers FileNotFoundException (missing source, unwritable destination).
      return false;
    } finally {
      if (in != null) {
        try {
          in.close();
        } catch (java.io.IOException ignored) {
          // Nothing useful can be done if closing the source fails.
        }
      }
      if (out != null) {
        try {
          out.close();
        } catch (java.io.IOException ignored) {
          // Only reached after an earlier failure; the copy is already reported as failed.
        }
      }
    }
  }
  
  /**
   * Counts how many times a substring occurs in a string.
   * The search resumes one character after each hit, so overlapping
   * occurrences are counted (for example "aa" occurs twice in "aaa").
   *
   * @param in      string to search; must not be null
   * @param element substring to look for; must not be null
   * @return number of occurrences, or 0 when element is empty
   */
  public static int elementCount(String in, String element) {
    // An empty search string is found at every position and indexOf never
    // returns -1 for it, so the loop below would never end; treat it as no match.
    if (element.length() == 0) {
      return 0;
    }

    int count = 0;
    int index = in.indexOf(element);
    while (index > -1) {
      count++;
      index = in.indexOf(element, index + 1);
    }
    return count;
  }
}