2010-09-22 19:04:10 +02:00
|
|
|
package com.google.refine.importers;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileInputStream;
|
2010-08-06 07:04:25 +02:00
|
|
|
import java.io.FileNotFoundException;
|
2010-05-05 01:24:48 +02:00
|
|
|
import java.io.FileOutputStream;
|
2010-08-06 07:04:25 +02:00
|
|
|
import java.io.IOException;
|
2010-05-05 01:24:48 +02:00
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.OutputStream;
|
|
|
|
import java.util.Properties;
|
|
|
|
|
|
|
|
import org.marc4j.MarcPermissiveStreamReader;
|
|
|
|
import org.marc4j.MarcWriter;
|
|
|
|
import org.marc4j.MarcXmlWriter;
|
|
|
|
import org.marc4j.marc.Record;
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.ProjectMetadata;
|
|
|
|
import com.google.refine.model.Project;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
public class MarcImporter implements StreamImporter {
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
@Override
|
2010-05-05 01:24:48 +02:00
|
|
|
public void read(
|
2010-05-26 15:18:48 +02:00
|
|
|
InputStream inputStream,
|
2010-05-05 01:24:48 +02:00
|
|
|
Project project,
|
2010-09-17 03:00:23 +02:00
|
|
|
ProjectMetadata metadata, Properties options
|
2010-08-06 07:04:25 +02:00
|
|
|
) throws ImportException {
|
2010-05-05 01:24:48 +02:00
|
|
|
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
|
|
|
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
File tempFile;
|
|
|
|
try {
|
2010-09-22 20:36:33 +02:00
|
|
|
tempFile = File.createTempFile("refine-import-", ".marc.xml");
|
2010-08-06 07:04:25 +02:00
|
|
|
} catch (IOException e) {
|
|
|
|
throw new ImportException("Unexpected error creating temp file",e);
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
try {
|
|
|
|
OutputStream os = new FileOutputStream(tempFile);
|
|
|
|
try {
|
|
|
|
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
|
|
|
inputStream,
|
|
|
|
true,
|
|
|
|
true
|
|
|
|
);
|
|
|
|
MarcWriter writer = new MarcXmlWriter(os, true);
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
int count = 0;
|
|
|
|
while (reader.hasNext()) {
|
|
|
|
Record record = reader.next();
|
|
|
|
if (skip <= 0) {
|
|
|
|
if (limit == -1 || count < limit) {
|
|
|
|
writer.write(record);
|
|
|
|
count++;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
skip--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
writer.close();
|
|
|
|
} finally {
|
2010-08-06 07:04:25 +02:00
|
|
|
try {
|
|
|
|
os.close();
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Just ignore - not much we can do anyway
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
InputStream is = new FileInputStream(tempFile);
|
|
|
|
try {
|
2010-09-17 03:00:23 +02:00
|
|
|
new XmlImporter().read(is, project, metadata, options);
|
2010-05-05 01:24:48 +02:00
|
|
|
} finally {
|
2010-08-06 07:04:25 +02:00
|
|
|
try {
|
|
|
|
is.close();
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Just ignore - not much we can do anyway
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
2010-08-06 07:04:25 +02:00
|
|
|
} catch (FileNotFoundException e) {
|
|
|
|
throw new ImportException("Input file not found", e);
|
2010-05-05 01:24:48 +02:00
|
|
|
} finally {
|
|
|
|
tempFile.delete();
|
|
|
|
}
|
|
|
|
}
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
@Override
|
2010-05-26 15:18:48 +02:00
|
|
|
public boolean canImportData(String contentType, String fileName) {
|
|
|
|
if (contentType != null) {
|
|
|
|
contentType = contentType.toLowerCase().trim();
|
|
|
|
|
|
|
|
if ("application/marc".equals(contentType)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (fileName != null) {
|
|
|
|
fileName = fileName.toLowerCase();
|
|
|
|
if (
|
|
|
|
fileName.endsWith(".mrc") ||
|
|
|
|
fileName.endsWith(".marc") ||
|
|
|
|
fileName.contains(".mrc.") ||
|
|
|
|
fileName.contains(".marc.")
|
|
|
|
) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|