2010-10-20 22:45:52 +02:00
|
|
|
/*
|
|
|
|
|
|
|
|
Copyright 2010, Google Inc.
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
|
|
modification, are permitted provided that the following conditions are
|
|
|
|
met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above
|
|
|
|
copyright notice, this list of conditions and the following disclaimer
|
|
|
|
in the documentation and/or other materials provided with the
|
|
|
|
distribution.
|
|
|
|
* Neither the name of Google Inc. nor the names of its
|
|
|
|
contributors may be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
package com.google.refine.importers;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileInputStream;
|
2010-08-06 07:04:25 +02:00
|
|
|
import java.io.FileNotFoundException;
|
2010-05-05 01:24:48 +02:00
|
|
|
import java.io.FileOutputStream;
|
2010-08-06 07:04:25 +02:00
|
|
|
import java.io.IOException;
|
2010-05-05 01:24:48 +02:00
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.OutputStream;
|
2011-08-02 05:34:47 +02:00
|
|
|
import java.util.List;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2011-08-02 05:34:47 +02:00
|
|
|
import org.json.JSONObject;
|
2010-05-05 01:24:48 +02:00
|
|
|
import org.marc4j.MarcPermissiveStreamReader;
|
|
|
|
import org.marc4j.MarcWriter;
|
|
|
|
import org.marc4j.MarcXmlWriter;
|
|
|
|
import org.marc4j.marc.Record;
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.ProjectMetadata;
|
2011-08-02 05:34:47 +02:00
|
|
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
|
|
|
import com.google.refine.importing.ImportingJob;
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.model.Project;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2011-08-02 05:34:47 +02:00
|
|
|
public class MarcImporter extends XmlImporter {
|
2010-08-06 07:04:25 +02:00
|
|
|
@Override
|
2011-08-02 05:34:47 +02:00
|
|
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
|
|
|
ImportingJob job, String fileSource, InputStream inputStream,
|
|
|
|
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
|
|
|
|
List<Exception> exceptions) {
|
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
File tempFile;
|
|
|
|
try {
|
2010-09-22 20:36:33 +02:00
|
|
|
tempFile = File.createTempFile("refine-import-", ".marc.xml");
|
2010-08-06 07:04:25 +02:00
|
|
|
} catch (IOException e) {
|
2011-08-02 05:34:47 +02:00
|
|
|
exceptions.add(new ImportException("Unexpected error creating temp file", e));
|
|
|
|
return;
|
2010-08-06 07:04:25 +02:00
|
|
|
}
|
2011-08-02 05:34:47 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
try {
|
|
|
|
OutputStream os = new FileOutputStream(tempFile);
|
|
|
|
try {
|
|
|
|
MarcWriter writer = new MarcXmlWriter(os, true);
|
2011-08-02 05:34:47 +02:00
|
|
|
|
|
|
|
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
|
|
|
inputStream, true, true);
|
2010-05-05 01:24:48 +02:00
|
|
|
while (reader.hasNext()) {
|
|
|
|
Record record = reader.next();
|
2011-08-02 05:34:47 +02:00
|
|
|
writer.write(record);
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
|
|
|
writer.close();
|
|
|
|
} finally {
|
2010-08-06 07:04:25 +02:00
|
|
|
try {
|
|
|
|
os.close();
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Just ignore - not much we can do anyway
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
InputStream is = new FileInputStream(tempFile);
|
|
|
|
try {
|
2011-08-02 05:34:47 +02:00
|
|
|
super.parseOneFile(project, metadata, job, fileSource, inputStream,
|
|
|
|
rootColumnGroup, limit, options, exceptions);
|
2010-05-05 01:24:48 +02:00
|
|
|
} finally {
|
2010-08-06 07:04:25 +02:00
|
|
|
try {
|
|
|
|
is.close();
|
|
|
|
} catch (IOException e) {
|
|
|
|
// Just ignore - not much we can do anyway
|
|
|
|
}
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
2010-08-06 07:04:25 +02:00
|
|
|
} catch (FileNotFoundException e) {
|
2011-08-02 05:34:47 +02:00
|
|
|
exceptions.add(new ImportException("Input file not found", e));
|
|
|
|
return;
|
2010-05-05 01:24:48 +02:00
|
|
|
} finally {
|
|
|
|
tempFile.delete();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|