2010-09-22 19:04:10 +02:00
|
|
|
package com.google.refine.importers;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
|
|
|
import java.io.ByteArrayInputStream;
|
2010-08-06 07:04:25 +02:00
|
|
|
import java.io.IOException;
|
2010-05-05 01:24:48 +02:00
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.PushbackInputStream;
|
|
|
|
import java.util.Properties;
|
|
|
|
|
2010-05-26 21:22:38 +02:00
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.ProjectMetadata;
|
2010-09-27 18:09:44 +02:00
|
|
|
import com.google.refine.importers.TreeImporter.ImportColumnGroup;
|
2010-09-22 19:04:10 +02:00
|
|
|
import com.google.refine.model.Project;
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
public class XmlImporter implements StreamImporter {
|
2010-05-05 01:24:48 +02:00
|
|
|
|
2010-05-26 21:22:38 +02:00
|
|
|
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
public static final int BUFFER_SIZE = 64 * 1024;
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
@Override
|
2010-05-05 01:24:48 +02:00
|
|
|
public void read(
|
2010-05-26 15:18:48 +02:00
|
|
|
InputStream inputStream,
|
2010-05-05 01:24:48 +02:00
|
|
|
Project project,
|
2010-09-17 03:00:23 +02:00
|
|
|
ProjectMetadata metadata, Properties options
|
2010-08-06 07:04:25 +02:00
|
|
|
) throws ImportException {
|
2010-05-26 21:22:38 +02:00
|
|
|
logger.trace("XmlImporter.read");
|
2010-05-05 01:24:48 +02:00
|
|
|
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
String[] recordPath = null;
|
|
|
|
{
|
|
|
|
byte[] buffer = new byte[BUFFER_SIZE];
|
|
|
|
int bytes_read = 0;
|
2010-08-06 07:04:25 +02:00
|
|
|
try {
|
|
|
|
while (bytes_read < BUFFER_SIZE) {
|
|
|
|
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
|
|
|
if (c == -1) break;
|
|
|
|
bytes_read +=c ;
|
|
|
|
}
|
|
|
|
pis.unread(buffer, 0, bytes_read);
|
|
|
|
} catch (IOException e) {
|
|
|
|
throw new ImportException("Read error",e);
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|
2010-08-06 07:04:25 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
if (options.containsKey("importer-record-tag")) {
|
|
|
|
recordPath = XmlImportUtilities.detectPathFromTag(
|
2010-05-26 15:18:48 +02:00
|
|
|
new ByteArrayInputStream(buffer, 0, bytes_read),
|
2010-05-05 01:24:48 +02:00
|
|
|
options.getProperty("importer-record-tag"));
|
|
|
|
} else {
|
|
|
|
recordPath = XmlImportUtilities.detectRecordElement(
|
|
|
|
new ByteArrayInputStream(buffer, 0, bytes_read));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-25 08:17:08 +02:00
|
|
|
if (recordPath == null)
|
2010-05-29 21:39:07 +02:00
|
|
|
return;
|
2010-08-25 08:17:08 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
XmlImportUtilities.importXml(pis, project, recordPath, rootColumnGroup);
|
|
|
|
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
2010-08-25 08:17:08 +02:00
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
project.columnModel.update();
|
|
|
|
}
|
2010-05-26 15:18:48 +02:00
|
|
|
|
2010-08-06 07:04:25 +02:00
|
|
|
@Override
|
2010-05-26 15:18:48 +02:00
|
|
|
public boolean canImportData(String contentType, String fileName) {
|
|
|
|
if (contentType != null) {
|
|
|
|
contentType = contentType.toLowerCase().trim();
|
|
|
|
|
|
|
|
if("application/xml".equals(contentType) ||
|
|
|
|
"text/xml".equals(contentType) ||
|
|
|
|
"application/rss+xml".equals(contentType) ||
|
|
|
|
"application/atom+xml".equals(contentType)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
} else if (fileName != null) {
|
|
|
|
fileName = fileName.toLowerCase();
|
|
|
|
if (
|
|
|
|
fileName.endsWith(".xml") ||
|
|
|
|
fileName.endsWith(".atom") ||
|
|
|
|
fileName.endsWith(".rss")
|
|
|
|
) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2010-05-05 01:24:48 +02:00
|
|
|
}
|