diff --git a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java index 06ffb6fa1..07d334f34 100644 --- a/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java +++ b/src/main/java/com/metaweb/gridworks/commands/edit/CreateProjectCommand.java @@ -24,6 +24,7 @@ import com.metaweb.gridworks.commands.Command; import com.metaweb.gridworks.importers.ExcelImporter; import com.metaweb.gridworks.importers.Importer; import com.metaweb.gridworks.importers.TsvCsvImporter; +import com.metaweb.gridworks.importers.XmlImporter; import com.metaweb.gridworks.model.Project; import com.metaweb.gridworks.util.ParsingUtilities; import com.oreilly.servlet.multipart.FilePart; @@ -301,6 +302,11 @@ public class CreateProjectCommand extends Command { return new ExcelImporter(false); } else if (fileName.endsWith(".xlsx")) { return new ExcelImporter(true); + } else if ( + fileName.endsWith(".xml") || + fileName.endsWith(".rss") + ) { + return new XmlImporter(); } } diff --git a/src/main/java/com/metaweb/gridworks/importers/XmlImportUtilities.java b/src/main/java/com/metaweb/gridworks/importers/XmlImportUtilities.java new file mode 100644 index 000000000..3bc6b190a --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/importers/XmlImportUtilities.java @@ -0,0 +1,395 @@ +package com.metaweb.gridworks.importers; + +import java.io.InputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import com.metaweb.gridworks.model.Cell; +import com.metaweb.gridworks.model.Project; +import com.metaweb.gridworks.model.Row; +import com.metaweb.gridworks.model.Column; + +public class XmlImportUtilities { + static protected class RecordElementCandidate { + String[] path; + int count; + } + + static public class ImportColumnGroup { + public String name = ""; + + public Map subgroups = new HashMap(); + public Map columns = new HashMap(); + } + + static public class ImportColumn { + public int cellIndex; + public String name; + } + + static public class ImportRecord { + List> rows = new LinkedList>(); + List columnEmptyRowIndices = new ArrayList(); + } + + static public String[] detectRecordElement(InputStream inputStream) { + List candidates = new ArrayList(); + + try { + XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + RecordElementCandidate candidate = + detectRecordElement( + parser, + new String[] { parser.getLocalName() }); + + if (candidate != null) { + candidates.add(candidate); + } + } + } + } catch (Exception e) { + // silent + // e.printStackTrace(); + } + + if (candidates.size() > 0) { + sortRecordElementCandidates(candidates); + + return candidates.get(0).path; + } + return null; + } + + static protected RecordElementCandidate detectRecordElement(XMLStreamReader parser, String[] path) { + List candidateList = new ArrayList(); + + Map candidates = new HashMap(); + + try { + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } else if (eventType == XMLStreamConstants.START_ELEMENT) { + String tagName = parser.getLocalName(); + + candidates.put(tagName, candidates.containsKey(tagName) ? candidates.get(tagName) + 1 : 1); + + String[] path2 = new String[path.length + 1]; + System.arraycopy(path, 0, path2, 0, path.length); + path2[path.length] = tagName; + + RecordElementCandidate c = detectRecordElement(parser, path2); + if (c != null) { + candidateList.add(c); + } + } + } + } catch (Exception e) { + // silent + // e.printStackTrace(); + } + + if (candidates.size() > 0) { + List ourCandidateList = new ArrayList(candidates.size()); + for (Entry entry : candidates.entrySet()) { + int count = entry.getValue(); + if (count > 1) { + String[] path2 = new String[path.length + 1]; + System.arraycopy(path, 0, path2, 0, path.length); + path2[path.length] = entry.getKey(); + + RecordElementCandidate candidate = new RecordElementCandidate(); + candidate.path = path2; + candidate.count = count; + ourCandidateList.add(candidate); + } + } + + if (ourCandidateList.size() > 0) { + sortRecordElementCandidates(ourCandidateList); + + RecordElementCandidate ourCandidate = ourCandidateList.get(0); + if (ourCandidate.count > 10) { + return ourCandidate; + } + + candidateList.add(ourCandidate); + } + } + + if (candidateList.size() > 0) { + sortRecordElementCandidates(candidateList); + + return candidateList.get(0); + } + + return null; + } + + static public void sortRecordElementCandidates(List list) { + Collections.sort(list, new Comparator() { + public int compare(RecordElementCandidate o1, RecordElementCandidate o2) { + return o2.count - o1.count; + } + }); + } + + static public void importXml( + InputStream inputStream, + Project project, + String[] recordPath, + ImportColumnGroup rootColumnGroup + ) { + try { + XMLStreamReader parser = XMLInputFactory.newInstance().createXMLStreamReader(inputStream); + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + findRecord(project, parser, recordPath, 0, rootColumnGroup); + } + } + } catch (Exception e) { + // silent + } + } + + static public void createColumnsFromImport( + Project project, + ImportColumnGroup columnGroup + ) { + for (ImportColumn c : columnGroup.columns.values()) { + Column column = new com.metaweb.gridworks.model.Column(c.cellIndex, c.name); + project.columnModel.columns.add(column); + } + + for (ImportColumnGroup g : columnGroup.subgroups.values()) { + createColumnsFromImport(project, g); + } + } + + static protected void findRecord( + Project project, + XMLStreamReader parser, + String[] recordPath, + int pathIndex, + ImportColumnGroup rootColumnGroup + ) throws XMLStreamException { + String tagName = parser.getLocalName(); + if (tagName.equals(recordPath[pathIndex])) { + if (pathIndex < recordPath.length - 1) { + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup); + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } + } + } else { + processRecord(project, parser, rootColumnGroup); + } + } else { + skip(parser); + } + } + + static protected void skip(XMLStreamReader parser) throws XMLStreamException { + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + skip(parser); + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + return; + } + } + } + + static protected void processRecord( + Project project, + XMLStreamReader parser, + ImportColumnGroup rootColumnGroup + ) throws XMLStreamException { + ImportRecord record = new ImportRecord(); + + processSubRecord(project, parser, rootColumnGroup, record); + + for (List row : record.rows) { + Row realRow = new Row(row.size()); + + for (int c = 0; c < row.size(); c++) { + Cell cell = row.get(c); + if (cell != null) { + realRow.setCell(c, cell); + } + } + + project.rows.add(realRow); + } + } + + static protected void processSubRecord( + Project project, + XMLStreamReader parser, + ImportColumnGroup columnGroup, + ImportRecord record + ) throws XMLStreamException { + int commonStartingRowIndex = 0; + for (ImportColumn column : columnGroup.columns.values()) { + if (column.cellIndex < record.columnEmptyRowIndices.size()) { + commonStartingRowIndex = Math.max( + commonStartingRowIndex, + record.columnEmptyRowIndices.get(column.cellIndex)); + } + } + + while (parser.hasNext()) { + int eventType = parser.next(); + if (eventType == XMLStreamConstants.START_ELEMENT) { + int attributeCount = parser.getAttributeCount(); + for (int i = 0; i < attributeCount; i++) { + addCell( + project, + columnGroup, + record, + parser.getAttributeLocalName(i), + parser.getAttributeValue(i), + commonStartingRowIndex + ); + } + + processSubRecord( + project, + parser, + getColumnGroup(project, columnGroup, parser.getLocalName()), + record + ); + } else if (eventType == XMLStreamConstants.CDATA || + eventType == XMLStreamConstants.CHARACTERS) { + addCell( + project, + columnGroup, + record, + null, + parser.getText(), + commonStartingRowIndex + ); + } else if (eventType == XMLStreamConstants.END_ELEMENT) { + break; + } + } + } + + static protected void addCell( + Project project, + ImportColumnGroup columnGroup, + ImportRecord record, + String columnLocalName, + Serializable value, + int commonStaringRowIndex + ) { + if (value == null || (value instanceof String && ((String) value).isEmpty())) { + return; + } + + ImportColumn column = getColumn(project, columnGroup, columnLocalName); + int cellIndex = column.cellIndex; + + while (cellIndex >= record.columnEmptyRowIndices.size()) { + record.columnEmptyRowIndices.add(commonStaringRowIndex); + } + int rowIndex = record.columnEmptyRowIndices.get(cellIndex); + + while (rowIndex >= record.rows.size()) { + record.rows.add(new ArrayList()); + } + List row = record.rows.get(rowIndex); + + while (cellIndex >= row.size()) { + row.add(null); + } + + row.set(cellIndex, new Cell(value, null)); + + record.columnEmptyRowIndices.set(cellIndex, rowIndex + 1); + } + + static protected ImportColumn getColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.columns.containsKey(localName)) { + return columnGroup.columns.get(localName); + } + + ImportColumn column = createColumn(project, columnGroup, localName); + columnGroup.columns.put(localName, column); + + return column; + } + + static protected ImportColumn createColumn( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumn newColumn = new ImportColumn(); + + newColumn.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); + + newColumn.cellIndex = project.columnModel.allocateNewCellIndex(); + + return newColumn; + } + + static protected ImportColumnGroup getColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + if (columnGroup.subgroups.containsKey(localName)) { + return columnGroup.subgroups.get(localName); + } + + ImportColumnGroup subgroup = createColumnGroup(project, columnGroup, localName); + columnGroup.subgroups.put(localName, subgroup); + + return subgroup; + } + + static protected ImportColumnGroup createColumnGroup( + Project project, + ImportColumnGroup columnGroup, + String localName + ) { + ImportColumnGroup newGroup = new ImportColumnGroup(); + + newGroup.name = + columnGroup.name.length() == 0 ? + (localName == null ? "Text" : localName) : + (localName == null ? columnGroup.name : (columnGroup.name + " - " + localName)); + + return newGroup; + } +} diff --git a/src/main/java/com/metaweb/gridworks/importers/XmlImporter.java b/src/main/java/com/metaweb/gridworks/importers/XmlImporter.java new file mode 100644 index 000000000..0a4690ecf --- /dev/null +++ b/src/main/java/com/metaweb/gridworks/importers/XmlImporter.java @@ -0,0 +1,49 @@ +package com.metaweb.gridworks.importers; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.Reader; +import java.util.Properties; + +import org.apache.commons.lang.NotImplementedException; + +import com.metaweb.gridworks.importers.XmlImportUtilities.ImportColumnGroup; +import com.metaweb.gridworks.model.Project; + +public class XmlImporter implements Importer { + + public boolean takesReader() { + return false; + } + + public void read(Reader reader, Project project, Properties options, int skip, int limit) + throws Exception { + + throw new NotImplementedException(); + } + + public void read(InputStream inputStream, Project project, + Properties options, int skip, int limit) throws Exception { + + BufferedInputStream bis = new BufferedInputStream(inputStream); + + String[] recordPath = null; + { + byte[] buffer = new byte[64 * 1024]; + + bis.mark(buffer.length); + int c = bis.read(buffer); + bis.reset(); + + recordPath = XmlImportUtilities.detectRecordElement( + new ByteArrayInputStream(buffer, 0, c)); + } + + ImportColumnGroup rootColumnGroup = new ImportColumnGroup(); + + XmlImportUtilities.importXml(bis, project, recordPath, rootColumnGroup); + XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup); + } + +}