Added extension for importing PC-Axis files.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2365 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
5aec75696d
commit
94e0369af7
@ -1,10 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="main/src"/>
|
||||
<classpathentry kind="src" path="extensions/jython/src"/>
|
||||
<classpathentry kind="src" path="server/src"/>
|
||||
<classpathentry kind="src" path="extensions/freebase/src"/>
|
||||
<classpathentry kind="src" path="extensions/gdata/src"/>
|
||||
<classpathentry kind="src" path="extensions/jython/src"/>
|
||||
<classpathentry kind="src" path="extensions/pc-axis/src"/>
|
||||
<classpathentry kind="src" path="extensions/sample/src"/>
|
||||
<classpathentry kind="src" path="main/tests/server/src"/>
|
||||
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
|
||||
|
@ -13,6 +13,7 @@
|
||||
<ant dir="jython/" target="build" />
|
||||
<ant dir="freebase/" target="build" />
|
||||
<ant dir="gdata/" target="build" />
|
||||
<ant dir="pc-axis/" target="build" />
|
||||
</target>
|
||||
|
||||
<target name="clean">
|
||||
@ -21,5 +22,6 @@
|
||||
<ant dir="jython/" target="clean" />
|
||||
<ant dir="freebase/" target="clean" />
|
||||
<ant dir="gdata/" target="clean" />
|
||||
<ant dir="pc-axis/" target="clean" />
|
||||
</target>
|
||||
</project>
|
||||
|
7
extensions/pc-axis/.classpath
Normal file
7
extensions/pc-axis/.classpath
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/grefine-all"/>
|
||||
<classpathentry kind="output" path="module/MOD-INF/classes"/>
|
||||
</classpath>
|
17
extensions/pc-axis/.project
Normal file
17
extensions/pc-axis/.project
Normal file
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>refine-pd-extension</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
97
extensions/pc-axis/build.xml
Normal file
97
extensions/pc-axis/build.xml
Normal file
@ -0,0 +1,97 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!--+
|
||||
|
|
||||
| Google Refine PC Axis extension build file
|
||||
|
|
||||
+-->
|
||||
|
||||
<project name="refine-pc-axis" default="build" basedir=".">
|
||||
<property environment="env"/>
|
||||
|
||||
<condition property="version" value="trunk">
|
||||
<not><isset property="version"/></not>
|
||||
</condition>
|
||||
|
||||
<condition property="revision" value="rXXXX">
|
||||
<not><isset property="revision"/></not>
|
||||
</condition>
|
||||
|
||||
<condition property="full_version" value="0.0.0.0">
|
||||
<not><isset property="full_version"/></not>
|
||||
</condition>
|
||||
|
||||
<condition property="dist.dir" value="dist">
|
||||
<not><isset property="dist.dir"/></not>
|
||||
</condition>
|
||||
|
||||
<property name="fullname" value="${name}-${version}-${revision}" />
|
||||
|
||||
<property name="refine.dir" value="${basedir}/../../main" />
|
||||
<property name="refine.webinf.dir" value="${refine.dir}/webapp/WEB-INF" />
|
||||
<property name="refine.modinf.dir" value="${refine.dir}/webapp/modules/core/MOD-INF" />
|
||||
<property name="refine.classes.dir" value="${refine.webinf.dir}/classes" />
|
||||
<property name="refine.lib.dir" value="${refine.webinf.dir}/lib" />
|
||||
<property name="server.dir" value="${basedir}/../../server" />
|
||||
<property name="server.lib.dir" value="${server.dir}/lib" />
|
||||
|
||||
<property name="src.dir" value="${basedir}/src" />
|
||||
<property name="module.dir" value="${basedir}/module" />
|
||||
<property name="modinf.dir" value="${module.dir}/MOD-INF" />
|
||||
<property name="lib.dir" value="${modinf.dir}/lib" />
|
||||
<property name="classes.dir" value="${modinf.dir}/classes" />
|
||||
|
||||
<path id="class.path">
|
||||
<fileset dir="${refine.lib.dir}">
|
||||
<include name="**/*.jar" />
|
||||
</fileset>
|
||||
<fileset dir="${server.lib.dir}">
|
||||
<include name="**/*.jar" />
|
||||
</fileset>
|
||||
<pathelement path="${refine.classes.dir}"/>
|
||||
</path>
|
||||
|
||||
<target name="build_java">
|
||||
<mkdir dir="${classes.dir}" />
|
||||
<javac encoding="utf-8" destdir="${classes.dir}" debug="true" includeAntRuntime="no">
|
||||
<src path="${src.dir}"/>
|
||||
<classpath refid="class.path" />
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="build" depends="build_java"/>
|
||||
|
||||
<target name="dist" depends="build">
|
||||
<mkdir dir="${ext.dir}"/>
|
||||
|
||||
<copy todir="${ext.dir}/module">
|
||||
<fileset dir="module">
|
||||
<include name="**/*.*"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
|
||||
<copy todir="${ext.dir}/licenses">
|
||||
<fileset dir="${basedir}/licenses">
|
||||
<include name="**"/>
|
||||
</fileset>
|
||||
</copy>
|
||||
<copy file="${basedir}/LICENSE.txt" tofile="${ext.dir}/LICENSE.txt"/>
|
||||
<copy file="${basedir}/README.txt" tofile="${ext.dir}/README.txt"/>
|
||||
|
||||
<zip destfile="${dist.dir}/google-refine-${fullname}.zip">
|
||||
<zipfileset dir="${ext.dir}/..">
|
||||
<include name="**/**"/>
|
||||
</zipfileset>
|
||||
</zip>
|
||||
|
||||
<delete dir="${ext.dir}"/>
|
||||
</target>
|
||||
|
||||
<target name="clean">
|
||||
<delete dir="${classes.dir}" />
|
||||
</target>
|
||||
|
||||
<target name="distclean">
|
||||
<delete dir="${dist.dir}" />
|
||||
</target>
|
||||
</project>
|
59
extensions/pc-axis/module/MOD-INF/controller.js
Normal file
59
extensions/pc-axis/module/MOD-INF/controller.js
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
* Function invoked to initialize the extension.
|
||||
*/
|
||||
function init() {
    // Identifier under which the PC-Axis format is registered; reused below to
    // associate the ".px" file extension with the same format.
    var formatId = "text/line-based/pc-axis";

    // Register the format with the importing manager, handing it a display
    // label, the name of the parser UI, and a fresh server-side importer.
    var importingManager = Packages.com.google.refine.importing.ImportingManager;
    var importer = new Packages.com.google.refine.pcaxis.PCAxisImporter();
    importingManager.registerFormat(formatId, "PC-Axis text files", "PCAxisParserUI", importer);
    importingManager.registerExtension(".px", formatId);

    // Script files to inject into /index page
    var indexScripts = [
        "scripts/pc-axis-parser-ui.js"
    ];
    Packages.com.google.refine.ClientSideResourceManager.addPaths(
        "index/scripts",
        module,
        indexScripts
    );
}
|
||||
|
||||
/*
|
||||
* Function invoked to handle each request in a custom way.
|
||||
*/
|
||||
function process(path, request, response) {
    // Intentionally empty: this extension implements no custom request handling.
}
|
4
extensions/pc-axis/module/MOD-INF/module.properties
Normal file
4
extensions/pc-axis/module/MOD-INF/module.properties
Normal file
@ -0,0 +1,4 @@
|
||||
name = pc-axis
|
||||
description = Google Refine extension for PC Axis file format
|
||||
templating.macros =
|
||||
requires = core
|
30
extensions/pc-axis/module/scripts/pc-axis-parser-ui.html
Normal file
30
extensions/pc-axis/module/scripts/pc-axis-parser-ui.html
Normal file
@ -0,0 +1,30 @@
|
||||
<div class="grid-layout layout-loose layout-full"><table>
|
||||
<tr>
|
||||
<td><div class="grid-layout layout-tighter"><table>
|
||||
<tr>
|
||||
<td width="1%">Character encoding</td>
|
||||
<td><input bind="encodingInput"></input></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr>
|
||||
<td style="text-align: right;"> </td>
|
||||
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" id="$skip" /></td>
|
||||
<td><label for="$skip">Discard initial</label></td>
|
||||
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$skip">row(s) of data</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" id="$limit" /></td>
|
||||
<td><label for="$limit">Load at most</label></td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" />
|
||||
<label for="$limit">row(s) of data</label></td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" id="$include-file-sources" /></td>
|
||||
<td><label for="$include-file-sources">Store file source<br/>(file names, URLs)<br/>in each row</label></td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
</table></div>
|
158
extensions/pc-axis/module/scripts/pc-axis-parser-ui.js
Normal file
158
extensions/pc-axis/module/scripts/pc-axis-parser-ui.js
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
Refine.PCAxisParserUI = function(controller, jobID, job, format, config,
        dataContainerElmt, progressContainerElmt, optionContainerElmt) {
    // Importing context handed in by the caller; stored verbatim.
    this._controller = controller;
    this._jobID = jobID;
    this._job = job;
    this._format = format;
    this._config = config;

    // Containers this UI renders into: preview data, progress indicator, options panel.
    this._dataContainer = dataContainerElmt;
    this._progressContainer = progressContainerElmt;
    this._optionContainer = optionContainerElmt;

    // Handle of the pending delayed-preview timer; null when nothing is scheduled.
    this._timerID = null;

    // Build the options panel, then request a first preview right away.
    this._initialize();
    this._updatePreview();
};

// Make the constructor available under the parser UI name "PCAxisParserUI".
Refine.DefaultImportingController.parserUIs.PCAxisParserUI = Refine.PCAxisParserUI;
|
||||
|
||||
Refine.PCAxisParserUI.prototype.confirmReadyToCreateProject = function() {
    // This parser UI performs no extra validation; it always reports ready.
    var ready = true;
    return ready;
};
|
||||
|
||||
Refine.PCAxisParserUI.prototype.dispose = function() {
    // Cancel a delayed preview update that has been scheduled but not yet fired.
    var pendingTimer = this._timerID;
    if (pendingTimer === null) {
        return;
    }
    window.clearTimeout(pendingTimer);
    this._timerID = null;
};
|
||||
|
||||
Refine.PCAxisParserUI.prototype.getOptions = function() {
    var elmts = this._optionContainerElmts;

    // Parse `text` as a base-10 integer; yield `fallback` when it is not a number.
    var toIntOr = function(text, fallback) {
        try {
            var parsed = parseInt(text, 10);
            return isNaN(parsed) ? fallback : parsed;
        } catch (e) {
            // Ignore
            return fallback;
        }
    };

    var options = {
        encoding: $.trim(elmts.encodingInput[0].value)
    };

    // An unticked checkbox (or an unparsable number) is reported as -1.
    options.limit = elmts.limitCheckbox[0].checked ?
        toIntOr(elmts.limitInput[0].value, -1) : -1;
    options.skipDataLines = elmts.skipCheckbox[0].checked ?
        toIntOr(elmts.skipInput[0].value, -1) : -1;
    options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

    return options;
};
|
||||
|
||||
Refine.PCAxisParserUI.prototype._initialize = function() {
    var self = this;

    // Replace whatever the options container held with this parser's markup,
    // then keep the handles returned by DOM.bind for the controls used below.
    this._optionContainer.unbind().empty().html(
        DOM.loadHTML("pc-axis", "scripts/pc-axis-parser-ui.html"));
    this._optionContainerElmts = DOM.bind(this._optionContainer);
    this._optionContainerElmts.previewButton.click(function() { self._updatePreview(); });

    // Clicking the encoding field opens the encoding picker; a new choice
    // refreshes the preview immediately.
    this._optionContainerElmts.encodingInput
        .attr('value', this._config.encoding || '')
        .click(function() {
            Encoding.selectEncoding($(this), function() {
                self._updatePreview();
            });
        });

    // Pre-fill the controls from the initial configuration.
    if (this._config.limit > 0) {
        this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
        this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
    }
    if (this._config.skipDataLines > 0) {
        this._optionContainerElmts.skipCheckbox.attr("checked", "checked");
        // Index the element handle first, like limitInput above. The previous
        // `skipInput.value[0].value` dereferenced an undefined `.value` and threw.
        this._optionContainerElmts.skipInput[0].value = this._config.skipDataLines.toString();
    }
    if (this._config.includeFileSources) {
        this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
    }

    // Any change to an input or select schedules a delayed preview refresh.
    var onChange = function() {
        self._scheduleUpdatePreview();
    };
    this._optionContainer.find("input").bind("change", onChange);
    this._optionContainer.find("select").bind("change", onChange);
};
|
||||
|
||||
Refine.PCAxisParserUI.prototype._scheduleUpdatePreview = function() {
    var ui = this;

    // Restart the countdown: drop any refresh that is already pending.
    if (ui._timerID !== null) {
        window.clearTimeout(ui._timerID);
        ui._timerID = null;
    }

    var delayMillis = 500; // 0.5 second
    ui._timerID = window.setTimeout(function() {
        ui._timerID = null;
        ui._updatePreview();
    }, delayMillis);
};
|
||||
|
||||
Refine.PCAxisParserUI.prototype._updatePreview = function() {
    var ui = this;

    // Final step: hide the progress indicator and render the preview table
    // into the emptied data container.
    var renderPreview = function(projectData) {
        ui._progressContainer.hide();

        new Refine.PreviewTable(projectData, ui._dataContainer.unbind().empty());
    };

    // Preview data is only requested when the options update reports "ok".
    var onOptionsUpdated = function(result) {
        if (result.status == "ok") {
            ui._controller.getPreviewData(renderPreview);
        }
    };

    this._progressContainer.show();
    this._controller.updateFormatAndOptions(this.getOptions(), onOptionsUpdated);
};
|
@ -0,0 +1,92 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.pcaxis;
|
||||
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.TabularImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class PCAxisImporter extends TabularImportingParserBase {
|
||||
static final Logger logger = LoggerFactory.getLogger(PCAxisImporter.class);
|
||||
|
||||
public PCAxisImporter() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = new JSONObject();
|
||||
JSONUtilities.safePut(options, "includeFileSources", fileRecords.size() > 1);
|
||||
JSONUtilities.safePut(options, "skipDataLines", 0);
|
||||
JSONUtilities.safePut(options, "limit", -1);
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
TableDataReader dataReader = new PCAxisTableDataReader(lnReader, exceptions);
|
||||
|
||||
// Stuff these settings to get TabularImportingParserBase.readTable
|
||||
// to behave as we want.
|
||||
JSONUtilities.safePut(options, "ignoreLines", -1);
|
||||
JSONUtilities.safePut(options, "headerLines", 1);
|
||||
JSONUtilities.safePut(options, "storeBlankRows", true);
|
||||
JSONUtilities.safePut(options, "storeBlankCellsAsNulls", true);
|
||||
|
||||
TabularImportingParserBase.readTable(
|
||||
project, metadata, job, dataReader,
|
||||
fileSource, limit, options, exceptions);
|
||||
}
|
||||
}
|
@ -0,0 +1,275 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.pcaxis;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.refine.importers.TabularImportingParserBase.TableDataReader;
|
||||
|
||||
public class PCAxisTableDataReader implements TableDataReader {
|
||||
final private static class Dimension {
|
||||
String name;
|
||||
List<String> values;
|
||||
int next;
|
||||
}
|
||||
|
||||
final private LineNumberReader _lnReader;
|
||||
final private List<Exception> exceptions;
|
||||
|
||||
String _line = null;
|
||||
List<Dimension> _dimensions = null;
|
||||
|
||||
public PCAxisTableDataReader(LineNumberReader lnReader, List<Exception> exceptions) {
|
||||
this._lnReader = lnReader;
|
||||
this.exceptions = exceptions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
if (_line == null) {
|
||||
_line = _lnReader.readLine();
|
||||
}
|
||||
if (_line == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (_dimensions == null) {
|
||||
return parseMetadataPrologueForColumnNames();
|
||||
} else {
|
||||
return parseForNextDataRow();
|
||||
}
|
||||
}
|
||||
|
||||
private List<Object> parseMetadataPrologueForColumnNames() throws IOException {
|
||||
_dimensions = new LinkedList<Dimension>();
|
||||
|
||||
List<String> dimensionNames = new ArrayList<String>();
|
||||
while (_line != null) {
|
||||
int equal = _line.indexOf('=');
|
||||
if (equal < 0 || _line.startsWith("DATA=")) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Save the line in case parseValues() changes it.
|
||||
String savedLine = _line;
|
||||
|
||||
List<String> values = parseMetadataValues(equal + 1, exceptions);
|
||||
|
||||
if (savedLine.startsWith("VALUES(\"")) {
|
||||
Dimension dimension = new Dimension();
|
||||
dimension.name = savedLine.substring(8, equal - 2);
|
||||
dimension.values = values;
|
||||
_dimensions.add(dimension);
|
||||
} else if (savedLine.startsWith("STUB=")) {
|
||||
dimensionNames.addAll(0, values);
|
||||
} else if (savedLine.startsWith("HEADING=")) {
|
||||
dimensionNames.addAll(values);
|
||||
}
|
||||
_line = _lnReader.readLine();
|
||||
}
|
||||
|
||||
final Map<String, Integer> dimensionNameToOrder = new HashMap<String, Integer>();
|
||||
for (int i = 0; i < dimensionNames.size(); i++) {
|
||||
dimensionNameToOrder.put(dimensionNames.get(i), dimensionNames.size() - i - 1);
|
||||
}
|
||||
|
||||
Collections.sort(_dimensions, new Comparator<Dimension>() {
|
||||
@Override
|
||||
public int compare(Dimension d0, Dimension d1) {
|
||||
return dimensionNameToOrder.get(d0.name)
|
||||
.compareTo(dimensionNameToOrder.get(d1.name));
|
||||
}
|
||||
});
|
||||
|
||||
List<Object> columnNames = new LinkedList<Object>();
|
||||
if (_dimensions.size() > 0) {
|
||||
for (int i = _dimensions.size() - 1; i > 0; i--) {
|
||||
Dimension d = _dimensions.get(i);
|
||||
columnNames.add(d.name);
|
||||
}
|
||||
columnNames.addAll(_dimensions.get(0).values);
|
||||
}
|
||||
return columnNames;
|
||||
}
|
||||
|
||||
private List<String> parseMetadataValues(int start, List<Exception> _exceptions) throws IOException {
|
||||
List<String> values = new ArrayList<String>();
|
||||
outer:while (_line != null && start < _line.length()) {
|
||||
char c = _line.charAt(start);
|
||||
if (c == '"') {
|
||||
// A string
|
||||
StringBuffer sb = new StringBuffer();
|
||||
inner:while (_line != null && start < _line.length()) {
|
||||
int close = _line.indexOf('"', start + 1);
|
||||
if (close < 0) {
|
||||
// Exceptional case of missing closing "
|
||||
_exceptions.add(new Exception(
|
||||
"Missing closing quotation mark on line " + _lnReader.getLineNumber()));
|
||||
|
||||
sb.append(_line.substring(start + 1));
|
||||
values.add(sb.toString());
|
||||
break outer;
|
||||
} else {
|
||||
sb.append(_line.substring(start + 1, close));
|
||||
if (close == _line.length() - 1) {
|
||||
// String value continues on next line
|
||||
_line = _lnReader.readLine();
|
||||
start = 0;
|
||||
if (_line != null && _line.length() > 0) {
|
||||
c = _line.charAt(0);
|
||||
if (c == '"') {
|
||||
continue inner;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
start = close + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
values.add(sb.toString());
|
||||
} else {
|
||||
// A number or identifier
|
||||
int comma = customIndexOf(_line, ',', start + 1);
|
||||
int semicolon = customIndexOf(_line, ';', start + 1);
|
||||
int space = customIndexOf(_line, ' ', start + 1);
|
||||
int end = Math.min(comma, Math.min(semicolon, space));
|
||||
values.add(_line.substring(start, end));
|
||||
start = end;
|
||||
}
|
||||
|
||||
if (start == _line.length()) {
|
||||
// End of line but no ;. Continue onto next line.
|
||||
_line = _lnReader.readLine();
|
||||
start = 0;
|
||||
} else {
|
||||
c = _line.charAt(start);
|
||||
if (c == ';' || c == ')') {
|
||||
break;
|
||||
} else if (c == ',' || c == ' ' || c == '-') {
|
||||
start++;
|
||||
if (start == _line.length()) {
|
||||
_line = _lnReader.readLine();
|
||||
start = 0;
|
||||
}
|
||||
} else {
|
||||
// Exceptional case.
|
||||
_exceptions.add(new Exception(
|
||||
"Unrecognized character " + c + " on line " + _lnReader.getLineNumber()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
private List<Object> parseForNextDataRow() throws IOException {
|
||||
List<Object> cells = getNextBatchOfDataValues(_dimensions.get(0).values.size());
|
||||
if (cells.size() == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (_dimensions.size() > 0) {
|
||||
for (int i = 1; i < _dimensions.size(); i++) {
|
||||
Dimension d = _dimensions.get(i);
|
||||
if (d.next == d.values.size()) {
|
||||
d.next = 0;
|
||||
if (i < _dimensions.size() - 1) {
|
||||
_dimensions.get(i + 1).next++;
|
||||
}
|
||||
}
|
||||
cells.add(0, d.values.get(d.next));
|
||||
if (i == 1) {
|
||||
d.next++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return cells;
|
||||
}
|
||||
|
||||
private List<Object> getNextBatchOfDataValues(int expectedCount) throws IOException {
|
||||
List<Object> cells = new LinkedList<Object>();
|
||||
|
||||
int start = _line.startsWith("DATA=") ? 5 : 0;
|
||||
while (_line != null) {
|
||||
int end = Math.min(
|
||||
customIndexOf(_line, ';', start),
|
||||
Math.min(
|
||||
customIndexOf(_line, ' ', start),
|
||||
customIndexOf(_line, '\t', start)));
|
||||
|
||||
if (end > start) {
|
||||
cells.add(_line.substring(start, end));
|
||||
}
|
||||
|
||||
while (end < _line.length()) {
|
||||
if (Character.isWhitespace(_line.charAt(end))) {
|
||||
end++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (end == _line.length()) {
|
||||
_line = _lnReader.readLine();
|
||||
start = 0;
|
||||
} else if (_line.charAt(end) == ';') {
|
||||
_line = _lnReader.readLine();
|
||||
break;
|
||||
} else {
|
||||
start = end;
|
||||
}
|
||||
|
||||
if (cells.size() == expectedCount) {
|
||||
_line = _line.substring(start);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
static private int customIndexOf(String s, char c, int start) {
|
||||
int i = s.indexOf(c, start);
|
||||
return i < 0 ? s.length() : i;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user