Initial version of the wikitext importer
This commit is contained in:
parent
9eeba8efe2
commit
e6406f56ef
254
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
254
main/src/com/google/refine/importers/WikitextImporter.java
Normal file
@ -0,0 +1,254 @@
|
|||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import com.google.common.io.CharStreams;
|
||||||
|
import de.fau.cs.osr.ptk.common.AstVisitor;
|
||||||
|
import de.fau.cs.osr.ptk.common.ParserCommon;
|
||||||
|
|
||||||
|
/*
|
||||||
|
import org.sweble.wikitext.engine.PageId;
|
||||||
|
import org.sweble.wikitext.engine.PageTitle;
|
||||||
|
import org.sweble.wikitext.engine.WtEngineImpl;
|
||||||
|
import org.sweble.wikitext.engine.EngineException;
|
||||||
|
import org.sweble.wikitext.engine.config.WikiConfig;
|
||||||
|
import org.sweble.wikitext.engine.nodes.EngProcessedPage;
|
||||||
|
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp; */
|
||||||
|
import org.sweble.wikitext.parser.ParserConfig;
|
||||||
|
import org.sweble.wikitext.parser.utils.SimpleParserConfig;
|
||||||
|
import org.sweble.wikitext.parser.WikitextParser;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtNode;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtText;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTable;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableHeader;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableRow;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtTableCell;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtBody;
|
||||||
|
import org.sweble.wikitext.parser.parser.LinkTargetException;
|
||||||
|
|
||||||
|
import org.sweble.wikitext.parser.WikitextEncodingValidator;
|
||||||
|
import org.sweble.wikitext.parser.WikitextPreprocessor;
|
||||||
|
import org.sweble.wikitext.parser.encval.ValidatedWikitext;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtParsedWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.nodes.WtPreproWikitextPage;
|
||||||
|
import org.sweble.wikitext.parser.parser.PreprocessorToParserTransformer;
|
||||||
|
import org.sweble.wikitext.parser.preprocessor.PreprocessedWikitext;
|
||||||
|
|
||||||
|
import xtc.parser.ParseException;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public class WikitextImporter extends TabularImportingParserBase {
|
||||||
|
static final Logger logger = LoggerFactory.getLogger(WikitextImporter.class);
|
||||||
|
|
||||||
|
public WikitextImporter() {
|
||||||
|
super(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(
|
||||||
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", false);
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
public class WikitextTableVisitor extends AstVisitor<WtNode> {
|
||||||
|
|
||||||
|
public List<String> header;
|
||||||
|
public List<List<String>> rows;
|
||||||
|
private List<String> currentRow;
|
||||||
|
private StringBuilder currentCellString;
|
||||||
|
// private String currentCellLink;
|
||||||
|
|
||||||
|
public WikitextTableVisitor() {
|
||||||
|
header = null;
|
||||||
|
rows = new ArrayList<List<String>>();
|
||||||
|
currentCellString = null;
|
||||||
|
// currentCellLink = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean before(WtNode node) {
|
||||||
|
return super.before(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtNode e) {
|
||||||
|
/*
|
||||||
|
System.out.println("ignoring node:");
|
||||||
|
System.out.println(e.getNodeTypeName());
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtParsedWikitextPage e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtBody e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTable e) {
|
||||||
|
iterate(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableHeader e) {
|
||||||
|
currentRow = new ArrayList<String>();
|
||||||
|
iterate(e);
|
||||||
|
header = currentRow;
|
||||||
|
currentRow = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableRow e)
|
||||||
|
{
|
||||||
|
if (currentRow == null) {
|
||||||
|
currentRow = new ArrayList<String>();
|
||||||
|
iterate(e);
|
||||||
|
if(currentRow.size() > 0) {
|
||||||
|
rows.add(currentRow);
|
||||||
|
}
|
||||||
|
currentRow = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visit(WtTableCell e)
|
||||||
|
{
|
||||||
|
if (currentRow != null) {
|
||||||
|
currentCellString = new StringBuilder();
|
||||||
|
iterate(e);
|
||||||
|
String cellValue = currentCellString.toString().trim();
|
||||||
|
currentRow.add(cellValue);
|
||||||
|
currentCellString = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void visit(WtText text) {
|
||||||
|
currentCellString.append(text.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Object after(WtNode node, Object result)
|
||||||
|
{
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
/*
|
||||||
|
final List<Object> columnNames;
|
||||||
|
if (options.has("columnNames")) {
|
||||||
|
columnNames = new ArrayList<Object>();
|
||||||
|
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||||
|
for (String s : strings) {
|
||||||
|
columnNames.add(s);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
|
} else {
|
||||||
|
columnNames = null;
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
|
try {
|
||||||
|
int skip = JSONUtilities.getInt(options, "ignoreLines", -1);
|
||||||
|
while (skip > 0) {
|
||||||
|
lnReader.readLine();
|
||||||
|
skip--;
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.error("Error reading line-based file", e);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(options, "ignoreLines", -1); */
|
||||||
|
|
||||||
|
// Set-up a simple wiki configuration
|
||||||
|
ParserConfig parserConfig = new SimpleParserConfig();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Encoding validation
|
||||||
|
|
||||||
|
WikitextEncodingValidator v = new WikitextEncodingValidator();
|
||||||
|
|
||||||
|
String wikitext = CharStreams.toString(reader);
|
||||||
|
String title = "Page title";
|
||||||
|
ValidatedWikitext validated = v.validate(parserConfig, wikitext, title);
|
||||||
|
|
||||||
|
// Pre-processing
|
||||||
|
|
||||||
|
WikitextPreprocessor prep = new WikitextPreprocessor(parserConfig);
|
||||||
|
|
||||||
|
WtPreproWikitextPage prepArticle =
|
||||||
|
(WtPreproWikitextPage) prep.parseArticle(validated, title, false);
|
||||||
|
|
||||||
|
// Parsing
|
||||||
|
|
||||||
|
PreprocessedWikitext ppw = PreprocessorToParserTransformer
|
||||||
|
.transform(prepArticle);
|
||||||
|
|
||||||
|
WikitextParser parser = new WikitextParser(parserConfig);
|
||||||
|
|
||||||
|
WtParsedWikitextPage parsedArticle;
|
||||||
|
parsedArticle = (WtParsedWikitextPage) parser.parseArticle(ppw, title);
|
||||||
|
|
||||||
|
// Compile the retrieved page
|
||||||
|
final WikitextTableVisitor vs = new WikitextTableVisitor();
|
||||||
|
vs.go(parsedArticle);
|
||||||
|
|
||||||
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
|
private int currentRow = 0;
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
List<Object> row = null;
|
||||||
|
if(currentRow < vs.rows.size()) {
|
||||||
|
List<String> origRow = vs.rows.get(currentRow);
|
||||||
|
row = new ArrayList<Object>();
|
||||||
|
for (int i = 0; i < origRow.size(); i++) {
|
||||||
|
row.add(origRow.get(i));
|
||||||
|
}
|
||||||
|
currentRow++;
|
||||||
|
}
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
int headerLines = vs.header != null ? 1 : 0;
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "headerLines", headerLines);
|
||||||
|
|
||||||
|
TabularImportingParserBase.readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
} catch (IOException e1) {
|
||||||
|
e1.printStackTrace();
|
||||||
|
} catch (ParseException e1) {
|
||||||
|
exceptions.add(e1);
|
||||||
|
e1.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,137 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010,2011 Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.times;
|
||||||
|
import static org.mockito.Mockito.verify;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.AfterMethod;
|
||||||
|
import org.testng.annotations.BeforeMethod;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import com.google.refine.importers.WikitextImporter;
|
||||||
|
|
||||||
|
public class WikitextImporterTests extends ImporterTest {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeTest
|
||||||
|
public void init() {
|
||||||
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
//constants
|
||||||
|
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
|
||||||
|
|
||||||
|
//System Under Test
|
||||||
|
WikitextImporter importer = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@BeforeMethod
|
||||||
|
public void setUp() {
|
||||||
|
super.setUp();
|
||||||
|
importer = new WikitextImporter();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@AfterMethod
|
||||||
|
public void tearDown(){
|
||||||
|
importer = null;
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void readSimpleData() {
|
||||||
|
String input = "\n"
|
||||||
|
+ "{|\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| a || b || c \n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "| d || e || f\n"
|
||||||
|
+ "|-\n"
|
||||||
|
+ "|}\n";
|
||||||
|
try {
|
||||||
|
prepareOptions(0, 0, 0, 0, true);
|
||||||
|
parse(input);
|
||||||
|
} catch (Exception e) {
|
||||||
|
Assert.fail("Parsing failed", e);
|
||||||
|
}
|
||||||
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.size(), 2);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
|
Assert.assertEquals(project.rows.get(0).cells.get(0).value, "a");
|
||||||
|
Assert.assertEquals(project.rows.get(1).cells.get(2).value, "f");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//--helpers--
|
||||||
|
|
||||||
|
private void parse(String wikitext) {
|
||||||
|
parseOneFile(importer, new StringReader(wikitext));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepareOptions(
|
||||||
|
int limit, int skip, int ignoreLines,
|
||||||
|
int headerLines, boolean guessValueType) {
|
||||||
|
|
||||||
|
whenGetIntegerOption("limit", options, limit);
|
||||||
|
whenGetIntegerOption("skipDataLines", options, skip);
|
||||||
|
whenGetIntegerOption("ignoreLines", options, ignoreLines);
|
||||||
|
whenGetIntegerOption("headerLines", options, headerLines);
|
||||||
|
whenGetBooleanOption("guessCellValueTypes", options, guessValueType);
|
||||||
|
whenGetBooleanOption("storeBlankCellsAsNulls", options, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyOptions() {
|
||||||
|
try {
|
||||||
|
verify(options, times(1)).getString("separator");
|
||||||
|
verify(options, times(1)).getInt("limit");
|
||||||
|
verify(options, times(1)).getInt("skipDataLines");
|
||||||
|
verify(options, times(1)).getInt("ignoreLines");
|
||||||
|
verify(options, times(1)).getInt("headerLines");
|
||||||
|
verify(options, times(1)).getBoolean("guessCellValueTypes");
|
||||||
|
verify(options, times(1)).getBoolean("processQuotes");
|
||||||
|
verify(options, times(1)).getBoolean("storeBlankCellsAsNulls");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
Assert.fail("JSON exception",e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
BIN
main/webapp/WEB-INF/lib/swc-parser-lazy-2.0.0.jar
Normal file
BIN
main/webapp/WEB-INF/lib/swc-parser-lazy-2.0.0.jar
Normal file
Binary file not shown.
@ -212,6 +212,7 @@ function registerImporting() {
|
|||||||
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfTriplesParserUI", new Packages.com.google.refine.importers.RdfXmlTripleImporter());
|
||||||
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
||||||
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
IM.registerFormat("text/marc", "MARC files", "XmlParserUI", new Packages.com.google.refine.importers.MarcImporter());
|
||||||
|
IM.registerFormat("text/wiki", "Wikitext files", "WikitextParserUI", new Packages.com.google.refine.importers.WikitextImporter());
|
||||||
|
|
||||||
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
||||||
|
|
||||||
@ -344,7 +345,8 @@ function init() {
|
|||||||
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/json-parser-ui.js",
|
"scripts/index/parser-interfaces/json-parser-ui.js",
|
||||||
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js"
|
"scripts/index/parser-interfaces/rdf-triples-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/wikitext-parser-ui.js"
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -0,0 +1,237 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Parser UI for the wikitext format.
 *
 * Stores the importing controller, the job description and the three
 * container elements, then builds the option panel and requests a first
 * preview.
 */
Refine.WikitextParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {
  // Importing context
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Page areas this UI renders into
  this._dataContainer = dataContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._optionContainer = optionContainerElmt;

  // Handle of the pending delayed preview refresh, if any
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};

// Make the UI available to the importing controller under its registered name.
Refine.DefaultImportingController.parserUIs.WikitextParserUI = Refine.WikitextParserUI;
|
||||||
|
|
||||||
|
/**
 * Cancels the pending delayed preview refresh, if there is one.
 */
Refine.WikitextParserUI.prototype.dispose = function() {
  if (this._timerID === null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||||
|
|
||||||
|
/**
 * Tells whether a project can be created with the current options.
 * This UI has no blocking condition, so the answer is always true.
 */
Refine.WikitextParserUI.prototype.confirmReadyToCreateProject = function() {
  return true;
};
|
||||||
|
|
||||||
|
/**
 * Reads the option panel and returns the importing options as a plain
 * object: encoding, separator, the four line/row counts and the boolean
 * flags.
 */
Refine.WikitextParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;

  // Parses a base-10 integer, falling back to `fallback` when the text is
  // not a number (or parsing throws).
  var toInt = function(text, fallback) {
    var parsed;
    try {
      parsed = parseInt(text,10);
    } catch (e) {
      return fallback;
    }
    return isNaN(parsed) ? fallback : parsed;
  };

  // Value of a numeric option guarded by a checkbox: the input is only read
  // when the checkbox is ticked.
  var guardedCount = function(checkbox, input, fallback) {
    return checkbox[0].checked ? toInt(input[0].value, fallback) : fallback;
  };

  var result = {
    encoding: $.trim(elmts.encodingInput[0].value)
  };

  var separatorChoice = this._optionContainer.find("input[name='column-separator']:checked")[0].value;
  if (separatorChoice === 'comma') {
    result.separator = ",";
  } else if (separatorChoice === 'tab') {
    result.separator = "\\t";
  } else {
    result.separator = elmts.columnSeparatorInput[0].value;
  }

  result.ignoreLines = guardedCount(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  result.headerLines = guardedCount(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  result.skipDataLines = guardedCount(elmts.skipCheckbox, elmts.skipInput, 0);
  result.limit = guardedCount(elmts.limitCheckbox, elmts.limitInput, -1);

  result.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  result.guessCellValueTypes = elmts.guessCellValueTypesCheckbox[0].checked;
  result.processQuotes = elmts.processQuoteMarksCheckbox[0].checked;
  result.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  result.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return result;
};
|
||||||
|
|
||||||
|
/**
 * Builds the option panel: loads the separator-based parser HTML, fills in
 * the localized labels, initializes the controls from the configuration and
 * schedules a preview refresh whenever an input or select changes.
 */
Refine.WikitextParserUI.prototype._initialize = function() {
  var self = this;
  console.log('wikitext ui initialize');

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/separator-based-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  this._optionContainerElmts.previewButton.click(function() { self._updatePreview(); });

  // Localized labels
  this._optionContainerElmts.previewButton.html($.i18n._('core-buttons')["update-preview"]);
  $('#or-import-encoding').html($.i18n._('core-index-import')["char-encoding"]);
  $('#or-import-colsep').html($.i18n._('core-index-parser')["col-separated-by"]);
  $('#or-import-commas').html($.i18n._('core-index-parser')["commas"]);
  $('#or-import-tabs').html($.i18n._('core-index-parser')["tabs"]);
  $('#or-import-custom').html($.i18n._('core-index-parser')["custom"]);
  $('#or-import-escape').html($.i18n._('core-index-parser')["escape"]);

  $('#or-import-ignore').text($.i18n._('core-index-parser')["ignore-first"]);
  $('#or-import-lines').text($.i18n._('core-index-parser')["lines-beg"]);
  $('#or-import-parse').text($.i18n._('core-index-parser')["parse-next"]);
  $('#or-import-header').text($.i18n._('core-index-parser')["lines-header"]);
  $('#or-import-discard').text($.i18n._('core-index-parser')["discard-initial"]);
  $('#or-import-rows').text($.i18n._('core-index-parser')["rows-data"]);
  $('#or-import-load').text($.i18n._('core-index-parser')["load-at-most"]);
  $('#or-import-rows2').text($.i18n._('core-index-parser')["rows-data"]);
  $('#or-import-parseCell').html($.i18n._('core-index-parser')["parse-cell"]);
  $('#or-import-quote').html($.i18n._('core-index-parser')["quotation-mark"]);
  $('#or-import-blank').text($.i18n._('core-index-parser')["store-blank"]);
  $('#or-import-null').text($.i18n._('core-index-parser')["store-nulls"]);
  $('#or-import-source').html($.i18n._('core-index-parser')["store-source"]);

  this._optionContainerElmts.encodingInput
    .attr('value', this._config.encoding || '')
    .click(function() {
      Encoding.selectEncoding($(this), function() {
        self._updatePreview();
      });
    });

  var columnSeparatorValue = (this._config.separator == ",") ? 'comma' :
      ((this._config.separator == "\\t") ? 'tab' : 'custom');
  this._optionContainer.find(
      "input[name='column-separator'][value='" + columnSeparatorValue + "']").prop("checked", true);
  // The configuration may carry no separator; fall back to an empty string so
  // the text box does not end up showing "undefined".
  this._optionContainerElmts.columnSeparatorInput[0].value = this._config.separator || '';

  if (this._config.ignoreLines > 0) {
    this._optionContainerElmts.ignoreCheckbox.prop("checked", true);
    this._optionContainerElmts.ignoreInput[0].value = this._config.ignoreLines.toString();
  }
  if (this._config.headerLines > 0) {
    this._optionContainerElmts.headerLinesCheckbox.prop("checked", true);
    this._optionContainerElmts.headerLinesInput[0].value = this._config.headerLines.toString();
  }
  if (this._config.limit > 0) {
    this._optionContainerElmts.limitCheckbox.prop("checked", true);
    this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
  }
  if (this._config.skipDataLines > 0) {
    this._optionContainerElmts.skipCheckbox.prop("checked", true);
    // skipInput is a jQuery object like its siblings: index it first, then
    // set the value (the former `skipInput.value[0]` threw a TypeError).
    this._optionContainerElmts.skipInput[0].value = this._config.skipDataLines.toString();
  }
  if (this._config.storeBlankRows) {
    this._optionContainerElmts.storeBlankRowsCheckbox.prop("checked", true);
  }

  if (this._config.guessCellValueTypes) {
    this._optionContainerElmts.guessCellValueTypesCheckbox.prop("checked", true);
  }
  if (this._config.processQuotes) {
    this._optionContainerElmts.processQuoteMarksCheckbox.prop("checked", true);
  }

  if (this._config.storeBlankCellsAsNulls) {
    this._optionContainerElmts.storeBlankCellsAsNullsCheckbox.prop("checked", true);
  }
  if (this._config.includeFileSources) {
    this._optionContainerElmts.includeFileSourcesCheckbox.prop("checked", true);
  }

  // Any change to an option refreshes the preview after a short delay.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
|
||||||
|
|
||||||
|
/**
 * (Re)starts the half-second timer after which the preview is refreshed;
 * a timer that is already pending is cancelled first.
 */
Refine.WikitextParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;
  console.log('scheduleUpdatePreview');

  if (ui._timerID !== null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var refresh = function() {
    ui._timerID = null;
    ui._updatePreview();
  };
  ui._timerID = window.setTimeout(refresh, 500); // 0.5 second
};
|
||||||
|
|
||||||
|
/**
 * Sends the current options to the controller and, when it answers "ok",
 * fetches the preview data and renders it into the data container.
 * The progress indicator is shown up front and hidden once the preview
 * data has arrived.
 */
Refine.WikitextParserUI.prototype._updatePreview = function() {
  var ui = this;

  ui._progressContainer.show();
  console.log('updatePreview');

  var showPreview = function(projectData) {
    ui._progressContainer.hide();

    new Refine.PreviewTable(projectData, ui._dataContainer.unbind().empty());
  };

  var onOptionsUpdated = function(result) {
    console.log(result.status);
    if (result.status != "ok") {
      return;
    }
    ui._controller.getPreviewData(showPreview);
  };

  ui._controller.updateFormatAndOptions(ui.getOptions(), onOptionsUpdated);
};
|
Loading…
Reference in New Issue
Block a user