Merged new importer UI work from branch over.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2170 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
0fa99d21ca
commit
78edff6f7f
@ -33,8 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
var html = "text/html";
|
var html = "text/html";
|
||||||
var encoding = "UTF-8";
|
var encoding = "UTF-8";
|
||||||
var version="0.2"
|
var version = "0.2";
|
||||||
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Function invoked to initialize the extension.
|
* Function invoked to initialize the extension.
|
||||||
@ -43,21 +42,24 @@ function init() {
|
|||||||
// Packages.java.lang.System.err.println("Initializing gData extension");
|
// Packages.java.lang.System.err.println("Initializing gData extension");
|
||||||
// Packages.java.lang.System.err.println(module.getMountPoint());
|
// Packages.java.lang.System.err.println(module.getMountPoint());
|
||||||
|
|
||||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
var RS = Packages.com.google.refine.RefineServlet;
|
||||||
module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
|
RS.registerCommand(module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
|
||||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
RS.registerCommand(module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
|
||||||
module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
|
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
||||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
|
||||||
module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
|
||||||
|
|
||||||
// Register importer and exporter
|
// Register importer and exporter
|
||||||
Packages.com.google.refine.importers.ImporterRegistry.registerImporter(
|
var IM = Packages.com.google.refine.importing.ImportingManager;
|
||||||
"gdata-importer", new Packages.com.google.refine.extension.gdata.GDataImporter());
|
IM.registerFormat("service/gdata", "GData services"); // generic format, no parser to handle it
|
||||||
|
IM.registerFormat("service/gdata/spreadsheet", "Google spreadsheets", false, "GoogleSpreadsheetParserUI",
|
||||||
|
new Packages.com.google.refine.extension.gdata.GDataImporter());
|
||||||
|
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.GDataUrlRewriter())
|
||||||
|
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.FusionTablesUrlRewriter())
|
||||||
|
|
||||||
// Packages.com.google.refine.exporters.ExporterRegistry.registerExporter(
|
// Packages.com.google.refine.exporters.ExporterRegistry.registerExporter(
|
||||||
// "gdata-exporter", new Packages.com.google.refine.extension.gdata.GDataExporter());
|
// "gdata-exporter", new Packages.com.google.refine.extension.gdata.GDataExporter());
|
||||||
|
|
||||||
// Script files to inject into /project page
|
// Script files to inject into /project page
|
||||||
|
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
|
||||||
ClientSideResourceManager.addPaths(
|
ClientSideResourceManager.addPaths(
|
||||||
"project/scripts",
|
"project/scripts",
|
||||||
module,
|
module,
|
||||||
|
@ -0,0 +1,128 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2010, Thomas F. Morris
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* - Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of Google nor the names of its contributors may be used to
|
||||||
|
* endorse or promote products derived from this software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
|
||||||
|
import com.google.gdata.client.GoogleService;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest;
|
||||||
|
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||||
|
import com.google.gdata.util.ContentType;
|
||||||
|
import com.google.gdata.util.ServiceException;
|
||||||
|
import com.google.refine.importing.UrlRewriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
|
* @copyright 2010 Thomas F. Morris
|
||||||
|
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||||
|
*/
|
||||||
|
public class FusionTablesUrlRewriter implements UrlRewriter {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Result rewrite(String urlString) {
|
||||||
|
try {
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
if (isFusionTableURL(url)) {
|
||||||
|
Result result = new Result();
|
||||||
|
try {
|
||||||
|
result.rewrittenUrl = generateQueryUrl(url, 0, -1).toExternalForm();
|
||||||
|
result.format = "text/line-based/*sv";
|
||||||
|
result.download = true;
|
||||||
|
return result;
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
// TODO: what do we do here?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public boolean isFusionTableURL(URL url) {
|
||||||
|
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
||||||
|
String query = url.getQuery();
|
||||||
|
if (query == null) {
|
||||||
|
query = "";
|
||||||
|
}
|
||||||
|
return url.getHost().endsWith(".google.com")
|
||||||
|
&& url.getPath().startsWith("/fusiontables/DataSource")
|
||||||
|
&& query.contains("dsrcid=");
|
||||||
|
}
|
||||||
|
|
||||||
|
static public URL generateQueryUrl(URL url, int start, int limit)
|
||||||
|
throws MalformedURLException, UnsupportedEncodingException {
|
||||||
|
|
||||||
|
String tableId = getFusionTableKey(url);
|
||||||
|
|
||||||
|
final String SERVICE_URL =
|
||||||
|
"http://www.google.com/fusiontables/api/query";
|
||||||
|
final String selectQuery = "select * from " + tableId
|
||||||
|
+ " offset " + (start) + (limit > 0 ? (" limit " + limit) : "");
|
||||||
|
|
||||||
|
return new URL(SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
|
||||||
|
}
|
||||||
|
|
||||||
|
static public InputStream openInputStream(URL queryUrl) throws IOException, ServiceException {
|
||||||
|
GoogleService service = new GoogleService("fusiontables", GDataExtension.SERVICE_APP_NAME);
|
||||||
|
// String token = TokenCookie.getToken(request);
|
||||||
|
// if (token != null) {
|
||||||
|
// service.setAuthSubToken(token);
|
||||||
|
// }
|
||||||
|
GDataRequest queryRequest = service.getRequestFactory().getRequest(
|
||||||
|
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
|
||||||
|
queryRequest.execute();
|
||||||
|
|
||||||
|
return queryRequest.getResponseStream();
|
||||||
|
}
|
||||||
|
|
||||||
|
static private String getFusionTableKey(URL url) {
|
||||||
|
String query = url.getQuery();
|
||||||
|
if (query != null) {
|
||||||
|
String[] parts = query.split("&");
|
||||||
|
for (String part : parts) {
|
||||||
|
if (part.startsWith("dsrcid=")) {
|
||||||
|
int offset = ("dsrcid=").length();
|
||||||
|
String tableId = part.substring(offset);
|
||||||
|
// TODO: Any special id format considerations to worry about?
|
||||||
|
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
||||||
|
// return tableId;
|
||||||
|
// }
|
||||||
|
return tableId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2010, Thomas F. Morris
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* - Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of Google nor the names of its contributors may be used to
|
||||||
|
* endorse or promote products derived from this software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
|
* @copyright 2010 Thomas F. Morris
|
||||||
|
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||||
|
*/
|
||||||
|
abstract public class GDataExtension {
|
||||||
|
static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";
|
||||||
|
|
||||||
|
static private FeedURLFactory factory;
|
||||||
|
static public FeedURLFactory getFeedUrlFactory() {
|
||||||
|
if (factory == null) {
|
||||||
|
// Careful - this is shared by everyone.
|
||||||
|
factory = FeedURLFactory.getDefault();
|
||||||
|
}
|
||||||
|
return factory;
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2010,2011. Thomas F. Morris
|
* Copyright (c) 2010, Thomas F. Morris
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
@ -29,281 +29,125 @@
|
|||||||
package com.google.refine.extension.gdata;
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.Scanner;
|
|
||||||
import java.util.regex.MatchResult;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import com.google.gdata.client.GoogleService;
|
import org.json.JSONObject;
|
||||||
import com.google.gdata.client.Service.GDataRequest;
|
|
||||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
|
||||||
import com.google.gdata.client.spreadsheet.CellQuery;
|
import com.google.gdata.client.spreadsheet.CellQuery;
|
||||||
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
|
||||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||||
|
import com.google.gdata.data.spreadsheet.Cell;
|
||||||
import com.google.gdata.data.spreadsheet.CellEntry;
|
import com.google.gdata.data.spreadsheet.CellEntry;
|
||||||
import com.google.gdata.data.spreadsheet.CellFeed;
|
import com.google.gdata.data.spreadsheet.CellFeed;
|
||||||
import com.google.gdata.data.spreadsheet.ListEntry;
|
|
||||||
import com.google.gdata.data.spreadsheet.ListFeed;
|
|
||||||
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
|
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
|
||||||
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
|
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
|
||||||
import com.google.gdata.data.spreadsheet.WorksheetEntry;
|
import com.google.gdata.data.spreadsheet.WorksheetEntry;
|
||||||
import com.google.gdata.data.spreadsheet.WorksheetFeed;
|
import com.google.gdata.data.spreadsheet.WorksheetFeed;
|
||||||
import com.google.gdata.util.ContentType;
|
|
||||||
import com.google.gdata.util.InvalidEntryException;
|
|
||||||
import com.google.gdata.util.ServiceException;
|
import com.google.gdata.util.ServiceException;
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.importers.TabularImportingParserBase;
|
||||||
import com.google.refine.importers.ImporterUtilities;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.importers.UrlImporter;
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Cell;
|
|
||||||
import com.google.refine.model.Column;
|
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Google Refine importer for Google Spreadsheets.
|
* Google Refine parser for Google Spreadsheets.
|
||||||
*
|
*
|
||||||
* @author Tom Morris <tfmorris@gmail.com>
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
* @copyright 2010 Thomas F. Morris
|
* @copyright 2010 Thomas F. Morris
|
||||||
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||||
*/
|
*/
|
||||||
public class GDataImporter implements UrlImporter {
|
public class GDataImporter extends TabularImportingParserBase {
|
||||||
|
|
||||||
static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";
|
|
||||||
|
|
||||||
private FeedURLFactory factory;
|
|
||||||
|
|
||||||
public GDataImporter() {
|
public GDataImporter() {
|
||||||
// Careful - this constructor is called at server init time
|
super(false);
|
||||||
// and is shared by everyone.
|
|
||||||
factory = FeedURLFactory.getDefault();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public void parseOneFile(
|
||||||
public void read(URL url, Project project, ProjectMetadata metadata,
|
Project project,
|
||||||
Properties options) throws Exception {
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
JSONObject fileRecord,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) throws IOException {
|
||||||
|
String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||||
|
String urlString = JSONUtilities.getString(fileRecord, "url", null);
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
|
||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
SpreadsheetService service = new SpreadsheetService(GDataExtension.SERVICE_APP_NAME);
|
||||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
|
||||||
|
|
||||||
// Note: Unlike TSV/CSV importer, we count all rows towards skip, not
|
|
||||||
// just "data" rows
|
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
|
|
||||||
int dataStart = ignoreLines + headerLines + skip;
|
|
||||||
boolean guessValueType = ImporterUtilities.getBooleanOption(
|
|
||||||
"guess-value-type", options, true);
|
|
||||||
|
|
||||||
// TODO: Put this in a namespace?
|
|
||||||
metadata.setCustomMetadata("source-url", url.toExternalForm());
|
|
||||||
|
|
||||||
// Start fresh for each read so that we're not caching authorization or
|
|
||||||
// anything
|
|
||||||
if (isSpreadsheetURL(url)) {
|
|
||||||
importSpreadsheet(url, project, ignoreLines, headerLines, limit,
|
|
||||||
dataStart, guessValueType);
|
|
||||||
} else if (isFusionTableURL(url)) {
|
|
||||||
importFusionTable(url, project, ignoreLines, headerLines, limit,
|
|
||||||
dataStart, guessValueType);
|
|
||||||
} else {
|
|
||||||
// should never happen (famous last words)
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"Got invalid format URL in GDataImporter.read()");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void importSpreadsheet(URL url, Project project, int ignoreLines,
|
|
||||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
|
||||||
throws MalformedURLException, IOException, ServiceException,
|
|
||||||
Exception {
|
|
||||||
SpreadsheetService service = new SpreadsheetService(SERVICE_APP_NAME);
|
|
||||||
// String token = TokenCookie.getToken(request);
|
// String token = TokenCookie.getToken(request);
|
||||||
// if (token != null) {
|
// if (token != null) {
|
||||||
// service.setAuthSubToken(token);
|
// service.setAuthSubToken(token);
|
||||||
// }
|
// }
|
||||||
String spreadsheetKey = getSpreadsheetKey(url);
|
String spreadsheetKey = getSpreadsheetKey(url);
|
||||||
|
|
||||||
|
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
|
||||||
|
for (int sheetIndex : sheets) {
|
||||||
WorksheetEntry worksheet;
|
WorksheetEntry worksheet;
|
||||||
try {
|
try {
|
||||||
worksheet = getWorksheetEntries(service, spreadsheetKey).get(0);
|
worksheet = getWorksheetEntries(service, spreadsheetKey).get(sheetIndex);
|
||||||
} catch (InvalidEntryException e) {
|
} catch (ServiceException e) {
|
||||||
throw new RuntimeException("Failed to open spreadsheet "
|
exceptions.add(e);
|
||||||
+ e.getResponseBody(), e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create columns
|
|
||||||
List<String> columnHeaders = getColumnHeaders(service, worksheet,
|
|
||||||
ignoreLines, headerLines);
|
|
||||||
|
|
||||||
int columnCount = worksheet.getColCount();
|
|
||||||
project.columnModel.setMaxCellIndex(columnCount);
|
|
||||||
boolean validColumn[] = new boolean[columnCount];
|
|
||||||
int index = 0;
|
|
||||||
for (String name : columnHeaders) {
|
|
||||||
Column column = new Column(index, name + " " + index);
|
|
||||||
project.columnModel.columns.add(column);
|
|
||||||
validColumn[index++] = true;
|
|
||||||
}
|
|
||||||
for (int i = index; index < columnCount; index++) {
|
|
||||||
Column column = new Column(index, "Column " + index);
|
|
||||||
project.columnModel.columns.add(column);
|
|
||||||
validColumn[i] = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create data rows & cells
|
|
||||||
int previousRow = dataStart - 1;
|
|
||||||
int previousCol = -1;
|
|
||||||
List<CellEntry> cellEntries = getCells(service, worksheet, dataStart);
|
|
||||||
Row row = null;
|
|
||||||
for (CellEntry cellEntry : cellEntries) {
|
|
||||||
com.google.gdata.data.spreadsheet.Cell cell = cellEntry.getCell();
|
|
||||||
if (cell == null) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int r = cell.getRow() - 1; // convert from 1-based to 0-based
|
|
||||||
int c = cell.getCol() - 1;
|
|
||||||
|
|
||||||
if (limit > 0 && r > limit) {
|
readTable(
|
||||||
break;
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
new BatchRowReader(service, worksheet, 20),
|
||||||
|
fileSource + "#" + worksheet.getTitle().getPlainText(),
|
||||||
|
limit,
|
||||||
|
options,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle gaps in rows
|
static private class BatchRowReader implements TableDataReader {
|
||||||
if (r > previousRow) {
|
final int batchSize;
|
||||||
// Finish and add current row
|
final SpreadsheetService service;
|
||||||
if (row != null) {
|
final WorksheetEntry worksheet;
|
||||||
project.rows.add(row);
|
final int totalRowCount;
|
||||||
// project.columnModel.setMaxCellIndex(row.cells.size()); //
|
|
||||||
// TODO: ???
|
int nextRow = 0; // 0-based
|
||||||
|
int batchRowStart = -1; // 0-based
|
||||||
|
List<List<Object>> rowsOfCells = null;
|
||||||
|
|
||||||
|
public BatchRowReader(SpreadsheetService service, WorksheetEntry worksheet, int batchSize) {
|
||||||
|
this.service = service;
|
||||||
|
this.worksheet = worksheet;
|
||||||
|
this.batchSize = batchSize;
|
||||||
|
this.totalRowCount = worksheet.getRowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add empty rows for skipped rows
|
@Override
|
||||||
while (previousRow < r - 1) {
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
project.rows.add(new Row(columnCount));
|
if (rowsOfCells == null || nextRow > batchRowStart + rowsOfCells.size()) {
|
||||||
previousRow++;
|
batchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
|
||||||
|
if (batchRowStart < totalRowCount) {
|
||||||
|
try {
|
||||||
|
rowsOfCells = getRowsOfCells(service, worksheet, batchRowStart + 1, batchSize);
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
rowsOfCells = null;
|
||||||
|
throw new IOException(e);
|
||||||
}
|
}
|
||||||
row = new Row(columnCount);
|
|
||||||
previousRow = r;
|
|
||||||
previousCol = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add blank cells for any that were skipped before the current one
|
|
||||||
for (int col = previousCol + 1; col < c; col++) {
|
|
||||||
row.cells.add(new Cell("", null));
|
|
||||||
}
|
|
||||||
previousCol = c;
|
|
||||||
|
|
||||||
String s = cell.getValue();
|
|
||||||
if (s != null) {
|
|
||||||
s = s.trim();
|
|
||||||
}
|
|
||||||
if (ExpressionUtils.isNonBlankData(s)) {
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities
|
|
||||||
.parseCellValue(s) : s;
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
} else {
|
} else {
|
||||||
row.cells.add(null);
|
rowsOfCells = null;
|
||||||
}
|
|
||||||
}
|
|
||||||
// Add last row
|
|
||||||
if (row != null) {
|
|
||||||
project.rows.add(row);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void importFusionTable(URL url, Project project, int ignoreLines,
|
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
|
||||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
return rowsOfCells.get(nextRow++ - batchRowStart);
|
||||||
throws MalformedURLException, IOException, ServiceException,
|
|
||||||
Exception {
|
|
||||||
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
|
|
||||||
// String token = TokenCookie.getToken(request);
|
|
||||||
// if (token != null) {
|
|
||||||
// service.setAuthSubToken(token);
|
|
||||||
// }
|
|
||||||
String tableId = getFusionTableKey(url);
|
|
||||||
|
|
||||||
final String SERVICE_URL =
|
|
||||||
"http://www.google.com/fusiontables/api/query";
|
|
||||||
final String selectQuery = "select * from " + tableId
|
|
||||||
+ " offset " + (dataStart) + (limit>0 ? (" limit " + limit):"");
|
|
||||||
|
|
||||||
URL queryUrl = new URL(
|
|
||||||
SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
|
|
||||||
GDataRequest queryRequest = service.getRequestFactory().getRequest(
|
|
||||||
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
|
|
||||||
queryRequest.execute();
|
|
||||||
|
|
||||||
Scanner scanner = new Scanner(queryRequest.getResponseStream(),"UTF-8");
|
|
||||||
|
|
||||||
// TODO: Just use the first row of data as column headers for now
|
|
||||||
List<String> columnHeaders = getTableRow(scanner);
|
|
||||||
|
|
||||||
// Create columns
|
|
||||||
int columnCount = columnHeaders.size();
|
|
||||||
project.columnModel.setMaxCellIndex(columnCount);
|
|
||||||
boolean validColumn[] = new boolean[columnCount];
|
|
||||||
int index = 0;
|
|
||||||
for (String name : columnHeaders) {
|
|
||||||
Column column = new Column(index, name + " " + index);
|
|
||||||
project.columnModel.columns.add(column);
|
|
||||||
validColumn[index++] = true;
|
|
||||||
}
|
|
||||||
for (int i = index; index < columnCount; index++) {
|
|
||||||
Column column = new Column(index, "Column " + index);
|
|
||||||
project.columnModel.columns.add(column);
|
|
||||||
validColumn[i] = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create data rows & cells
|
|
||||||
List<String> values = columnHeaders;
|
|
||||||
while (values != null) {
|
|
||||||
Row row = new Row(columnCount);
|
|
||||||
for (String valString : values) {
|
|
||||||
valString = valString.trim();
|
|
||||||
if (ExpressionUtils.isNonBlankData(valString)) {
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities
|
|
||||||
.parseCellValue(valString) : valString;
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
} else {
|
} else {
|
||||||
row.cells.add(null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
project.rows.add(row);
|
|
||||||
values = getTableRow(scanner);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> getTableRow(Scanner scanner) {
|
|
||||||
/**
|
|
||||||
* CSV values are terminated by comma or end-of-line and consist either of
|
|
||||||
* plain text without commas or quotes, or a quoted expression, where inner
|
|
||||||
* quotes are escaped by doubling.
|
|
||||||
*/
|
|
||||||
final Pattern CSV_VALUE_PATTERN =
|
|
||||||
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
|
|
||||||
|
|
||||||
if (!scanner.hasNextLine()) {
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> result = new ArrayList<String>();
|
|
||||||
while (scanner.hasNextLine()) {
|
|
||||||
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
|
|
||||||
MatchResult match = scanner.match();
|
|
||||||
String quotedString = match.group(2);
|
|
||||||
String decoded = quotedString == null ? match.group(1)
|
|
||||||
: quotedString.replaceAll("\"\"", "\"");
|
|
||||||
result.add(decoded);
|
|
||||||
if (!match.group(4).equals(",")) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the spreadsheets that an authenticated user has access to. Not
|
* Retrieves the spreadsheets that an authenticated user has access to. Not
|
||||||
@ -313,130 +157,67 @@ public class GDataImporter implements UrlImporter {
|
|||||||
* @throws Exception
|
* @throws Exception
|
||||||
* if error in retrieving the spreadsheet information
|
* if error in retrieving the spreadsheet information
|
||||||
*/
|
*/
|
||||||
public List<SpreadsheetEntry> getSpreadsheetEntries(
|
static public List<SpreadsheetEntry> getSpreadsheetEntries(
|
||||||
SpreadsheetService service) throws Exception {
|
SpreadsheetService service
|
||||||
|
) throws Exception {
|
||||||
SpreadsheetFeed feed = service.getFeed(
|
SpreadsheetFeed feed = service.getFeed(
|
||||||
factory.getSpreadsheetsFeedUrl(), SpreadsheetFeed.class);
|
GDataExtension.getFeedUrlFactory().getSpreadsheetsFeedUrl(),
|
||||||
|
SpreadsheetFeed.class);
|
||||||
return feed.getEntries();
|
return feed.getEntries();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<WorksheetEntry> getWorksheetEntries(SpreadsheetService service,
|
static public List<WorksheetEntry> getWorksheetEntries(
|
||||||
String spreadsheetKey) throws MalformedURLException, IOException,
|
SpreadsheetService service, String spreadsheetKey
|
||||||
ServiceException {
|
) throws MalformedURLException, IOException, ServiceException {
|
||||||
WorksheetFeed feed = service
|
WorksheetFeed feed = service.getFeed(
|
||||||
.getFeed(factory.getWorksheetFeedUrl(spreadsheetKey, "public",
|
GDataExtension.getFeedUrlFactory().getWorksheetFeedUrl(spreadsheetKey, "public", "values"),
|
||||||
"values"), WorksheetFeed.class);
|
WorksheetFeed.class);
|
||||||
return feed.getEntries();
|
return feed.getEntries();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static public List<List<Object>> getRowsOfCells(
|
||||||
* Retrieves the columns headers from the cell feed of the worksheet entry.
|
SpreadsheetService service,
|
||||||
*
|
WorksheetEntry worksheet,
|
||||||
* @param worksheet
|
int startRow, // 1-based
|
||||||
* worksheet entry containing the cell feed in question
|
int rowCount
|
||||||
* @return a list of column headers
|
) throws IOException, ServiceException {
|
||||||
* @throws Exception
|
|
||||||
* if error in retrieving the spreadsheet information
|
|
||||||
*/
|
|
||||||
public List<String> getColumnHeaders(SpreadsheetService service,
|
|
||||||
WorksheetEntry worksheet, int startRow, int rows) throws Exception {
|
|
||||||
List<String> headers = new ArrayList<String>();
|
|
||||||
|
|
||||||
// Get the appropriate URL for a cell feed
|
|
||||||
URL cellFeedUrl = worksheet.getCellFeedUrl();
|
URL cellFeedUrl = worksheet.getCellFeedUrl();
|
||||||
|
|
||||||
// Create a query for the cells in the header row(s) (1-based)
|
int minRow = Math.max(1, startRow);
|
||||||
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
int maxRow = Math.min(worksheet.getRowCount(), startRow + rowCount - 1);
|
||||||
if (startRow > 0) {
|
int rows = maxRow - minRow + 1;
|
||||||
cellQuery.setMinimumRow(startRow + 1);
|
|
||||||
}
|
|
||||||
cellQuery.setMaximumRow(startRow + rows);
|
|
||||||
|
|
||||||
// Get the cell feed matching the query
|
|
||||||
CellFeed topRowCellFeed = service.query(cellQuery, CellFeed.class);
|
|
||||||
|
|
||||||
// Get the cell entries from the feed
|
|
||||||
List<CellEntry> cellEntries = topRowCellFeed.getEntries();
|
|
||||||
for (CellEntry entry : cellEntries) {
|
|
||||||
|
|
||||||
// Get the cell element from the entry
|
|
||||||
com.google.gdata.data.spreadsheet.Cell cell = entry.getCell();
|
|
||||||
int r = cell.getRow() - 1;
|
|
||||||
if (cell != null) {
|
|
||||||
if (r == startRow) {
|
|
||||||
headers.add(cell.getValue().trim());
|
|
||||||
} else if (r < startRow + rows) {
|
|
||||||
headers.set(r, headers.get(r) + " "
|
|
||||||
+ cell.getValue().trim());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return headers;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<CellEntry> getCells(SpreadsheetService service,
|
|
||||||
WorksheetEntry worksheet, int startRow) throws IOException,
|
|
||||||
ServiceException {
|
|
||||||
|
|
||||||
URL cellFeedUrl = worksheet.getCellFeedUrl();
|
|
||||||
|
|
||||||
// Create a query skipping the desired number of rows
|
|
||||||
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
|
||||||
cellQuery.setMinimumRow(startRow + 1); // 1-based
|
|
||||||
int rows = worksheet.getRowCount();
|
|
||||||
cellQuery.setMaximumRow(rows);
|
|
||||||
// cellQuery.setMinimumCol(1);
|
|
||||||
int cols = worksheet.getColCount();
|
int cols = worksheet.getColCount();
|
||||||
|
|
||||||
|
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
||||||
|
cellQuery.setMinimumRow(minRow);
|
||||||
|
cellQuery.setMaximumRow(maxRow);
|
||||||
cellQuery.setMaximumCol(cols);
|
cellQuery.setMaximumCol(cols);
|
||||||
cellQuery.setMaxResults(rows * cols);
|
cellQuery.setMaxResults(rows * cols);
|
||||||
cellQuery.setReturnEmpty(false);
|
cellQuery.setReturnEmpty(false);
|
||||||
|
|
||||||
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
|
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
|
||||||
return cellFeed.getEntries();
|
List<CellEntry> cellEntries = cellFeed.getEntries();
|
||||||
|
|
||||||
|
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rows);
|
||||||
|
for (CellEntry cellEntry : cellEntries) {
|
||||||
|
Cell cell = cellEntry.getCell();
|
||||||
|
int row = cell.getRow();
|
||||||
|
int col = cell.getCol();
|
||||||
|
|
||||||
|
while (row > rowsOfCells.size()) {
|
||||||
|
rowsOfCells.add(new ArrayList<Object>(cols));
|
||||||
|
}
|
||||||
|
List<Object> rowOfCells = rowsOfCells.get(row - 1); // 1-based
|
||||||
|
|
||||||
|
while (col > rowOfCells.size()) {
|
||||||
|
rowOfCells.add(null);
|
||||||
|
}
|
||||||
|
rowOfCells.set(col - 1, cell.getValue());
|
||||||
|
}
|
||||||
|
return rowsOfCells;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<ListEntry> getListEntries(SpreadsheetService service,
|
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
|
||||||
WorksheetEntry worksheet) throws IOException, ServiceException {
|
|
||||||
URL listFeedUrl = worksheet.getListFeedUrl();
|
|
||||||
ListFeed feed = service.getFeed(listFeedUrl, ListFeed.class);
|
|
||||||
return feed.getEntries();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String filename) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(URL url) {
|
|
||||||
return isSpreadsheetURL(url) || isFusionTableURL(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isSpreadsheetURL(URL url) {
|
|
||||||
String host = url.getHost();
|
|
||||||
String query = url.getQuery();
|
|
||||||
if (query == null) {
|
|
||||||
query = "";
|
|
||||||
}
|
|
||||||
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
|
||||||
return host.endsWith(".google.com")
|
|
||||||
&& host.contains("spreadsheet")
|
|
||||||
&& getSpreadsheetKey(url) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean isFusionTableURL(URL url) {
|
|
||||||
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
|
||||||
String query = url.getQuery();
|
|
||||||
if (query == null) {
|
|
||||||
query = "";
|
|
||||||
}
|
|
||||||
return url.getHost().endsWith(".google.com")
|
|
||||||
&& url.getPath().startsWith("/fusiontables/DataSource")
|
|
||||||
&& getFusionTableKey(url) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
|
|
||||||
private String getSpreadsheetKey(URL url) {
|
private String getSpreadsheetKey(URL url) {
|
||||||
String query = url.getQuery();
|
String query = url.getQuery();
|
||||||
if (query != null) {
|
if (query != null) {
|
||||||
@ -472,23 +253,4 @@ public class GDataImporter implements UrlImporter {
|
|||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getFusionTableKey(URL url) {
|
|
||||||
String query = url.getQuery();
|
|
||||||
if (query != null) {
|
|
||||||
String[] parts = query.split("&");
|
|
||||||
for (String part : parts) {
|
|
||||||
if (part.startsWith("dsrcid=")) {
|
|
||||||
int offset = ("dsrcid=").length();
|
|
||||||
String tableId = part.substring(offset);
|
|
||||||
// TODO: Any special id format considerations to worry about?
|
|
||||||
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
|
||||||
// return tableId;
|
|
||||||
// }
|
|
||||||
return tableId;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2010, Thomas F. Morris
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* - Redistributions of source code must retain the above copyright notice, this
|
||||||
|
* list of conditions and the following disclaimer.
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* Neither the name of Google nor the names of its contributors may be used to
|
||||||
|
* endorse or promote products derived from this software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
package com.google.refine.extension.gdata;
|
||||||
|
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
|
||||||
|
import com.google.refine.importing.UrlRewriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Tom Morris <tfmorris@gmail.com>
|
||||||
|
* @copyright 2010 Thomas F. Morris
|
||||||
|
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||||
|
*/
|
||||||
|
public class GDataUrlRewriter implements UrlRewriter {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Result rewrite(String urlString) {
|
||||||
|
try {
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
if (isSpreadsheetURL(url)) {
|
||||||
|
Result result = new Result();
|
||||||
|
result.rewrittenUrl = urlString;
|
||||||
|
result.format = "service/gdata/spreadsheet";
|
||||||
|
result.download = false;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public boolean isSpreadsheetURL(URL url) {
|
||||||
|
String host = url.getHost();
|
||||||
|
String query = url.getQuery();
|
||||||
|
if (query == null) {
|
||||||
|
query = "";
|
||||||
|
}
|
||||||
|
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||||
|
return host.endsWith(".google.com") && host.contains("spreadsheet") && query.contains("key=");
|
||||||
|
}
|
||||||
|
}
|
19
main/src/com/google/refine/HttpResponder.java
Normal file
19
main/src/com/google/refine/HttpResponder.java
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
package com.google.refine;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
|
||||||
|
public interface HttpResponder {
|
||||||
|
public void init(RefineServlet servlet);
|
||||||
|
|
||||||
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException;
|
||||||
|
|
||||||
|
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException;
|
||||||
|
}
|
@ -50,7 +50,7 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.commands.importing.ImportManager;
|
import com.google.refine.importing.ImportingManager;
|
||||||
import com.google.refine.io.FileProjectManager;
|
import com.google.refine.io.FileProjectManager;
|
||||||
|
|
||||||
import edu.mit.simile.butterfly.Butterfly;
|
import edu.mit.simile.butterfly.Butterfly;
|
||||||
@ -125,7 +125,7 @@ public class RefineServlet extends Butterfly {
|
|||||||
|
|
||||||
s_dataDir = new File(data);
|
s_dataDir = new File(data);
|
||||||
FileProjectManager.initialize(s_dataDir);
|
FileProjectManager.initialize(s_dataDir);
|
||||||
ImportManager.initialize(this);
|
ImportingManager.initialize(this);
|
||||||
|
|
||||||
if (_timer == null) {
|
if (_timer == null) {
|
||||||
_timer = new Timer("autosave");
|
_timer = new Timer("autosave");
|
||||||
|
180
main/src/com/google/refine/commands/HttpUtilities.java
Normal file
180
main/src/com/google/refine/commands/HttpUtilities.java
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
package com.google.refine.commands;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import org.apache.velocity.VelocityContext;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.refine.Jsonizable;
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
abstract public class HttpUtilities {
|
||||||
|
final static protected Logger logger = LoggerFactory.getLogger("command");
|
||||||
|
|
||||||
|
static public void respond(HttpServletResponse response, String content)
|
||||||
|
throws IOException, ServletException {
|
||||||
|
|
||||||
|
response.setCharacterEncoding("UTF-8");
|
||||||
|
response.setStatus(HttpServletResponse.SC_OK);
|
||||||
|
Writer w = response.getWriter();
|
||||||
|
if (w != null) {
|
||||||
|
w.write(content);
|
||||||
|
w.flush();
|
||||||
|
w.close();
|
||||||
|
} else {
|
||||||
|
throw new ServletException("response returned a null writer");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respond(HttpServletResponse response, String status, String message)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
Writer w = response.getWriter();
|
||||||
|
try {
|
||||||
|
JSONWriter writer = new JSONWriter(w);
|
||||||
|
writer.object();
|
||||||
|
writer.key("status"); writer.value(status);
|
||||||
|
writer.key("message"); writer.value(message);
|
||||||
|
writer.endObject();
|
||||||
|
w.flush();
|
||||||
|
w.close();
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// This can never occue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respondJSON(HttpServletResponse response, Jsonizable o)
|
||||||
|
throws IOException, JSONException {
|
||||||
|
|
||||||
|
respondJSON(response, o, new Properties());
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respondJSON(
|
||||||
|
HttpServletResponse response, Jsonizable o, Properties options)
|
||||||
|
throws IOException, JSONException {
|
||||||
|
|
||||||
|
response.setCharacterEncoding("UTF-8");
|
||||||
|
response.setHeader("Content-Type", "application/json");
|
||||||
|
|
||||||
|
Writer w = response.getWriter();
|
||||||
|
JSONWriter writer = new JSONWriter(w);
|
||||||
|
|
||||||
|
o.write(writer, options);
|
||||||
|
w.flush();
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respondException(HttpServletResponse response, Exception e)
|
||||||
|
throws IOException, ServletException {
|
||||||
|
|
||||||
|
logger.warn("Exception caught", e);
|
||||||
|
|
||||||
|
if (response == null) {
|
||||||
|
throw new ServletException("Response object can't be null");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
JSONObject o = new JSONObject();
|
||||||
|
o.put("code", "error");
|
||||||
|
o.put("message", e.getMessage());
|
||||||
|
|
||||||
|
StringWriter sw = new StringWriter();
|
||||||
|
PrintWriter pw = new PrintWriter(sw);
|
||||||
|
e.printStackTrace(pw);
|
||||||
|
pw.flush();
|
||||||
|
sw.flush();
|
||||||
|
|
||||||
|
o.put("stack", sw.toString());
|
||||||
|
|
||||||
|
response.setCharacterEncoding("UTF-8");
|
||||||
|
response.setHeader("Content-Type", "application/json");
|
||||||
|
respond(response, o.toString());
|
||||||
|
} catch (JSONException e1) {
|
||||||
|
e.printStackTrace(response.getWriter());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void redirect(HttpServletResponse response, String url) throws IOException {
|
||||||
|
response.sendRedirect(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public int getIntegerParameter(HttpServletRequest request, String name, int def) {
|
||||||
|
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(request.getParameter(name));
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("Error getting integer parameter", e);
|
||||||
|
}
|
||||||
|
return def;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public JSONObject getJsonParameter(HttpServletRequest request, String name) {
|
||||||
|
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
|
||||||
|
String value = request.getParameter(name);
|
||||||
|
if (value != null) {
|
||||||
|
try {
|
||||||
|
return ParsingUtilities.evaluateJsonStringToObject(value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
logger.warn("Error getting json parameter", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respondWithErrorPage(
|
||||||
|
RefineServlet servlet,
|
||||||
|
HttpServletRequest request,
|
||||||
|
HttpServletResponse response,
|
||||||
|
String message,
|
||||||
|
Throwable e
|
||||||
|
) {
|
||||||
|
respondWithErrorPage(servlet, request, response, message,
|
||||||
|
HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void respondWithErrorPage(
|
||||||
|
RefineServlet servlet,
|
||||||
|
HttpServletRequest request,
|
||||||
|
HttpServletResponse response,
|
||||||
|
String message,
|
||||||
|
int status,
|
||||||
|
Throwable e
|
||||||
|
) {
|
||||||
|
VelocityContext context = new VelocityContext();
|
||||||
|
|
||||||
|
context.put("message", message);
|
||||||
|
|
||||||
|
if (e != null) {
|
||||||
|
StringWriter writer = new StringWriter();
|
||||||
|
|
||||||
|
e.printStackTrace(new PrintWriter(writer));
|
||||||
|
|
||||||
|
context.put("stack", writer.toString());
|
||||||
|
} else {
|
||||||
|
context.put("stack", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
response.setStatus(status);
|
||||||
|
|
||||||
|
servlet.getModule("core").sendTextFromTemplate(
|
||||||
|
request, response, context, "error.vt", "UTF-8", "text/html", true);
|
||||||
|
|
||||||
|
} catch (Exception e1) {
|
||||||
|
e1.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.commands.importing;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import com.google.refine.commands.Command;
|
||||||
|
import com.google.refine.commands.HttpUtilities;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
|
|
||||||
|
public class CancelImportingJobCommand extends Command {
|
||||||
|
@Override
|
||||||
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(request.getParameter("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
} else {
|
||||||
|
job.canceled = true;
|
||||||
|
HttpUtilities.respond(response, "ok", "Job canceled");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -43,8 +43,9 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
|
|
||||||
public class CreateImportJobCommand extends Command {
|
public class CreateImportingJobCommand extends Command {
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("create-import-job_command");
|
final static Logger logger = LoggerFactory.getLogger("create-import-job_command");
|
||||||
|
|
||||||
@ -52,7 +53,7 @@ public class CreateImportJobCommand extends Command {
|
|||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
long id = ImportManager.singleton().createJob().id;
|
long id = ImportingManager.createJob().id;
|
||||||
|
|
||||||
response.setCharacterEncoding("UTF-8");
|
response.setCharacterEncoding("UTF-8");
|
||||||
response.setHeader("Content-Type", "application/json");
|
response.setHeader("Content-Type", "application/json");
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
Copyright 2011, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -31,24 +31,38 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.commands.importing;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.IOException;
|
||||||
|
import java.io.Writer;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import javax.servlet.ServletException;
|
||||||
import com.google.refine.model.Project;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
public interface StreamImporter extends Importer {
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
/**
|
import com.google.refine.commands.Command;
|
||||||
* @param inputStream stream to be imported
|
import com.google.refine.importing.ImportingManager;
|
||||||
* @param project project to import stream into
|
|
||||||
* @param metadata metadata of new project
|
|
||||||
* @param options
|
|
||||||
* @throws ImportException
|
|
||||||
*/
|
|
||||||
public void read(InputStream inputStream, Project project,
|
|
||||||
ProjectMetadata metadata, Properties options) throws ImportException;
|
|
||||||
|
|
||||||
|
public class GetImportingConfigurationCommand extends Command {
|
||||||
|
@Override
|
||||||
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
Writer w = response.getWriter();
|
||||||
|
JSONWriter writer = new JSONWriter(w);
|
||||||
|
try {
|
||||||
|
writer.object();
|
||||||
|
writer.key("config"); ImportingManager.writeConfiguration(writer, new Properties());
|
||||||
|
writer.endObject();
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
} finally {
|
||||||
|
w.flush();
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -34,9 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
package com.google.refine.commands.importing;
|
package com.google.refine.commands.importing;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
@ -44,22 +43,18 @@ import javax.servlet.http.HttpServletResponse;
|
|||||||
|
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.commands.importing.ImportJob.State;
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
public class GetImportJobStatusCommand extends Command {
|
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("get-import-job-status_command");
|
|
||||||
|
|
||||||
|
public class GetImportingJobStatusCommand extends Command {
|
||||||
@Override
|
@Override
|
||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
long jobID = Long.parseLong(request.getParameter("jobID"));
|
long jobID = Long.parseLong(request.getParameter("jobID"));
|
||||||
ImportJob job = ImportManager.singleton().getJob(jobID);
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
|
||||||
Writer w = response.getWriter();
|
Writer w = response.getWriter();
|
||||||
JSONWriter writer = new JSONWriter(w);
|
JSONWriter writer = new JSONWriter(w);
|
||||||
@ -70,32 +65,11 @@ public class GetImportJobStatusCommand extends Command {
|
|||||||
writer.key("message"); writer.value("No such import job");
|
writer.key("message"); writer.value("No such import job");
|
||||||
} else {
|
} else {
|
||||||
writer.key("code"); writer.value("ok");
|
writer.key("code"); writer.value("ok");
|
||||||
writer.key("state");
|
writer.key("job"); job.write(writer, new Properties());
|
||||||
if (job.state == State.NEW) {
|
|
||||||
writer.value("new");
|
|
||||||
} else if (job.state == State.RETRIEVING_DATA) {
|
|
||||||
writer.value("retrieving");
|
|
||||||
writer.key("progress"); writer.value(job.retrievingProgress);
|
|
||||||
writer.key("bytesSaved"); writer.value(job.bytesSaved);
|
|
||||||
} else if (job.state == State.READY) {
|
|
||||||
writer.value("ready");
|
|
||||||
} else if (job.state == State.ERROR) {
|
|
||||||
writer.value("error");
|
|
||||||
writer.key("message"); writer.value(job.errorMessage);
|
|
||||||
if (job.exception != null) {
|
|
||||||
StringWriter sw = new StringWriter();
|
|
||||||
PrintWriter pw = new PrintWriter(sw);
|
|
||||||
job.exception.printStackTrace(pw);
|
|
||||||
pw.flush();
|
|
||||||
sw.flush();
|
|
||||||
|
|
||||||
writer.key("stack"); writer.value(sw.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
writer.endObject();
|
writer.endObject();
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
throw new IOException(e);
|
throw new ServletException(e);
|
||||||
} finally {
|
} finally {
|
||||||
w.flush();
|
w.flush();
|
||||||
w.close();
|
w.close();
|
@ -1,49 +0,0 @@
|
|||||||
package com.google.refine.commands.importing;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
|
|
||||||
import com.google.refine.model.meta.ImportSource;
|
|
||||||
|
|
||||||
public class ImportJob {
|
|
||||||
static public enum State {
|
|
||||||
NEW,
|
|
||||||
RETRIEVING_DATA,
|
|
||||||
READY,
|
|
||||||
ERROR
|
|
||||||
}
|
|
||||||
|
|
||||||
final public long id;
|
|
||||||
final public File dir;
|
|
||||||
|
|
||||||
public long lastTouched;
|
|
||||||
public State state = State.NEW;
|
|
||||||
|
|
||||||
// Data for retrieving phase
|
|
||||||
public int retrievingProgress = 0; // from 0 to 100
|
|
||||||
public long bytesSaved = 0; // in case percentage is unknown
|
|
||||||
public String errorMessage;
|
|
||||||
public Throwable exception;
|
|
||||||
|
|
||||||
public ImportSource importSource;
|
|
||||||
|
|
||||||
public ImportJob(long id, File dir) {
|
|
||||||
this.id = id;
|
|
||||||
this.dir = dir;
|
|
||||||
|
|
||||||
dir.mkdirs();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void touch() {
|
|
||||||
lastTouched = System.currentTimeMillis();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void dispose() {
|
|
||||||
try {
|
|
||||||
FileUtils.deleteDirectory(dir);
|
|
||||||
} catch (IOException e) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,101 +0,0 @@
|
|||||||
package com.google.refine.commands.importing;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
|
|
||||||
import com.google.refine.RefineServlet;
|
|
||||||
import com.google.refine.model.meta.ImportSource;
|
|
||||||
|
|
||||||
public class ImportManager {
|
|
||||||
static final private Map<String, Class<? extends ImportSource>> nameToImportSourceClass =
|
|
||||||
new HashMap<String, Class<? extends ImportSource>>();
|
|
||||||
|
|
||||||
static final private Map<String, String> importSourceClassNameToName =
|
|
||||||
new HashMap<String, String>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register a single import source class.
|
|
||||||
*
|
|
||||||
* @param name importer verb for importer
|
|
||||||
* @param importerObject object implementing the importer
|
|
||||||
*
|
|
||||||
* @return true if importer was loaded and registered successfully
|
|
||||||
*/
|
|
||||||
static public boolean registerImportSourceClass(String name, Class<? extends ImportSource> klass) {
|
|
||||||
if (nameToImportSourceClass.containsKey(name)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
nameToImportSourceClass.put(name, klass);
|
|
||||||
importSourceClassNameToName.put(klass.getName(), name);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static public Class<? extends ImportSource> getImportSourceClass(String name) {
|
|
||||||
return nameToImportSourceClass.get(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public String getImportSourceClassName(Class<? extends ImportSource> klass) {
|
|
||||||
return importSourceClassNameToName.get(klass.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
final private RefineServlet servlet;
|
|
||||||
final private Map<Long, ImportJob> jobs = new HashMap<Long, ImportJob>();
|
|
||||||
private File importDir;
|
|
||||||
|
|
||||||
static private ImportManager singleton;
|
|
||||||
|
|
||||||
static public void initialize(RefineServlet servlet) {
|
|
||||||
singleton = new ImportManager(servlet);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public ImportManager singleton() {
|
|
||||||
return singleton;
|
|
||||||
}
|
|
||||||
|
|
||||||
private ImportManager(RefineServlet servlet) {
|
|
||||||
this.servlet = servlet;
|
|
||||||
}
|
|
||||||
|
|
||||||
private File getImportDir() {
|
|
||||||
if (importDir == null) {
|
|
||||||
File tempDir = servlet.getTempDir();
|
|
||||||
importDir = tempDir == null ? new File(".import-temp") : new File(tempDir, "import");
|
|
||||||
|
|
||||||
if (importDir.exists()) {
|
|
||||||
try {
|
|
||||||
// start fresh
|
|
||||||
FileUtils.deleteDirectory(importDir);
|
|
||||||
} catch (IOException e) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
importDir.mkdirs();
|
|
||||||
}
|
|
||||||
return importDir;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ImportJob createJob() {
|
|
||||||
long id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
|
|
||||||
File jobDir = new File(getImportDir(), Long.toString(id));
|
|
||||||
|
|
||||||
ImportJob job = new ImportJob(id, jobDir);
|
|
||||||
jobs.put(id, job);
|
|
||||||
|
|
||||||
return job;
|
|
||||||
}
|
|
||||||
|
|
||||||
public ImportJob getJob(long id) {
|
|
||||||
return jobs.get(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void disposeJob(long id) {
|
|
||||||
ImportJob job = getJob(id);
|
|
||||||
if (job != null) {
|
|
||||||
job.dispose();
|
|
||||||
jobs.remove(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -44,18 +44,40 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.commands.importing.ImportJob.State;
|
import com.google.refine.commands.HttpUtilities;
|
||||||
import com.google.refine.model.meta.ImportSource;
|
import com.google.refine.importing.ImportingController;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
public class RetrieveImportContentCommand extends Command {
|
public class ImportingControllerCommand extends Command {
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("retrieve-import-content_command");
|
final static Logger logger = LoggerFactory.getLogger("importing-controller_command");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
ImportingController controller = getController(request);
|
||||||
|
if (controller != null) {
|
||||||
|
controller.doPost(request, response);
|
||||||
|
} else {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import controller");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Handles a GET request: resolves the importing controller for the request through
 * getController(request) and hands the request to it. When no controller is resolved,
 * responds with the error message "No such import controller".
 */
// NOTE(review): this GET handler forwards to controller.doPost(...), not to a GET-specific
// method on the controller. Confirm this is intentional and not a copy-paste slip.
@Override
|
||||||
|
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
ImportingController controller = getController(request);
|
||||||
|
if (controller != null) {
|
||||||
|
controller.doPost(request, response);
|
||||||
|
} else {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import controller");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ImportingController getController(HttpServletRequest request) {
|
||||||
/*
|
/*
|
||||||
* The uploaded file is in the POST body as a "file part". If
|
* The uploaded file is in the POST body as a "file part". If
|
||||||
* we call request.getParameter() then the POST body will get
|
* we call request.getParameter() then the POST body will get
|
||||||
@ -64,39 +86,10 @@ public class RetrieveImportContentCommand extends Command {
|
|||||||
* Don't call request.getParameter() before calling internalImport().
|
* Don't call request.getParameter() before calling internalImport().
|
||||||
*/
|
*/
|
||||||
Properties options = ParsingUtilities.parseUrlParameters(request);
|
Properties options = ParsingUtilities.parseUrlParameters(request);
|
||||||
|
String name = options.getProperty("controller");
|
||||||
long jobID = Long.parseLong(options.getProperty("jobID"));
|
if (name != null) {
|
||||||
ImportJob job = ImportManager.singleton().getJob(jobID);
|
return ImportingManager.controllers.get(name);
|
||||||
if (job == null) {
|
|
||||||
respondWithErrorPage(request, response, "No such import job", null);
|
|
||||||
return;
|
|
||||||
} else if (job.state != State.NEW) {
|
|
||||||
respondWithErrorPage(request, response, "Import job already started", null);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Class<? extends ImportSource> importSourceClass =
|
|
||||||
ImportManager.getImportSourceClass(options.getProperty("source"));
|
|
||||||
if (importSourceClass == null) {
|
|
||||||
respondWithErrorPage(request, response, "No such import source class", null);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
ImportSource importSource = importSourceClass.newInstance();
|
|
||||||
job.importSource = importSource;
|
|
||||||
job.state = State.RETRIEVING_DATA;
|
|
||||||
|
|
||||||
importSource.retrieveContent(request, options, job);
|
|
||||||
|
|
||||||
job.retrievingProgress = 100;
|
|
||||||
job.state = State.READY;
|
|
||||||
} catch (Throwable e) {e.printStackTrace();
|
|
||||||
job.state = State.ERROR;
|
|
||||||
job.errorMessage = e.getLocalizedMessage();
|
|
||||||
job.exception = e;
|
|
||||||
|
|
||||||
respondWithErrorPage(request, response, "Failed to kick start import job", e);
|
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2010,2011. Google Inc.
|
Copyright 2010, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -33,60 +33,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.commands.project;
|
package com.google.refine.commands.project;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FilterInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.net.URLConnection;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
import org.apache.commons.fileupload.FileItemIterator;
|
|
||||||
import org.apache.commons.fileupload.FileItemStream;
|
|
||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
|
||||||
import org.apache.commons.fileupload.util.Streams;
|
|
||||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
|
||||||
import org.apache.tools.tar.TarEntry;
|
|
||||||
import org.apache.tools.tar.TarInputStream;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.ProjectManager;
|
import com.google.refine.ProjectManager;
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
import com.google.refine.importers.Importer;
|
import com.google.refine.commands.HttpUtilities;
|
||||||
import com.google.refine.importers.ImporterRegistry;
|
|
||||||
import com.google.refine.importers.ReaderImporter;
|
|
||||||
import com.google.refine.importers.StreamImporter;
|
|
||||||
import com.google.refine.importers.TsvCsvImporter;
|
|
||||||
import com.google.refine.importers.UrlImporter;
|
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.util.IOUtils;
|
|
||||||
import com.google.refine.util.ParsingUtilities;
|
import com.google.refine.util.ParsingUtilities;
|
||||||
import com.ibm.icu.text.CharsetDetector;
|
|
||||||
import com.ibm.icu.text.CharsetMatch;
|
|
||||||
|
|
||||||
public class CreateProjectCommand extends Command {
|
public class CreateProjectCommand extends Command {
|
||||||
|
|
||||||
@ -99,13 +61,6 @@ public class CreateProjectCommand extends Command {
|
|||||||
ProjectManager.singleton.setBusy(true);
|
ProjectManager.singleton.setBusy(true);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
/*
|
|
||||||
* Set UTF-8 as request encoding, then ServletFileUpload will use it as default encoding
|
|
||||||
*/
|
|
||||||
if (request.getCharacterEncoding() == null) {
|
|
||||||
request.setCharacterEncoding("UTF-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The uploaded file is in the POST body as a "file part". If
|
* The uploaded file is in the POST body as a "file part". If
|
||||||
* we call request.getParameter() then the POST body will get
|
* we call request.getParameter() then the POST body will get
|
||||||
@ -118,7 +73,7 @@ public class CreateProjectCommand extends Command {
|
|||||||
Project project = new Project();
|
Project project = new Project();
|
||||||
ProjectMetadata pm = new ProjectMetadata();
|
ProjectMetadata pm = new ProjectMetadata();
|
||||||
|
|
||||||
internalImport(request, project, pm, options);
|
//internalImport(request, project, pm, options);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The import process above populates options with parameters
|
* The import process above populates options with parameters
|
||||||
@ -133,382 +88,11 @@ public class CreateProjectCommand extends Command {
|
|||||||
|
|
||||||
project.update();
|
project.update();
|
||||||
|
|
||||||
redirect(response, "/project?project=" + project.id);
|
HttpUtilities.redirect(response, "/project?project=" + project.id);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
respondWithErrorPage(request, response, "Failed to import file", e);
|
respondWithErrorPage(request, response, "Failed to import file", e);
|
||||||
} finally {
|
} finally {
|
||||||
ProjectManager.singleton.setBusy(false);
|
ProjectManager.singleton.setBusy(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void internalImport(
|
|
||||||
HttpServletRequest request,
|
|
||||||
Project project,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
Properties options
|
|
||||||
) throws Exception {
|
|
||||||
|
|
||||||
ServletFileUpload upload = new ServletFileUpload();
|
|
||||||
String url = options.getProperty("url");
|
|
||||||
boolean imported = false;
|
|
||||||
|
|
||||||
FileItemIterator iter = upload.getItemIterator(request);
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
FileItemStream item = iter.next();
|
|
||||||
String name = item.getFieldName().toLowerCase();
|
|
||||||
InputStream stream = item.openStream();
|
|
||||||
if (item.isFormField()) {
|
|
||||||
if (name.equals("raw-text")) {
|
|
||||||
Reader reader = new InputStreamReader(stream,request.getCharacterEncoding());
|
|
||||||
try {
|
|
||||||
internalInvokeImporter(project, new TsvCsvImporter(), metadata, options, reader);
|
|
||||||
imported = true;
|
|
||||||
} finally {
|
|
||||||
reader.close();
|
|
||||||
}
|
|
||||||
} else if (name.equals("project-url")) {
|
|
||||||
url = Streams.asString(stream, request.getCharacterEncoding());
|
|
||||||
} else {
|
|
||||||
options.put(name, Streams.asString(stream, request.getCharacterEncoding()));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String fileName = item.getName().toLowerCase();
|
|
||||||
if (fileName.length() > 0) {
|
|
||||||
try {
|
|
||||||
internalImportFile(project, metadata, options, fileName, stream);
|
|
||||||
imported = true;
|
|
||||||
} finally {
|
|
||||||
stream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!imported && url != null && url.length() > 0) {
|
|
||||||
internalImportURL(request, project, metadata, options, url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static class SafeInputStream extends FilterInputStream {
|
|
||||||
public SafeInputStream(InputStream stream) {
|
|
||||||
super(stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
// some libraries attempt to close the input stream while they can't
|
|
||||||
// read anymore from it... unfortunately this behavior prevents
|
|
||||||
// the zip input stream from functioning correctly so we just have
|
|
||||||
// to ignore those close() calls and just close it ourselves
|
|
||||||
// forcefully later
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reallyClose() throws IOException {
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void internalImportFile(
|
|
||||||
Project project,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
Properties options,
|
|
||||||
String fileName,
|
|
||||||
InputStream inputStream
|
|
||||||
) throws Exception {
|
|
||||||
|
|
||||||
logger.info("Importing '{}'", fileName);
|
|
||||||
|
|
||||||
if (fileName.endsWith(".zip") || fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) {
|
|
||||||
|
|
||||||
// first, save the file on disk, since we need two passes and we might
|
|
||||||
// not have enough memory to keep it all in there
|
|
||||||
File file = save(inputStream);
|
|
||||||
|
|
||||||
// in the first pass, gather statistics about what files are in there
|
|
||||||
// unfortunately, we have to rely on files extensions, which is horrible but
|
|
||||||
// better than nothing
|
|
||||||
HashMap<String,Integer> ext_map = new HashMap<String,Integer>();
|
|
||||||
|
|
||||||
FileInputStream fis = new FileInputStream(file);
|
|
||||||
InputStream is = getStream(fileName, fis);
|
|
||||||
|
|
||||||
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
|
||||||
// archive-like input streams so while both TarInputStream and ZipInputStream
|
|
||||||
// behave precisely the same, there is no polymorphic behavior so we have
|
|
||||||
// to treat each instance explicitly... one of those times you wish you had
|
|
||||||
// closures
|
|
||||||
try {
|
|
||||||
if (is instanceof TarInputStream) {
|
|
||||||
TarInputStream tis = (TarInputStream) is;
|
|
||||||
TarEntry te;
|
|
||||||
while ((te = tis.getNextEntry()) != null) {
|
|
||||||
if (!te.isDirectory()) {
|
|
||||||
mapExtension(te.getName(),ext_map);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is instanceof ZipInputStream) {
|
|
||||||
ZipInputStream zis = (ZipInputStream) is;
|
|
||||||
ZipEntry ze;
|
|
||||||
while ((ze = zis.getNextEntry()) != null) {
|
|
||||||
if (!ze.isDirectory()) {
|
|
||||||
mapExtension(ze.getName(),ext_map);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
is.close();
|
|
||||||
fis.close();
|
|
||||||
} catch (IOException e) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
// sort extensions by how often they appear
|
|
||||||
List<Entry<String,Integer>> values = new ArrayList<Entry<String,Integer>>(ext_map.entrySet());
|
|
||||||
Collections.sort(values, new ValuesComparator());
|
|
||||||
|
|
||||||
if (values.size() == 0) {
|
|
||||||
throw new RuntimeException("The archive contains no files.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// this will contain the set of extensions we'll load from the archive
|
|
||||||
HashSet<String> exts = new HashSet<String>();
|
|
||||||
|
|
||||||
// find the extension that is most frequent or those who share the highest frequency value
|
|
||||||
if (values.size() == 1) {
|
|
||||||
exts.add(values.get(0).getKey());
|
|
||||||
} else {
|
|
||||||
Entry<String,Integer> most_frequent = values.get(0);
|
|
||||||
Entry<String,Integer> second_most_frequent = values.get(1);
|
|
||||||
if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner
|
|
||||||
exts.add(most_frequent.getKey());
|
|
||||||
} else { // multiple extensions have the same frequency
|
|
||||||
int winning_frequency = most_frequent.getValue();
|
|
||||||
for (Entry<String,Integer> e : values) {
|
|
||||||
if (e.getValue() == winning_frequency) {
|
|
||||||
exts.add(e.getKey());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info("Most frequent extensions: {}", exts.toString());
|
|
||||||
|
|
||||||
// second pass, load the data for real
|
|
||||||
is = getStream(fileName, new FileInputStream(file));
|
|
||||||
SafeInputStream sis = new SafeInputStream(is);
|
|
||||||
try {
|
|
||||||
if (is instanceof TarInputStream) {
|
|
||||||
TarInputStream tis = (TarInputStream) is;
|
|
||||||
TarEntry te;
|
|
||||||
while ((te = tis.getNextEntry()) != null) {
|
|
||||||
if (!te.isDirectory()) {
|
|
||||||
String name = te.getName();
|
|
||||||
String ext = getExtension(name)[1];
|
|
||||||
if (exts.contains(ext)) {
|
|
||||||
internalImportFile(project, metadata, options, name, sis);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (is instanceof ZipInputStream) {
|
|
||||||
ZipInputStream zis = (ZipInputStream) is;
|
|
||||||
ZipEntry ze;
|
|
||||||
while ((ze = zis.getNextEntry()) != null) {
|
|
||||||
if (!ze.isDirectory()) {
|
|
||||||
String name = ze.getName();
|
|
||||||
String ext = getExtension(name)[1];
|
|
||||||
if (exts.contains(ext)) {
|
|
||||||
internalImportFile(project, metadata, options, name, sis);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
sis.reallyClose();
|
|
||||||
} catch (IOException e) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else if (fileName.endsWith(".gz")) {
|
|
||||||
internalImportFile(project, metadata, options, getExtension(fileName)[0], new GZIPInputStream(inputStream));
|
|
||||||
} else if (fileName.endsWith(".bz2")) {
|
|
||||||
internalImportFile(project, metadata, options, getExtension(fileName)[0], new CBZip2InputStream(inputStream));
|
|
||||||
} else {
|
|
||||||
load(project, metadata, options, fileName, inputStream);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class ValuesComparator implements Comparator<Entry<String,Integer>>, Serializable {
|
|
||||||
private static final long serialVersionUID = 8845863616149837657L;
|
|
||||||
|
|
||||||
public int compare(Entry<String,Integer> o1, Entry<String,Integer> o2) {
|
|
||||||
return o2.getValue() - o1.getValue();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void load(Project project, ProjectMetadata metadata, Properties options, String fileName, InputStream inputStream) throws Exception {
|
|
||||||
Importer importer = ImporterRegistry.guessImporter(null, fileName);
|
|
||||||
internalInvokeImporter(project, importer, metadata, options, inputStream, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
private File save(InputStream is) throws IOException {
|
|
||||||
File temp = this.servlet.getTempFile(Long.toString(System.currentTimeMillis()));
|
|
||||||
temp.deleteOnExit();
|
|
||||||
IOUtils.copy(is,temp);
|
|
||||||
is.close();
|
|
||||||
return temp;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mapExtension(String name, Map<String,Integer> ext_map) {
|
|
||||||
String ext = getExtension(name)[1];
|
|
||||||
if (ext_map.containsKey(ext)) {
|
|
||||||
ext_map.put(ext, ext_map.get(ext) + 1);
|
|
||||||
} else {
|
|
||||||
ext_map.put(ext, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private InputStream getStream(String fileName, InputStream is) throws IOException {
|
|
||||||
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
|
||||||
return new TarInputStream(new GZIPInputStream(is));
|
|
||||||
} else if (fileName.endsWith(".tar.bz2")) {
|
|
||||||
return new TarInputStream(new CBZip2InputStream(is));
|
|
||||||
} else if (fileName.endsWith(".tar")) {
|
|
||||||
return new TarInputStream(is);
|
|
||||||
} else {
|
|
||||||
return new ZipInputStream(is);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String[] getExtension(String filename) {
|
|
||||||
String[] result = new String[2];
|
|
||||||
int ext_index = filename.lastIndexOf('.');
|
|
||||||
result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index);
|
|
||||||
result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void internalImportURL(
|
|
||||||
HttpServletRequest request,
|
|
||||||
Project project,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
Properties options,
|
|
||||||
String urlString) throws Exception {
|
|
||||||
|
|
||||||
// Little dance to get URL properly encoded (e.g. for funky Fusion Tables queries)
|
|
||||||
URL url = new URL(urlString);
|
|
||||||
url = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null).toURL();
|
|
||||||
|
|
||||||
URLConnection connection = null;
|
|
||||||
|
|
||||||
// Try for a URL importer first
|
|
||||||
Importer importer = ImporterRegistry.guessUrlImporter(url);
|
|
||||||
if (importer instanceof UrlImporter) {
|
|
||||||
((UrlImporter) importer).read(url, project, metadata, options);
|
|
||||||
} else {
|
|
||||||
// If we couldn't find one, try opening URL and treating as a stream
|
|
||||||
try {
|
|
||||||
connection = url.openConnection();
|
|
||||||
connection.setConnectTimeout(5000);
|
|
||||||
connection.connect();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new Exception("Cannot connect to " + urlString, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
InputStream inputStream = null;
|
|
||||||
try {
|
|
||||||
inputStream = connection.getInputStream();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new Exception("Cannot retrieve content from " + url, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
String contentType = connection.getContentType();
|
|
||||||
int semicolon = contentType.indexOf(';');
|
|
||||||
if (semicolon >= 0) {
|
|
||||||
contentType = contentType.substring(0, semicolon);
|
|
||||||
}
|
|
||||||
|
|
||||||
importer = ImporterRegistry.guessImporter(contentType, url.getPath());
|
|
||||||
|
|
||||||
internalInvokeImporter(project, importer, metadata, options, inputStream, connection.getContentEncoding());
|
|
||||||
} finally {
|
|
||||||
inputStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void internalInvokeImporter(
|
|
||||||
Project project,
|
|
||||||
Importer importer,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
Properties options,
|
|
||||||
InputStream rawInputStream,
|
|
||||||
String encoding
|
|
||||||
) throws Exception {
|
|
||||||
if (importer instanceof ReaderImporter) {
|
|
||||||
|
|
||||||
// NOTE: The ICU4J char detection code requires the input stream to support mark/reset.
|
|
||||||
InputStream inputStream = rawInputStream;
|
|
||||||
if (!inputStream.markSupported()) {
|
|
||||||
inputStream = new BufferedInputStream(rawInputStream);
|
|
||||||
}
|
|
||||||
|
|
||||||
CharsetDetector detector = new CharsetDetector();
|
|
||||||
detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that
|
|
||||||
options.setProperty("encoding_confidence", "0"); // in case we don't find anything suitable
|
|
||||||
|
|
||||||
InputStreamReader reader = null;
|
|
||||||
CharsetMatch[] charsetMatches = detector.setText(inputStream).detectAll();
|
|
||||||
for (CharsetMatch charsetMatch : charsetMatches) { // matches are ordered - first is best match
|
|
||||||
String matchName = charsetMatch.getName();
|
|
||||||
int confidence = charsetMatch.getConfidence();
|
|
||||||
// Threshold was 50. Do we ever want to not use our best guess even if it's low confidence? - tfmorris
|
|
||||||
if (confidence >= 20) {
|
|
||||||
logger.info("Encoding guess: {} [confidence: {}]", matchName, confidence);
|
|
||||||
try {
|
|
||||||
reader = new InputStreamReader(inputStream, matchName);
|
|
||||||
} catch (UnsupportedEncodingException e) {
|
|
||||||
logger.debug("Unsupported InputStreamReader charset encoding: {} [confidence: {}]; skipping", matchName, confidence);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Encoding will be set later at common exit point
|
|
||||||
options.setProperty("encoding_confidence", Integer.toString(confidence));
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
logger.debug("Poor encoding guess: {} [confidence: {}]; skipping", matchName, confidence);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (reader == null) { // when all else fails
|
|
||||||
if (encoding != null) {
|
|
||||||
reader = new InputStreamReader(inputStream, encoding);
|
|
||||||
} else {
|
|
||||||
reader = new InputStreamReader(inputStream);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Get the actual encoding which will be used and save it for project metadata
|
|
||||||
options.setProperty("encoding", reader.getEncoding());
|
|
||||||
|
|
||||||
((ReaderImporter) importer).read(reader, project, metadata, options);
|
|
||||||
} else {
|
|
||||||
// TODO: How do we set character encoding here?
|
|
||||||
// Things won't work right if it's not set, so pick some arbitrary values
|
|
||||||
if (encoding != null) {
|
|
||||||
options.setProperty("encoding", encoding);
|
|
||||||
}
|
|
||||||
options.setProperty("encoding_confidence", "0");
|
|
||||||
((StreamImporter) importer).read(rawInputStream, project, metadata, options);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void internalInvokeImporter(
|
|
||||||
Project project,
|
|
||||||
ReaderImporter importer,
|
|
||||||
ProjectMetadata metadata,
|
|
||||||
Properties options,
|
|
||||||
Reader reader
|
|
||||||
) throws Exception {
|
|
||||||
importer.read(reader, project, metadata, options);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -44,17 +44,43 @@ import org.json.JSONException;
|
|||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
|
import com.google.refine.commands.HttpUtilities;
|
||||||
import com.google.refine.expr.MetaParser;
|
import com.google.refine.expr.MetaParser;
|
||||||
import com.google.refine.expr.MetaParser.LanguageInfo;
|
import com.google.refine.expr.MetaParser.LanguageInfo;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
import com.google.refine.model.OverlayModel;
|
import com.google.refine.model.OverlayModel;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
public class GetModelsCommand extends Command {
|
public class GetModelsCommand extends Command {
|
||||||
@Override
|
@Override
|
||||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
internalRespond(request, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
|
||||||
|
internalRespond(request, response);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void internalRespond(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
Project project = getProject(request);
|
Project project = null;
|
||||||
|
|
||||||
|
// This command also supports retrieving rows for an importing job.
|
||||||
|
String importingJobID = request.getParameter("importingJobID");
|
||||||
|
if (importingJobID != null) {
|
||||||
|
long jobID = Long.parseLong(importingJobID);
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job != null) {
|
||||||
|
project = job.project;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (project == null) {
|
||||||
|
project = getProject(request);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
response.setCharacterEncoding("UTF-8");
|
response.setCharacterEncoding("UTF-8");
|
||||||
@ -92,7 +118,7 @@ public class GetModelsCommand extends Command {
|
|||||||
|
|
||||||
writer.endObject();
|
writer.endObject();
|
||||||
} catch (JSONException e) {
|
} catch (JSONException e) {
|
||||||
respondException(response, e);
|
HttpUtilities.respondException(response, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,6 +52,8 @@ import com.google.refine.browsing.RecordVisitor;
|
|||||||
import com.google.refine.browsing.RowVisitor;
|
import com.google.refine.browsing.RowVisitor;
|
||||||
import com.google.refine.browsing.Engine.Mode;
|
import com.google.refine.browsing.Engine.Mode;
|
||||||
import com.google.refine.commands.Command;
|
import com.google.refine.commands.Command;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Record;
|
import com.google.refine.model.Record;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
@ -77,7 +79,21 @@ public class GetRowsCommand extends Command {
|
|||||||
throws ServletException, IOException {
|
throws ServletException, IOException {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Project project = getProject(request);
|
Project project = null;
|
||||||
|
|
||||||
|
// This command also supports retrieving rows for an importing job.
|
||||||
|
String importingJobID = request.getParameter("importingJobID");
|
||||||
|
if (importingJobID != null) {
|
||||||
|
long jobID = Long.parseLong(importingJobID);
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job != null) {
|
||||||
|
project = job.project;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (project == null) {
|
||||||
|
project = getProject(request);
|
||||||
|
}
|
||||||
|
|
||||||
Engine engine = getEngine(request, project);
|
Engine engine = getEngine(request, project);
|
||||||
String callback = request.getParameter("callback");
|
String callback = request.getParameter("callback");
|
||||||
|
|
||||||
|
@ -33,16 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.poi.common.usermodel.Hyperlink;
|
import org.apache.poi.common.usermodel.Hyperlink;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
|
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
|
||||||
@ -51,184 +50,152 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||||||
import org.apache.poi.ss.usermodel.Sheet;
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Column;
|
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Recon;
|
import com.google.refine.model.Recon;
|
||||||
import com.google.refine.model.ReconCandidate;
|
import com.google.refine.model.ReconCandidate;
|
||||||
import com.google.refine.model.Row;
|
|
||||||
import com.google.refine.model.Recon.Judgment;
|
import com.google.refine.model.Recon.Judgment;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class ExcelImporter implements StreamImporter {
|
public class ExcelImporter extends TabularImportingParserBase {
|
||||||
protected boolean _xmlBased;
|
public ExcelImporter() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(InputStream inputStream, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
public JSONObject createParserUIInitializationData(
|
||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
|
|
||||||
|
|
||||||
|
boolean xmlBased = "text/xml/xlsx".equals(format);
|
||||||
|
JSONUtilities.safePut(options, "xmlBased", xmlBased);
|
||||||
|
|
||||||
|
JSONArray sheetRecords = new JSONArray();
|
||||||
|
JSONUtilities.safePut(options, "sheetRecords", sheetRecords);
|
||||||
|
try {
|
||||||
|
JSONObject firstFileRecord = fileRecords.get(0);
|
||||||
|
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
Workbook wb = xmlBased ?
|
||||||
|
new XSSFWorkbook(is) :
|
||||||
|
new HSSFWorkbook(new POIFSFileSystem(is));
|
||||||
|
|
||||||
|
int sheetCount = wb.getNumberOfSheets();
|
||||||
|
boolean hasData = false;
|
||||||
|
for (int i = 0; i < sheetCount; i++) {
|
||||||
|
Sheet sheet = wb.getSheetAt(i);
|
||||||
|
int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;
|
||||||
|
|
||||||
|
JSONObject sheetRecord = new JSONObject();
|
||||||
|
JSONUtilities.safePut(sheetRecord, "name", sheet.getSheetName());
|
||||||
|
JSONUtilities.safePut(sheetRecord, "rows", rows);
|
||||||
|
if (hasData) {
|
||||||
|
JSONUtilities.safePut(sheetRecord, "selected", false);
|
||||||
|
} else if (rows > 1) {
|
||||||
|
JSONUtilities.safePut(sheetRecord, "selected", true);
|
||||||
|
hasData = true;
|
||||||
|
}
|
||||||
|
JSONUtilities.append(sheetRecords, sheetRecord);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
InputStream inputStream,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
|
||||||
Workbook wb = null;
|
Workbook wb = null;
|
||||||
try {
|
try {
|
||||||
wb = _xmlBased ?
|
wb = xmlBased ?
|
||||||
new XSSFWorkbook(inputStream) :
|
new XSSFWorkbook(inputStream) :
|
||||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ImportException(
|
exceptions.add(new ImportException(
|
||||||
"Attempted to parse as an Excel file but failed. " +
|
"Attempted to parse as an Excel file but failed. " +
|
||||||
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
||||||
e
|
e
|
||||||
);
|
));
|
||||||
|
return;
|
||||||
} catch (ArrayIndexOutOfBoundsException e){
|
} catch (ArrayIndexOutOfBoundsException e){
|
||||||
throw new ImportException(
|
exceptions.add(new ImportException(
|
||||||
"Attempted to parse file as an Excel file but failed. " +
|
"Attempted to parse file as an Excel file but failed. " +
|
||||||
"This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. " +
|
"This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. " +
|
||||||
"Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. " +
|
"Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. " +
|
||||||
"See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
|
"See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
|
||||||
e);
|
e
|
||||||
|
));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Sheet sheet = wb.getSheetAt(0);
|
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
|
||||||
|
for (int sheetIndex : sheets) {
|
||||||
|
final Sheet sheet = wb.getSheetAt(sheetIndex);
|
||||||
|
final int lastRow = sheet.getLastRowNum();
|
||||||
|
|
||||||
int firstRow = sheet.getFirstRowNum();
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
int lastRow = sheet.getLastRowNum();
|
int nextRow = 0;
|
||||||
|
|
||||||
List<String> columnNames = new ArrayList<String>();
|
|
||||||
Set<String> columnNameSet = new HashSet<String>();
|
|
||||||
Map<String, Integer> columnRootNameToIndex = new HashMap<String, Integer>();
|
|
||||||
|
|
||||||
int rowsWithData = 0;
|
|
||||||
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
||||||
|
|
||||||
for (int r = firstRow; r <= lastRow; r++) {
|
@Override
|
||||||
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
if (row == null) {
|
if (nextRow >= lastRow) {
|
||||||
continue;
|
return null;
|
||||||
} else if (ignoreLines > 0) {
|
|
||||||
ignoreLines--;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
short firstCell = row.getFirstCellNum();
|
List<Object> cells = new ArrayList<Object>();
|
||||||
|
org.apache.poi.ss.usermodel.Row row = sheet.getRow(nextRow++);
|
||||||
|
if (row != null) {
|
||||||
short lastCell = row.getLastCellNum();
|
short lastCell = row.getLastCellNum();
|
||||||
if (firstCell < 0 || firstCell > lastCell) {
|
for (short cellIndex = 0; cellIndex <= lastCell; cellIndex++) {
|
||||||
continue;
|
Cell cell = null;
|
||||||
|
|
||||||
|
org.apache.poi.ss.usermodel.Cell sourceCell = row.getCell(cellIndex);
|
||||||
|
if (sourceCell != null) {
|
||||||
|
cell = extractCell(sourceCell, reconMap);
|
||||||
}
|
}
|
||||||
|
cells.add(cell);
|
||||||
/*
|
|
||||||
* Still processing header lines
|
|
||||||
*/
|
|
||||||
if (headerLines > 0) {
|
|
||||||
headerLines--;
|
|
||||||
|
|
||||||
for (int c = firstCell; c <= lastCell; c++) {
|
|
||||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
|
||||||
if (cell != null) {
|
|
||||||
Serializable value = extractCell(cell);
|
|
||||||
String text = value != null ? value.toString() : null;
|
|
||||||
if (text != null && text.length() > 0) {
|
|
||||||
while (columnNames.size() < c + 1) {
|
|
||||||
columnNames.add(null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String existingName = columnNames.get(c);
|
|
||||||
String name = (existingName == null) ? text : (existingName + " " + text);
|
|
||||||
|
|
||||||
columnNames.set(c, name);
|
|
||||||
}
|
}
|
||||||
|
return cells;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
readTable(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
dataReader,
|
||||||
|
fileSource + "#" + sheet.getSheetName(),
|
||||||
|
limit,
|
||||||
|
options,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (headerLines == 0) {
|
static protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||||
for (int i = 0; i < columnNames.size(); i++) {
|
|
||||||
String rootName = columnNames.get(i);
|
|
||||||
if (rootName == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
setUnduplicatedColumnName(rootName, columnNames, i, columnNameSet, columnRootNameToIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Processing data rows
|
|
||||||
*/
|
|
||||||
} else {
|
|
||||||
Row newRow = new Row(columnNames.size());
|
|
||||||
boolean hasData = false;
|
|
||||||
|
|
||||||
for (int c = firstCell; c <= lastCell; c++) {
|
|
||||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
|
||||||
if (cell == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
Cell ourCell = extractCell(cell, reconMap);
|
|
||||||
if (ourCell != null) {
|
|
||||||
while (columnNames.size() < c + 1) {
|
|
||||||
columnNames.add(null);
|
|
||||||
}
|
|
||||||
if (columnNames.get(c) == null) {
|
|
||||||
setUnduplicatedColumnName("Column", columnNames, c, columnNameSet, columnRootNameToIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
newRow.setCell(c, ourCell);
|
|
||||||
hasData = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hasData) {
|
|
||||||
rowsWithData++;
|
|
||||||
|
|
||||||
if (skip <= 0 || rowsWithData > skip) {
|
|
||||||
project.rows.add(newRow);
|
|
||||||
project.columnModel.setMaxCellIndex(newRow.cells.size());
|
|
||||||
|
|
||||||
if (limit > 0 && project.rows.size() >= limit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create columns
|
|
||||||
*/
|
|
||||||
for (int c = 0; c < columnNames.size(); c++) {
|
|
||||||
String name = columnNames.get(c);
|
|
||||||
if (name != null) {
|
|
||||||
Column column = new Column(c, name);
|
|
||||||
project.columnModel.columns.add(column);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setUnduplicatedColumnName(
|
|
||||||
String rootName, List<String> columnNames, int index, Set<String> columnNameSet, Map<String, Integer> columnRootNameToIndex) {
|
|
||||||
if (columnNameSet.contains(rootName)) {
|
|
||||||
int startIndex = columnRootNameToIndex.containsKey(rootName) ? columnRootNameToIndex.get(rootName) : 2;
|
|
||||||
while (true) {
|
|
||||||
String name = rootName + " " + startIndex;
|
|
||||||
if (columnNameSet.contains(name)) {
|
|
||||||
startIndex++;
|
|
||||||
} else {
|
|
||||||
columnNames.set(index, name);
|
|
||||||
columnNameSet.add(name);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
columnRootNameToIndex.put(rootName, startIndex + 1);
|
|
||||||
} else {
|
|
||||||
columnNames.set(index, rootName);
|
|
||||||
columnNameSet.add(rootName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
|
||||||
int cellType = cell.getCellType();
|
int cellType = cell.getCellType();
|
||||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
|
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
|
||||||
cellType = cell.getCachedFormulaResultType();
|
cellType = cell.getCachedFormulaResultType();
|
||||||
@ -259,7 +226,7 @@ public class ExcelImporter implements StreamImporter {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||||
Serializable value = extractCell(cell);
|
Serializable value = extractCell(cell);
|
||||||
|
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
@ -312,33 +279,4 @@ public class ExcelImporter implements StreamImporter {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
|
||||||
if (contentType != null) {
|
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
if ("application/msexcel".equals(contentType) ||
|
|
||||||
"application/x-msexcel".equals(contentType) ||
|
|
||||||
"application/x-ms-excel".equals(contentType) ||
|
|
||||||
"application/vnd.ms-excel".equals(contentType) ||
|
|
||||||
"application/x-excel".equals(contentType) ||
|
|
||||||
"application/xls".equals(contentType)) {
|
|
||||||
this._xmlBased = false;
|
|
||||||
return true;
|
|
||||||
} else if("application/x-xls".equals(contentType)) {
|
|
||||||
this._xmlBased = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (fileName.endsWith(".xls")) {
|
|
||||||
this._xmlBased = false;
|
|
||||||
return true;
|
|
||||||
} else if (fileName.endsWith(".xlsx")) {
|
|
||||||
this._xmlBased = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,177 +1,105 @@
|
|||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.LineNumberReader;
|
import java.io.LineNumberReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.Serializable;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TODO this class is almost an exact copy of TsvCsvImporter. Could we combine the two, or combine common functions into a common abstract supertype?
|
public class FixedWidthImporter extends TabularImportingParserBase {
|
||||||
|
public FixedWidthImporter() {
|
||||||
final static Logger logger = LoggerFactory.getLogger("FixedWidthImporter");
|
super(false);
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
|
||||||
if (contentType != null) {
|
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
|
|
||||||
//filter out tree structure data
|
|
||||||
if("application/json".equals(contentType)||
|
|
||||||
"text/json".equals(contentType)||
|
|
||||||
"application/xml".equals(contentType) ||
|
|
||||||
"text/xml".equals(contentType) ||
|
|
||||||
"application/rss+xml".equals(contentType) ||
|
|
||||||
"application/atom+xml".equals(contentType) ||
|
|
||||||
"application/rdf+xml".equals(contentType)) //TODO add more tree data types.
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return
|
|
||||||
"text/plain".equals(contentType)
|
|
||||||
|| "text/fixed-width".equals(contentType); //FIXME Is text/fixed-width a valid contentType?
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(InputStream inputStream, Project project,
|
public JSONObject createParserUIInitializationData(
|
||||||
ProjectMetadata metadata, Properties options)
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
throws ImportException {
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
read(new InputStreamReader(inputStream), project, metadata, options);
|
JSONArray columnWidths = new JSONArray();
|
||||||
|
|
||||||
|
JSONObject firstFileRecord = fileRecords.get(0);
|
||||||
|
String encoding = ImportingUtilities.getEncoding(firstFileRecord);
|
||||||
|
String location = JSONUtilities.getString(firstFileRecord, "location", null);
|
||||||
|
if (location != null) {
|
||||||
|
File file = new File(job.getRawDataDir(), location);
|
||||||
|
int[] columnWidthsA = guessColumnWidths(file, encoding);
|
||||||
|
if (columnWidthsA != null) {
|
||||||
|
for (int w : columnWidthsA) {
|
||||||
|
JSONUtilities.append(columnWidths, w);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
|
JSONUtilities.safePut(options, "columnWidths", columnWidths);
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
|
|
||||||
|
return options;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(Reader reader, Project project, ProjectMetadata metadata,
|
public void parseOneFile(
|
||||||
Properties options) throws ImportException {
|
Project project,
|
||||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
ProjectMetadata metadata,
|
||||||
String columnWidths = options.getProperty("fixed-column-widths");
|
ImportingJob job,
|
||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
String fileSource,
|
||||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
Reader reader,
|
||||||
|
int limit,
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
JSONObject options,
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
List<Exception> exceptions
|
||||||
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
|
) {
|
||||||
|
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
||||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
|
||||||
|
|
||||||
|
|
||||||
read(lnReader, project, columnWidths,
|
|
||||||
limit, skip, ignoreLines, headerLines,
|
|
||||||
guessValueType, splitIntoColumns
|
|
||||||
);
|
|
||||||
|
|
||||||
|
final List<Object> columnNames;
|
||||||
|
if (options.has("columnNames")) {
|
||||||
|
columnNames = new ArrayList<Object>();
|
||||||
|
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||||
|
for (String s : strings) {
|
||||||
|
columnNames.add(s);
|
||||||
}
|
}
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param lnReader
|
|
||||||
* LineNumberReader used to read file or string contents
|
|
||||||
* @param project
|
|
||||||
* The project into which the parsed data will be added
|
|
||||||
* @param columnWidths
|
|
||||||
* Expects a comma separated string of integers which indicate the number of characters in each line
|
|
||||||
* @param limit
|
|
||||||
* The maximum number of rows of data to import
|
|
||||||
* @param skip
|
|
||||||
* The number of initial data rows to skip
|
|
||||||
* @param ignoreLines
|
|
||||||
* The number of initial lines within the data source which should be ignored entirely
|
|
||||||
* @param headerLines
|
|
||||||
* The number of lines in the data source which describe each column
|
|
||||||
* @param guessValueType
|
|
||||||
* Whether the parser should try and guess the type of the value being parsed
|
|
||||||
* @param splitIntoColumns
|
|
||||||
* Whether the parser should try and split the data source into columns
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public void read(LineNumberReader lnReader, Project project,
|
|
||||||
String sep, int limit, int skip, int ignoreLines,
|
|
||||||
int headerLines, boolean guessValueType, boolean splitIntoColumns) throws ImportException{
|
|
||||||
|
|
||||||
int[] columnWidths = null;
|
|
||||||
|
|
||||||
columnWidths = getColumnWidthsFromString( sep );
|
|
||||||
|
|
||||||
if(columnWidths.length < 2)
|
|
||||||
splitIntoColumns = false;
|
|
||||||
|
|
||||||
List<String> columnNames = new ArrayList<String>();
|
|
||||||
String line = null;
|
|
||||||
int rowsWithData = 0;
|
|
||||||
|
|
||||||
try {
|
|
||||||
while ((line = lnReader.readLine()) != null) {
|
|
||||||
if (ignoreLines > 0) {
|
|
||||||
ignoreLines--;
|
|
||||||
continue;
|
|
||||||
} else if (StringUtils.isBlank(line)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (headerLines > 0) {
|
|
||||||
//column headers
|
|
||||||
headerLines--;
|
|
||||||
|
|
||||||
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
|
|
||||||
|
|
||||||
for (int c = 0; c < cells.size(); c++) {
|
|
||||||
String cell = cells.get(c).trim();
|
|
||||||
//add column even if cell is blank
|
|
||||||
ImporterUtilities.appendColumnName(columnNames, c, cell);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
//data
|
columnNames = null;
|
||||||
Row row = new Row(columnNames.size());
|
|
||||||
|
|
||||||
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
|
|
||||||
|
|
||||||
if( cells != null && cells.size() > 0 )
|
|
||||||
rowsWithData++;
|
|
||||||
|
|
||||||
if (skip <=0 || rowsWithData > skip){
|
|
||||||
//add parsed data to row
|
|
||||||
for(String s : cells){
|
|
||||||
if (ExpressionUtils.isNonBlankData(s)) {
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
}else{
|
|
||||||
row.cells.add(null);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
project.rows.add(row);
|
|
||||||
project.columnModel.setMaxCellIndex(row.cells.size());
|
|
||||||
|
|
||||||
ImporterUtilities.ensureColumnsInRowExist(columnNames, row);
|
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
if (limit > 0 && project.rows.size() >= limit) {
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
break;
|
boolean usedColumnNames = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
if (columnNames != null && !usedColumnNames) {
|
||||||
|
usedColumnNames = true;
|
||||||
|
return columnNames;
|
||||||
|
} else {
|
||||||
|
String line = lnReader.readLine();
|
||||||
|
if (line == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return getCells(line, columnWidths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ImportException("The fixed width importer could not read the next line", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
ImporterUtilities.setupColumns(project, columnNames);
|
|
||||||
|
|
||||||
|
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -181,22 +109,23 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
|
|||||||
* @param splitIntoColumns
|
* @param splitIntoColumns
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private ArrayList<String> getCells(String line, int[] widths, boolean splitIntoColumns) {
|
static private ArrayList<Object> getCells(String line, int[] widths) {
|
||||||
ArrayList<String> cells = new ArrayList<String>();
|
ArrayList<Object> cells = new ArrayList<Object>();
|
||||||
if(splitIntoColumns){
|
|
||||||
int columnStartCursor = 0;
|
int columnStartCursor = 0;
|
||||||
int columnEndCursor = 0;
|
int columnEndCursor = 0;
|
||||||
for(int width : widths){
|
for (int width : widths) {
|
||||||
if(columnStartCursor >= line.length()){
|
if (columnStartCursor >= line.length()) {
|
||||||
cells.add(null); //FIXME is adding a null cell (to represent no data) OK?
|
cells.add(null); //FIXME is adding a null cell (to represent no data) OK?
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
columnEndCursor = columnStartCursor + width;
|
columnEndCursor = columnStartCursor + width;
|
||||||
|
|
||||||
if(columnEndCursor > line.length())
|
if (columnEndCursor > line.length()) {
|
||||||
columnEndCursor = line.length();
|
columnEndCursor = line.length();
|
||||||
if(columnEndCursor <= columnStartCursor){
|
}
|
||||||
|
if (columnEndCursor <= columnStartCursor) {
|
||||||
cells.add(null); //FIXME is adding a null cell (to represent no data, or a zero width column) OK?
|
cells.add(null); //FIXME is adding a null cell (to represent no data, or a zero width column) OK?
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -205,36 +134,83 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
|
|||||||
|
|
||||||
columnStartCursor = columnEndCursor;
|
columnStartCursor = columnEndCursor;
|
||||||
}
|
}
|
||||||
}else{
|
|
||||||
cells.add(line);
|
// Residual text
|
||||||
|
if (columnStartCursor < line.length()) {
|
||||||
|
cells.add(line.substring(columnStartCursor));
|
||||||
}
|
}
|
||||||
return cells;
|
return cells;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static public int[] guessColumnWidths(File file, String encoding) {
|
||||||
* Converts the expected string of comma separated integers into an array of integers.
|
try {
|
||||||
* Also performs a basic sanity check on the provided data.
|
InputStream is = new FileInputStream(file);
|
||||||
*
|
try {
|
||||||
* @param sep
|
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||||
* A comma separated string of integers. e.g. 4,2,5,22,19
|
LineNumberReader lineNumberReader = new LineNumberReader(reader);
|
||||||
* @return
|
|
||||||
* @throws ServletException
|
|
||||||
*/
|
|
||||||
public int[] getColumnWidthsFromString(String sep) throws ImportException {
|
|
||||||
String[] splitSep = Pattern.compile(",").split(sep);
|
|
||||||
|
|
||||||
int[] widths = new int[splitSep.length];
|
int[] counts = null;
|
||||||
for(int i = 0; i < splitSep.length; i++){
|
int totalBytes = 0;
|
||||||
try{
|
int lineCount = 0;
|
||||||
int parsedInt = Integer.parseInt(splitSep[i]);
|
String s;
|
||||||
if( parsedInt < 0 )
|
while (totalBytes < 64 * 1024 &&
|
||||||
throw new ImportException("A column cannot have a width of less than zero", null);
|
lineCount < 100 &&
|
||||||
widths[i] = parsedInt;
|
(s = lineNumberReader.readLine()) != null) {
|
||||||
}catch(NumberFormatException e){
|
|
||||||
throw new ImportException("For a fixed column width import, the column widths must be given as a comma separated string of integers. e.g. 1,3,5,22,19", e);
|
totalBytes += s.length() + 1; // count the new line character
|
||||||
|
if (s.length() == 0) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
lineCount++;
|
||||||
|
|
||||||
|
if (counts == null) {
|
||||||
|
counts = new int[s.length()];
|
||||||
|
for (int c = 0; c < counts.length; c++) {
|
||||||
|
counts[c] = 0;
|
||||||
}
|
}
|
||||||
return widths;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (int c = 0; c < counts.length && c < s.length(); c++) {
|
||||||
|
char ch = s.charAt(c);
|
||||||
|
if (ch == ' ') {
|
||||||
|
counts[c]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (counts != null) {
|
||||||
|
List<Integer> widths = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
int startIndex = 0;
|
||||||
|
for (int c = 0; c < counts.length; c++) {
|
||||||
|
int count = counts[c];
|
||||||
|
if (count == lineCount && c > startIndex) {
|
||||||
|
widths.add(c - startIndex + 1);
|
||||||
|
startIndex = c + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = widths.size() - 1; i > 0; i--) {
|
||||||
|
if (widths.get(i) == 1) {
|
||||||
|
widths.remove(i);
|
||||||
|
widths.set(i - 1, widths.get(i - 1) + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int[] widthA = new int[widths.size()];
|
||||||
|
for (int i = 0; i < widthA.length; i++) {
|
||||||
|
widthA[i] = widths.get(i);
|
||||||
|
}
|
||||||
|
return widthA;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,11 +38,9 @@ package com.google.refine.importers;
|
|||||||
* indicating the underlying cause of the problem.
|
* indicating the underlying cause of the problem.
|
||||||
*/
|
*/
|
||||||
public class ImportException extends Exception {
|
public class ImportException extends Exception {
|
||||||
|
|
||||||
private static final long serialVersionUID = 7077314805989174181L;
|
private static final long serialVersionUID = 7077314805989174181L;
|
||||||
|
|
||||||
public ImportException(String message, Throwable cause) {
|
public ImportException(String message, Throwable cause) {
|
||||||
super(message, cause);
|
super(message, cause);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,138 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers;
|
|
||||||
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
|
|
||||||
abstract public class ImporterRegistry {
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("importer-registry");
|
|
||||||
|
|
||||||
static final private Map<String, Importer> importers = new HashMap<String, Importer>();
|
|
||||||
|
|
||||||
private static final String[][] importerNames = {
|
|
||||||
{"ExcelImporter", "com.google.refine.importers.ExcelImporter"},
|
|
||||||
{"XmlImporter", "com.google.refine.importers.XmlImporter"},
|
|
||||||
{"RdfTripleImporter", "com.google.refine.importers.RdfTripleImporter"},
|
|
||||||
{"MarcImporter", "com.google.refine.importers.MarcImporter"},
|
|
||||||
{"TsvCsvImporter", "com.google.refine.importers.TsvCsvImporter"},
|
|
||||||
{"JsonImporter", "com.google.refine.importers.JsonImporter"},
|
|
||||||
{"FixedWidthImporter", "com.google.refine.importers.FixedWidthImporter"}
|
|
||||||
};
|
|
||||||
|
|
||||||
static {
|
|
||||||
registerImporters(importerNames);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public boolean registerImporters(String[][] importers) {
|
|
||||||
boolean status = true;
|
|
||||||
for (String[] importer : importerNames) {
|
|
||||||
String importerName = importer[0];
|
|
||||||
String className = importer[1];
|
|
||||||
logger.debug("Loading command " + importerName + " class: " + className);
|
|
||||||
Importer cmd;
|
|
||||||
try {
|
|
||||||
// TODO: May need to use the servlet container's class loader here
|
|
||||||
cmd = (Importer) Class.forName(className).newInstance();
|
|
||||||
} catch (InstantiationException e) {
|
|
||||||
logger.error("Failed to load importer class " + className, e);
|
|
||||||
status = false;
|
|
||||||
continue;
|
|
||||||
} catch (IllegalAccessException e) {
|
|
||||||
logger.error("Failed to load importer class " + className, e);
|
|
||||||
status = false;
|
|
||||||
continue;
|
|
||||||
} catch (ClassNotFoundException e) {
|
|
||||||
logger.error("Failed to load importer class " + className, e);
|
|
||||||
status = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
status |= registerImporter(importerName, cmd);
|
|
||||||
}
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Register a single importer.
|
|
||||||
*
|
|
||||||
* @param name importer verb for importer
|
|
||||||
* @param importerObject object implementing the importer
|
|
||||||
*
|
|
||||||
* @return true if importer was loaded and registered successfully
|
|
||||||
*/
|
|
||||||
static public boolean registerImporter(String name, Importer importerObject) {
|
|
||||||
if (importers.containsKey(name)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
importers.put(name, importerObject);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Currently only for test purposes
|
|
||||||
static protected boolean unregisterImporter(String verb) {
|
|
||||||
return importers.remove(verb) != null;
|
|
||||||
}
|
|
||||||
|
|
||||||
static public Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
|
|
||||||
for (Importer i : importers.values()){
|
|
||||||
if(i.canImportData(contentType, fileName)){
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (provideDefault) {
|
|
||||||
return new TsvCsvImporter(); // default
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static public Importer guessImporter(String contentType, String filename) {
|
|
||||||
return guessImporter(contentType, filename, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public Importer guessUrlImporter(URL url) {
|
|
||||||
for (Importer importer : importers.values()){
|
|
||||||
if (importer instanceof UrlImporter
|
|
||||||
&& ((UrlImporter) importer).canImportData(url)) {
|
|
||||||
return importer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
@ -33,15 +33,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.ModelException;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.util.TrackingInputStream;
|
||||||
|
|
||||||
public class ImporterUtilities {
|
public class ImporterUtilities {
|
||||||
|
|
||||||
@ -118,6 +128,33 @@ public class ImporterUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public Column getOrAllocateColumn(Project project, List<String> currentFileColumnNames, int index) {
|
||||||
|
if (index < currentFileColumnNames.size()) {
|
||||||
|
return project.columnModel.getColumnByName(currentFileColumnNames.get(index));
|
||||||
|
} else if (index == currentFileColumnNames.size()) {
|
||||||
|
String prefix = "Column ";
|
||||||
|
int i = 1;
|
||||||
|
while (true) {
|
||||||
|
String columnName = prefix + i;
|
||||||
|
if (project.columnModel.getColumnByName(columnName) != null) {
|
||||||
|
// Already taken name
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
Column column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
|
||||||
|
try {
|
||||||
|
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
|
||||||
|
} catch (ModelException e) {
|
||||||
|
// Ignore: shouldn't get in here since we just checked for duplicate names.
|
||||||
|
}
|
||||||
|
currentFileColumnNames.add(columnName);
|
||||||
|
return column;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Unexpected code path");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public void setupColumns(Project project, List<String> columnNames) {
|
static public void setupColumns(Project project, List<String> columnNames) {
|
||||||
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
|
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
|
||||||
for (int c = 0; c < columnNames.size(); c++) {
|
for (int c = 0; c < columnNames.size(); c++) {
|
||||||
@ -125,7 +162,8 @@ public class ImporterUtilities {
|
|||||||
if (cell.isEmpty()) {
|
if (cell.isEmpty()) {
|
||||||
cell = "Column";
|
cell = "Column";
|
||||||
} else if (cell.startsWith("\"") && cell.endsWith("\"")) {
|
} else if (cell.startsWith("\"") && cell.endsWith("\"")) {
|
||||||
cell = cell.substring(1, cell.length() - 1).trim(); //FIXME is trimming quotation marks appropriate?
|
// FIXME: is trimming quotation marks appropriate?
|
||||||
|
cell = cell.substring(1, cell.length() - 1).trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nameToIndex.containsKey(cell)) {
|
if (nameToIndex.containsKey(cell)) {
|
||||||
@ -137,10 +175,74 @@ public class ImporterUtilities {
|
|||||||
nameToIndex.put(cell, 2);
|
nameToIndex.put(cell, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
Column column = new Column(c, cell);
|
columnNames.set(c, cell);
|
||||||
|
if (project.columnModel.getColumnByName(cell) == null) {
|
||||||
project.columnModel.columns.add(column);
|
Column column = new Column(project.columnModel.allocateNewCellIndex(), cell);
|
||||||
|
try {
|
||||||
|
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
|
||||||
|
} catch (ModelException e) {
|
||||||
|
// Ignore: shouldn't get in here since we just checked for duplicate names.
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public interface MultiFileReadingProgress {
|
||||||
|
public void startFile(String fileSource);
|
||||||
|
public void readingFile(String fileSource, long bytesRead);
|
||||||
|
public void endFile(String fileSource, long bytesRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public MultiFileReadingProgress createMultiFileReadingProgress(
|
||||||
|
final ImportingJob job, List<JSONObject> fileRecords) {
|
||||||
|
long totalSize = 0;
|
||||||
|
for (JSONObject fileRecord : fileRecords) {
|
||||||
|
File file = ImportingUtilities.getFile(job, fileRecord);
|
||||||
|
totalSize += file.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
final long totalSize2 = totalSize;
|
||||||
|
return new MultiFileReadingProgress() {
|
||||||
|
long totalBytesRead = 0;
|
||||||
|
|
||||||
|
void setProgress(String fileSource, long bytesRead) {
|
||||||
|
ImportingUtilities.setCreatingProjectProgress(
|
||||||
|
job,
|
||||||
|
"Reading " + fileSource,
|
||||||
|
(int) (100 * (totalBytesRead + bytesRead) / totalSize2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startFile(String fileSource) {
|
||||||
|
setProgress(fileSource, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void readingFile(String fileSource, long bytesRead) {
|
||||||
|
setProgress(fileSource, bytesRead);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void endFile(String fileSource, long bytesRead) {
|
||||||
|
totalBytesRead += bytesRead;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static public InputStream openAndTrackFile(
|
||||||
|
final String fileSource,
|
||||||
|
final File file,
|
||||||
|
final MultiFileReadingProgress progress) throws FileNotFoundException {
|
||||||
|
InputStream inputStream = new FileInputStream(file);
|
||||||
|
return progress == null ? inputStream : new TrackingInputStream(inputStream) {
|
||||||
|
@Override
|
||||||
|
protected long track(long bytesRead) {
|
||||||
|
long l = super.track(bytesRead);
|
||||||
|
|
||||||
|
progress.readingFile(fileSource, this.bytesRead);
|
||||||
|
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
138
main/src/com/google/refine/importers/ImportingParserBase.java
Normal file
138
main/src/com/google/refine/importers/ImportingParserBase.java
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingParser;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
|
abstract public class ImportingParserBase implements ImportingParser {
|
||||||
|
final protected boolean useInputStream;
|
||||||
|
|
||||||
|
protected ImportingParserBase(boolean useInputStream) {
|
||||||
|
this.useInputStream = useInputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parse(Project project, ProjectMetadata metadata,
|
||||||
|
final ImportingJob job, List<JSONObject> fileRecords, String format,
|
||||||
|
int limit, JSONObject options, List<Exception> exceptions) {
|
||||||
|
MultiFileReadingProgress progress = ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);
|
||||||
|
for (JSONObject fileRecord : fileRecords) {
|
||||||
|
if (job.canceled) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
parseOneFile(project, metadata, job, fileRecord, limit, options, exceptions, progress);
|
||||||
|
} catch (IOException e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (limit > 0 && project.rows.size() >= limit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
JSONObject fileRecord,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions,
|
||||||
|
final MultiFileReadingProgress progress
|
||||||
|
) throws IOException {
|
||||||
|
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||||
|
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||||
|
|
||||||
|
progress.startFile(fileSource);
|
||||||
|
try {
|
||||||
|
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||||
|
try {
|
||||||
|
if (useInputStream) {
|
||||||
|
parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions);
|
||||||
|
} else {
|
||||||
|
Reader reader = ImportingUtilities.getReaderFromStream(inputStream, fileRecord);
|
||||||
|
|
||||||
|
parseOneFile(project, metadata, job, fileSource, reader, limit, options, exceptions);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
inputStream.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
progress.endFile(fileSource, file.length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
InputStream inputStream,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
}
|
@ -33,95 +33,328 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.PushbackInputStream;
|
import java.io.Reader;
|
||||||
import java.util.Properties;
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
|
||||||
|
import org.codehaus.jackson.JsonFactory;
|
||||||
|
import org.codehaus.jackson.JsonParseException;
|
||||||
|
import org.codehaus.jackson.JsonParser;
|
||||||
|
import org.codehaus.jackson.JsonToken;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
import com.google.refine.importers.parsers.JSONParser;
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.tree.TreeReader;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class JsonImporter implements StreamImporter{
|
public class JsonImporter extends TreeImportingParserBase {
|
||||||
final static Logger logger = LoggerFactory.getLogger("JsonImporter");
|
public JsonImporter() {
|
||||||
|
super(false);
|
||||||
|
}
|
||||||
|
|
||||||
public static final int BUFFER_SIZE = 64 * 1024;
|
static private class PreviewParsingState {
|
||||||
|
int tokenCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static private int PREVIEW_PARSING_LIMIT = 1000;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(InputStream inputStream, Project project,
|
public JSONObject createParserUIInitializationData(
|
||||||
ProjectMetadata metadata, Properties options)
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
throws ImportException {
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
//FIXME the below is a close duplicate of the XmlImporter code.
|
try {
|
||||||
//Should wrap a lot of the below into methods and put them in a common superclass
|
JSONObject firstFileRecord = fileRecords.get(0);
|
||||||
logger.trace("JsonImporter.read");
|
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
InputStream is = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
JsonFactory factory = new JsonFactory();
|
||||||
|
JsonParser parser = factory.createJsonParser(is);
|
||||||
|
|
||||||
String[] recordPath = null;
|
PreviewParsingState state = new PreviewParsingState();
|
||||||
{
|
Object rootValue = parseForPreview(parser, state);
|
||||||
byte[] buffer = new byte[BUFFER_SIZE];
|
if (rootValue != null) {
|
||||||
int bytes_read = 0;
|
JSONUtilities.safePut(options, "dom", rootValue);
|
||||||
try {//fill the buffer with data
|
}
|
||||||
while (bytes_read < BUFFER_SIZE) {
|
} finally {
|
||||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
is.close();
|
||||||
if (c == -1) break;
|
|
||||||
bytes_read +=c ;
|
|
||||||
}
|
}
|
||||||
pis.unread(buffer, 0, bytes_read);
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ImportException("Read error",e);
|
// Ignore
|
||||||
}
|
}
|
||||||
|
|
||||||
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
|
return options;
|
||||||
TreeParser parser = new JSONParser(iStream);
|
|
||||||
if (options.containsKey("importer-record-tag")) {
|
|
||||||
try{
|
|
||||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
|
||||||
parser,
|
|
||||||
options.getProperty("importer-record-tag"));
|
|
||||||
}catch(Exception e){
|
|
||||||
// silent
|
|
||||||
// e.printStackTrace();
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
recordPath = XmlImportUtilities.detectRecordElement(parser);
|
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state, JsonToken token)
|
||||||
|
throws JsonParseException, IOException {
|
||||||
|
if (token != null) {
|
||||||
|
switch (token) {
|
||||||
|
case START_ARRAY:
|
||||||
|
return parseArrayForPreview(parser, state);
|
||||||
|
case START_OBJECT:
|
||||||
|
return parseObjectForPreview(parser, state);
|
||||||
|
case VALUE_STRING:
|
||||||
|
return parser.getText();
|
||||||
|
case VALUE_NUMBER_INT:
|
||||||
|
return Integer.valueOf(parser.getIntValue());
|
||||||
|
case VALUE_NUMBER_FLOAT:
|
||||||
|
return Float.valueOf(parser.getFloatValue());
|
||||||
|
case VALUE_TRUE:
|
||||||
|
return Boolean.TRUE;
|
||||||
|
case VALUE_FALSE:
|
||||||
|
return Boolean.FALSE;
|
||||||
|
case VALUE_NULL:
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state) {
|
||||||
|
try {
|
||||||
|
JsonToken token = parser.nextToken();
|
||||||
|
state.tokenCount++;
|
||||||
|
return parseForPreview(parser, state, token);
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recordPath == null)
|
final static private JSONObject parseObjectForPreview(JsonParser parser, PreviewParsingState state) {
|
||||||
return;
|
JSONObject result = new JSONObject();
|
||||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||||
XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup);
|
try {
|
||||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
JsonToken token = parser.nextToken();
|
||||||
|
if (token == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
state.tokenCount++;
|
||||||
|
|
||||||
project.columnModel.update();
|
switch (token) {
|
||||||
|
case FIELD_NAME:
|
||||||
|
String fieldName = parser.getText();
|
||||||
|
Object fieldValue = parseForPreview(parser, state);
|
||||||
|
JSONUtilities.safePut(result, fieldName, fieldValue);
|
||||||
|
break;
|
||||||
|
case END_OBJECT:
|
||||||
|
break loop;
|
||||||
|
default:
|
||||||
|
break loop;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static private JSONArray parseArrayForPreview(JsonParser parser, PreviewParsingState state) {
|
||||||
|
JSONArray result = new JSONArray();
|
||||||
|
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||||
|
try {
|
||||||
|
JsonToken token = parser.nextToken();
|
||||||
|
if (token == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
state.tokenCount++;
|
||||||
|
|
||||||
|
switch (token) {
|
||||||
|
case END_ARRAY:
|
||||||
|
break loop;
|
||||||
|
default:
|
||||||
|
Object element = parseForPreview(parser, state, token);
|
||||||
|
JSONUtilities.append(result, element);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
if (contentType != null) {
|
ImportingJob job, String fileSource, Reader reader,
|
||||||
contentType = contentType.toLowerCase().trim();
|
ImportColumnGroup rootColumnGroup, int limit, JSONObject options, List<Exception> exceptions) {
|
||||||
|
|
||||||
if("application/json".equals(contentType) ||
|
parseOneFile(project, metadata, job, fileSource,
|
||||||
"text/json".equals(contentType)) {
|
new JSONTreeReader(reader), rootColumnGroup, limit, options, exceptions);
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (
|
|
||||||
fileName.endsWith(".json") ||
|
|
||||||
fileName.endsWith(".js")
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public class JSONTreeReader implements TreeReader {
|
||||||
|
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
||||||
|
|
||||||
|
JsonFactory factory = new JsonFactory();
|
||||||
|
JsonParser parser = null;
|
||||||
|
|
||||||
|
//The following is a workaround for inconsistent Jackson JsonParser
|
||||||
|
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||||
|
Boolean thisTokenIsAFieldName = false;
|
||||||
|
String lastFieldName = null;
|
||||||
|
//end of workaround
|
||||||
|
|
||||||
|
public JSONTreeReader(Reader reader) {
|
||||||
|
try {
|
||||||
|
parser = factory.createJsonParser(reader);
|
||||||
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing. All Json is treated as elements
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int getAttributeCount() {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing. All Json is treated as elements
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAttributeLocalName(int index) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing. All Json is treated as elements
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAttributePrefix(int index) {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing. All Json is treated as elements
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getAttributeValue(int index) {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Token current() throws ServletException {
|
||||||
|
return this.mapToToken(parser.getCurrentToken());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFieldName() throws ServletException{
|
||||||
|
try {
|
||||||
|
String text = parser.getCurrentName();
|
||||||
|
|
||||||
|
//The following is a workaround for inconsistent Jackson JsonParser
|
||||||
|
if(text == null){
|
||||||
|
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
|
||||||
|
text = this.lastFieldName;
|
||||||
|
else
|
||||||
|
text = "__anonymous__";
|
||||||
|
}
|
||||||
|
//end of workaround
|
||||||
|
|
||||||
|
return text;
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does nothing. Json does not have prefixes
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getPrefix() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFieldValue() throws ServletException {
|
||||||
|
try {
|
||||||
|
return parser.getText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() throws ServletException {
|
||||||
|
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Token next() throws ServletException {
|
||||||
|
JsonToken next;
|
||||||
|
try {
|
||||||
|
next = parser.nextToken();
|
||||||
|
} catch (JsonParseException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(next == null)
|
||||||
|
throw new ServletException("No more Json Tokens in stream");
|
||||||
|
|
||||||
|
//The following is a workaround for inconsistent Jackson JsonParser
|
||||||
|
if(next == JsonToken.FIELD_NAME){
|
||||||
|
try {
|
||||||
|
this.thisTokenIsAFieldName = true;
|
||||||
|
this.lastFieldName = parser.getCurrentName();
|
||||||
|
} catch (Exception e) {
|
||||||
|
//silent
|
||||||
|
}
|
||||||
|
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
|
||||||
|
if(this.thisTokenIsAFieldName){
|
||||||
|
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
|
||||||
|
this.thisTokenIsAFieldName = false;
|
||||||
|
}else{
|
||||||
|
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||||
|
this.lastFieldName = null;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||||
|
this.lastFieldName = null;
|
||||||
|
this.thisTokenIsAFieldName = false;
|
||||||
|
}
|
||||||
|
//end of workaround
|
||||||
|
|
||||||
|
return mapToToken(next);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Token mapToToken(JsonToken token){
|
||||||
|
switch(token){
|
||||||
|
case START_ARRAY: return Token.StartEntity;
|
||||||
|
case END_ARRAY: return Token.EndEntity;
|
||||||
|
case START_OBJECT: return Token.StartEntity;
|
||||||
|
case END_OBJECT: return Token.EndEntity;
|
||||||
|
case VALUE_STRING: return Token.Value;
|
||||||
|
case FIELD_NAME: return Token.Ignorable; //returned by the getLocalName function()
|
||||||
|
case VALUE_NUMBER_INT: return Token.Value;
|
||||||
|
//Json does not have START_DOCUMENT token type (so ignored as default)
|
||||||
|
//Json does not have END_DOCUMENT token type (so ignored as default)
|
||||||
|
case VALUE_TRUE : return Token.Value;
|
||||||
|
case VALUE_NUMBER_FLOAT : return Token.Value;
|
||||||
|
case VALUE_NULL : return Token.Value;
|
||||||
|
case VALUE_FALSE : return Token.Value;
|
||||||
|
case VALUE_EMBEDDED_OBJECT : return Token.Ignorable;
|
||||||
|
case NOT_AVAILABLE : return Token.Ignorable;
|
||||||
|
default: return Token.Ignorable;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,21 @@
|
|||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import com.google.refine.importing.FormatGuesser;
|
||||||
|
|
||||||
|
public class LineBasedFormatGuesser implements FormatGuesser {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String guess(File file, String encoding, String seedFormat) {
|
||||||
|
SeparatorBasedImporter.Separator sep = SeparatorBasedImporter.guessSeparator(file, encoding);
|
||||||
|
if (sep != null) {
|
||||||
|
return "text/line-based/*sv";
|
||||||
|
}
|
||||||
|
int[] widths = FixedWidthImporter.guessColumnWidths(file, encoding);
|
||||||
|
if (widths != null) {
|
||||||
|
return "text/line-based/fixed-width";
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
105
main/src/com/google/refine/importers/LineBasedImporter.java
Normal file
105
main/src/com/google/refine/importers/LineBasedImporter.java
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
public class LineBasedImporter extends TabularImportingParserBase {
|
||||||
|
public LineBasedImporter() {
|
||||||
|
super(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(
|
||||||
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||||
|
JSONUtilities.safePut(options, "linesPerRow", 1);
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
final int linesPerRow = JSONUtilities.getInt(options, "linesPerRow", 1);
|
||||||
|
|
||||||
|
final List<Object> columnNames;
|
||||||
|
if (options.has("columnNames")) {
|
||||||
|
columnNames = new ArrayList<Object>();
|
||||||
|
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||||
|
for (String s : strings) {
|
||||||
|
columnNames.add(s);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1);
|
||||||
|
} else {
|
||||||
|
columnNames = null;
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
|
try {
|
||||||
|
int skip = JSONUtilities.getInt(options, "ignoreLines", -1);
|
||||||
|
while (skip > 0) {
|
||||||
|
lnReader.readLine();
|
||||||
|
skip--;
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(options, "ignoreLines", -1);
|
||||||
|
|
||||||
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
|
boolean usedColumnNames = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
if (columnNames != null && !usedColumnNames) {
|
||||||
|
usedColumnNames = true;
|
||||||
|
return columnNames;
|
||||||
|
} else {
|
||||||
|
List<Object> cells = null;
|
||||||
|
for (int i = 0; i < linesPerRow; i++) {
|
||||||
|
String line = lnReader.readLine();
|
||||||
|
if (i == 0) {
|
||||||
|
if (line == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
cells = new ArrayList<Object>(linesPerRow);
|
||||||
|
cells.add(line);
|
||||||
|
}
|
||||||
|
} else if (line != null) {
|
||||||
|
cells.add(line);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cells;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
}
|
||||||
|
}
|
@ -40,56 +40,44 @@ import java.io.FileOutputStream;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.Properties;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.marc4j.MarcPermissiveStreamReader;
|
import org.marc4j.MarcPermissiveStreamReader;
|
||||||
import org.marc4j.MarcWriter;
|
import org.marc4j.MarcWriter;
|
||||||
import org.marc4j.MarcXmlWriter;
|
import org.marc4j.MarcXmlWriter;
|
||||||
import org.marc4j.marc.Record;
|
import org.marc4j.marc.Record;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
public class MarcImporter implements StreamImporter {
|
public class MarcImporter extends XmlImporter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
InputStream inputStream,
|
ImportingJob job, String fileSource, InputStream inputStream,
|
||||||
Project project,
|
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
|
||||||
ProjectMetadata metadata, Properties options
|
List<Exception> exceptions) {
|
||||||
) throws ImportException {
|
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
|
||||||
|
|
||||||
File tempFile;
|
File tempFile;
|
||||||
try {
|
try {
|
||||||
tempFile = File.createTempFile("refine-import-", ".marc.xml");
|
tempFile = File.createTempFile("refine-import-", ".marc.xml");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ImportException("Unexpected error creating temp file",e);
|
exceptions.add(new ImportException("Unexpected error creating temp file", e));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
OutputStream os = new FileOutputStream(tempFile);
|
OutputStream os = new FileOutputStream(tempFile);
|
||||||
try {
|
try {
|
||||||
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
|
||||||
inputStream,
|
|
||||||
true,
|
|
||||||
true
|
|
||||||
);
|
|
||||||
MarcWriter writer = new MarcXmlWriter(os, true);
|
MarcWriter writer = new MarcXmlWriter(os, true);
|
||||||
|
|
||||||
int count = 0;
|
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
||||||
|
inputStream, true, true);
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
Record record = reader.next();
|
Record record = reader.next();
|
||||||
if (skip <= 0) {
|
|
||||||
if (limit == -1 || count < limit) {
|
|
||||||
writer.write(record);
|
writer.write(record);
|
||||||
count++;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
skip--;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
writer.close();
|
writer.close();
|
||||||
} finally {
|
} finally {
|
||||||
@ -102,7 +90,8 @@ public class MarcImporter implements StreamImporter {
|
|||||||
|
|
||||||
InputStream is = new FileInputStream(tempFile);
|
InputStream is = new FileInputStream(tempFile);
|
||||||
try {
|
try {
|
||||||
new XmlImporter().read(is, project, metadata, options);
|
// Parse the MARC-XML temp file opened as `is`; `inputStream` was already consumed by the MARC reader above.
super.parseOneFile(project, metadata, job, fileSource, is,
rootColumnGroup, limit, options, exceptions);
|
||||||
} finally {
|
} finally {
|
||||||
try {
|
try {
|
||||||
is.close();
|
is.close();
|
||||||
@ -111,31 +100,10 @@ public class MarcImporter implements StreamImporter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new ImportException("Input file not found", e);
|
exceptions.add(new ImportException("Input file not found", e));
|
||||||
|
return;
|
||||||
} finally {
|
} finally {
|
||||||
tempFile.delete();
|
tempFile.delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
|
||||||
if (contentType != null) {
|
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
|
|
||||||
if ("application/marc".equals(contentType)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (
|
|
||||||
fileName.endsWith(".mrc") ||
|
|
||||||
fileName.endsWith(".marc") ||
|
|
||||||
fileName.contains(".mrc.") ||
|
|
||||||
fileName.contains(".marc.")
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -33,64 +33,72 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
import org.jrdf.JRDFFactory;
|
import org.jrdf.JRDFFactory;
|
||||||
import org.jrdf.SortedMemoryJRDFFactory;
|
import org.jrdf.SortedMemoryJRDFFactory;
|
||||||
import org.jrdf.collection.MemMapFactory;
|
import org.jrdf.collection.MemMapFactory;
|
||||||
import org.jrdf.graph.Graph;
|
import org.jrdf.graph.Graph;
|
||||||
import org.jrdf.graph.Triple;
|
import org.jrdf.graph.Triple;
|
||||||
import org.jrdf.parser.ParseException;
|
|
||||||
import org.jrdf.parser.StatementHandlerException;
|
|
||||||
import org.jrdf.parser.line.GraphLineParser;
|
import org.jrdf.parser.line.GraphLineParser;
|
||||||
import org.jrdf.parser.line.LineHandler;
|
import org.jrdf.parser.line.LineHandler;
|
||||||
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
||||||
import org.jrdf.util.ClosableIterable;
|
import org.jrdf.util.ClosableIterable;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
|
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
|
||||||
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
|
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
|
||||||
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
|
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.ModelException;
|
import com.google.refine.model.ModelException;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class RdfTripleImporter implements ReaderImporter{
|
public class RdfTripleImporter extends ImportingParserBase {
|
||||||
private JRDFFactory _jrdfFactory;
|
private JRDFFactory _jrdfFactory;
|
||||||
private NTriplesParserFactory _nTriplesParserFactory;
|
private NTriplesParserFactory _nTriplesParserFactory;
|
||||||
private MemMapFactory _newMapFactory;
|
private MemMapFactory _newMapFactory;
|
||||||
|
|
||||||
public RdfTripleImporter(){
|
public RdfTripleImporter() {
|
||||||
|
super(false);
|
||||||
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
||||||
_nTriplesParserFactory = new NTriplesParserFactory();
|
_nTriplesParserFactory = new NTriplesParserFactory();
|
||||||
_newMapFactory = new MemMapFactory();
|
_newMapFactory = new MemMapFactory();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||||
String baseUrl = options.getProperty("base-url");
|
List<JSONObject> fileRecords, String format) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
|
ImportingJob job, String fileSource, Reader reader, int limit,
|
||||||
|
JSONObject options, List<Exception> exceptions) {
|
||||||
|
|
||||||
|
String baseUrl = JSONUtilities.getString(options, "baseUrl", "");
|
||||||
|
|
||||||
Graph graph = _jrdfFactory.getNewGraph();
|
Graph graph = _jrdfFactory.getNewGraph();
|
||||||
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
|
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
|
||||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||||
try {
|
try {
|
||||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||||
} catch (IOException e) {
|
} catch (Exception e) {
|
||||||
throw new ImportException("i/o error while parsing RDF",e);
|
exceptions.add(e);
|
||||||
} catch (ParseException e) {
|
return;
|
||||||
throw new ImportException("error parsing RDF",e);
|
|
||||||
} catch (StatementHandlerException e) {
|
|
||||||
throw new ImportException("error parsing RDF",e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
||||||
@ -152,24 +160,4 @@ public class RdfTripleImporter implements ReaderImporter{
|
|||||||
triples.iterator().close();
|
triples.iterator().close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
|
||||||
if (contentType != null) {
|
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
|
|
||||||
if("application/rdf+xml".equals(contentType)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (
|
|
||||||
fileName.endsWith(".rdf")) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
245
main/src/com/google/refine/importers/SeparatorBasedImporter.java
Normal file
245
main/src/com/google/refine/importers/SeparatorBasedImporter.java
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2010, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import au.com.bytecode.opencsv.CSVParser;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||||
|
public SeparatorBasedImporter() {
|
||||||
|
super(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||||
|
List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||||
|
|
||||||
|
String separator = guessSeparator(job, fileRecords);
|
||||||
|
JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t");
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||||
|
JSONUtilities.safePut(options, "processQuotes", true);
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
||||||
|
String sep = JSONUtilities.getString(options, "separator", "\t");
|
||||||
|
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
||||||
|
|
||||||
|
final CSVParser parser = new CSVParser(
|
||||||
|
sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators.
|
||||||
|
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||||
|
(char) 0, // escape character
|
||||||
|
CSVParser.DEFAULT_STRICT_QUOTES,
|
||||||
|
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||||
|
!processQuotes);
|
||||||
|
|
||||||
|
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
|
TableDataReader dataReader = new TableDataReader() {
|
||||||
|
long bytesRead = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException {
|
||||||
|
String line = lnReader.readLine();
|
||||||
|
if (line == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
bytesRead += line.length();
|
||||||
|
return getCells(line, parser, lnReader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
|
||||||
|
throws IOException{
|
||||||
|
|
||||||
|
ArrayList<Object> cells = new ArrayList<Object>();
|
||||||
|
String[] tokens = parser.parseLineMulti(line);
|
||||||
|
for (String s : tokens){
|
||||||
|
cells.add(s);
|
||||||
|
}
|
||||||
|
while (parser.isPending()) {
|
||||||
|
tokens = parser.parseLineMulti(lnReader.readLine());
|
||||||
|
for (String s : tokens) {
|
||||||
|
cells.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cells;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String guessSeparator(ImportingJob job, List<JSONObject> fileRecords) {
|
||||||
|
for (int i = 0; i < 5 && i < fileRecords.size(); i++) {
|
||||||
|
JSONObject fileRecord = fileRecords.get(i);
|
||||||
|
String encoding = ImportingUtilities.getEncoding(fileRecord);
|
||||||
|
String location = JSONUtilities.getString(fileRecord, "location", null);
|
||||||
|
|
||||||
|
if (location != null) {
|
||||||
|
File file = new File(job.getRawDataDir(), location);
|
||||||
|
Separator separator = guessSeparator(file, encoding);
|
||||||
|
if (separator != null) {
|
||||||
|
return Character.toString(separator.separator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public class Separator {
|
||||||
|
char separator;
|
||||||
|
int totalCount = 0;
|
||||||
|
int totalOfSquaredCount = 0;
|
||||||
|
int currentLineCount = 0;
|
||||||
|
|
||||||
|
double averagePerLine;
|
||||||
|
double stddev;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Separator guessSeparator(File file, String encoding) {
|
||||||
|
try {
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||||
|
LineNumberReader lineNumberReader = new LineNumberReader(reader);
|
||||||
|
|
||||||
|
List<Separator> separators = new ArrayList<SeparatorBasedImporter.Separator>();
|
||||||
|
Map<Character, Separator> separatorMap = new HashMap<Character, SeparatorBasedImporter.Separator>();
|
||||||
|
|
||||||
|
int totalBytes = 0;
|
||||||
|
int lineCount = 0;
|
||||||
|
String s;
|
||||||
|
while (totalBytes < 64 * 1024 &&
|
||||||
|
lineCount < 100 &&
|
||||||
|
(s = lineNumberReader.readLine()) != null) {
|
||||||
|
|
||||||
|
totalBytes += s.length() + 1; // count the new line character
|
||||||
|
if (s.length() == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
lineCount++;
|
||||||
|
|
||||||
|
for (int i = 0; i < s.length(); i++) {
|
||||||
|
char c = s.charAt(i);
|
||||||
|
if (!Character.isLetterOrDigit(c) &&
|
||||||
|
!"\"' .-".contains(s.subSequence(i, i + 1))) {
|
||||||
|
Separator separator = separatorMap.get(c);
|
||||||
|
if (separator == null) {
|
||||||
|
separator = new Separator();
|
||||||
|
separator.separator = c;
|
||||||
|
|
||||||
|
separatorMap.put(c, separator);
|
||||||
|
separators.add(separator);
|
||||||
|
}
|
||||||
|
separator.currentLineCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Separator separator : separators) {
|
||||||
|
separator.totalCount += separator.currentLineCount;
|
||||||
|
separator.totalOfSquaredCount += separator.currentLineCount * separator.currentLineCount;
|
||||||
|
separator.currentLineCount = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (separators.size() > 0) {
|
||||||
|
for (Separator separator : separators) {
|
||||||
|
separator.averagePerLine = separator.totalCount / (double) lineCount;
|
||||||
|
separator.stddev = Math.sqrt(
|
||||||
|
separator.totalOfSquaredCount / (double) lineCount -
|
||||||
|
separator.averagePerLine * separator.averagePerLine);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort(separators, new Comparator<Separator>() {
|
||||||
|
@Override
|
||||||
|
public int compare(Separator sep0, Separator sep1) {
|
||||||
|
return Double.compare(sep0.stddev, sep1.stddev);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
for (Separator separator : separators) {
|
||||||
|
if (separator.stddev / separator.averagePerLine < 0.1) {
|
||||||
|
return separator;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,205 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.expr.ExpressionUtils;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
import com.google.refine.model.Column;
|
||||||
|
import com.google.refine.model.ModelException;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||||
|
static public interface TableDataReader {
|
||||||
|
public List<Object> getNextRowOfCells() throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||||
|
List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = new JSONObject();
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "ignoreLines", -1); // number of blank lines at the beginning to ignore
|
||||||
|
JSONUtilities.safePut(options, "headerLines", 1); // number of header lines
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "skipDataLines", 0); // number of initial data lines to skip
|
||||||
|
JSONUtilities.safePut(options, "storeBlankRows", true);
|
||||||
|
JSONUtilities.safePut(options, "storeBlankCellsAsNulls", true);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(options, "includeFileSources", fileRecords.size() > 1);
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Forwards the stream-mode flag unchanged to the parent parser base.
 *
 * @param useInputStream passed straight through to the superclass constructor
 */
protected TabularImportingParserBase(boolean useInputStream) {
    super(useInputStream);
}
|
||||||
|
|
||||||
|
protected void readTable(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
TableDataReader reader,
|
||||||
|
String fileSource,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
int ignoreLines = JSONUtilities.getInt(options, "ignoreLines", -1);
|
||||||
|
int headerLines = JSONUtilities.getInt(options, "headerLines", 1);
|
||||||
|
int skipDataLines = JSONUtilities.getInt(options, "skipDataLines", 0);
|
||||||
|
int limit2 = JSONUtilities.getInt(options, "limit", -1);
|
||||||
|
if (limit > 0) {
|
||||||
|
if (limit2 > 0) {
|
||||||
|
limit2 = Math.min(limit, limit2);
|
||||||
|
} else {
|
||||||
|
limit2 = limit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
||||||
|
|
||||||
|
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
|
||||||
|
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
|
||||||
|
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
||||||
|
|
||||||
|
String fileNameColumnName = "File";
|
||||||
|
if (includeFileSources) {
|
||||||
|
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
|
||||||
|
try {
|
||||||
|
project.columnModel.addColumn(
|
||||||
|
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
|
||||||
|
} catch (ModelException e) {
|
||||||
|
// Ignore: We already checked for duplicate name.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> columnNames = new ArrayList<String>();
|
||||||
|
|
||||||
|
List<Object> cells = null;
|
||||||
|
int rowsWithData = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (!job.canceled && (cells = reader.getNextRowOfCells()) != null) {
|
||||||
|
if (ignoreLines > 0) {
|
||||||
|
ignoreLines--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (headerLines > 0) { // header lines
|
||||||
|
for (int c = 0; c < cells.size(); c++) {
|
||||||
|
Object cell = cells.get(c);
|
||||||
|
|
||||||
|
String columnName;
|
||||||
|
if (cell == null) {
|
||||||
|
// add column even if cell is blank
|
||||||
|
columnName = "";
|
||||||
|
} else if (cell instanceof Cell) {
|
||||||
|
columnName = ((Cell) cell).value.toString().trim();
|
||||||
|
} else {
|
||||||
|
columnName = cell.toString().trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
ImporterUtilities.appendColumnName(columnNames, c, columnName);
|
||||||
|
}
|
||||||
|
|
||||||
|
headerLines--;
|
||||||
|
if (headerLines == 0) {
|
||||||
|
ImporterUtilities.setupColumns(project, columnNames);
|
||||||
|
}
|
||||||
|
} else { // data lines
|
||||||
|
Row row = new Row(columnNames.size());
|
||||||
|
|
||||||
|
if (storeBlankRows) {
|
||||||
|
rowsWithData++;
|
||||||
|
} else if (cells.size() > 0) {
|
||||||
|
rowsWithData++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (skipDataLines <= 0 || rowsWithData > skipDataLines) {
|
||||||
|
boolean rowHasData = false;
|
||||||
|
for (int c = 0; c < cells.size(); c++) {
|
||||||
|
Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c);
|
||||||
|
|
||||||
|
Object value = cells.get(c);
|
||||||
|
if (value != null && value instanceof Cell) {
|
||||||
|
row.setCell(column.getCellIndex(), (Cell) value);
|
||||||
|
rowHasData = true;
|
||||||
|
} else if (ExpressionUtils.isNonBlankData(value)) {
|
||||||
|
Serializable storedValue;
|
||||||
|
if (value instanceof String) {
|
||||||
|
storedValue = guessCellValueTypes ?
|
||||||
|
ImporterUtilities.parseCellValue((String) value) : (String) value;
|
||||||
|
} else {
|
||||||
|
storedValue = ExpressionUtils.wrapStorable(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
row.setCell(column.getCellIndex(), new Cell(storedValue, null));
|
||||||
|
rowHasData = true;
|
||||||
|
} else if (!storeBlankCellsAsNulls) {
|
||||||
|
row.setCell(column.getCellIndex(), new Cell("", null));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rowHasData || storeBlankRows) {
|
||||||
|
if (includeFileSources) {
|
||||||
|
row.setCell(
|
||||||
|
project.columnModel.getColumnByName(fileNameColumnName).getCellIndex(),
|
||||||
|
new Cell(fileSource, null));
|
||||||
|
}
|
||||||
|
project.rows.add(row);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (limit2 > 0 && project.rows.size() >= limit2) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
63
main/src/com/google/refine/importers/TextFormatGuesser.java
Normal file
63
main/src/com/google/refine/importers/TextFormatGuesser.java
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
package com.google.refine.importers;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
|
||||||
|
import com.google.refine.importing.FormatGuesser;
|
||||||
|
|
||||||
|
public class TextFormatGuesser implements FormatGuesser {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String guess(File file, String encoding, String seedFormat) {
|
||||||
|
try {
|
||||||
|
InputStream is = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||||
|
|
||||||
|
int totalBytes = 0;
|
||||||
|
int bytes;
|
||||||
|
int lineBreaks = 0;
|
||||||
|
|
||||||
|
CharBuffer charBuffer = CharBuffer.allocate(4096);
|
||||||
|
while (totalBytes < 64 * 1024 && (bytes = reader.read(charBuffer)) > 0) {
|
||||||
|
lineBreaks += countSubstrings(charBuffer.toString(), "\n");
|
||||||
|
|
||||||
|
charBuffer.clear();
|
||||||
|
totalBytes += bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineBreaks > 3) {
|
||||||
|
return "text/line-based";
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public int countSubstrings(String s, String sub) {
|
||||||
|
int count = 0;
|
||||||
|
int from = 0;
|
||||||
|
while (from < s.length()) {
|
||||||
|
int i = s.indexOf(sub, from);
|
||||||
|
if (i < 0) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
from = i + sub.length();
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
}
|
@ -1,238 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.LineNumberReader;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
|
|
||||||
import au.com.bytecode.opencsv.CSVParser;
|
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
|
||||||
import com.google.refine.model.Cell;
|
|
||||||
import com.google.refine.model.Project;
|
|
||||||
import com.google.refine.model.Row;
|
|
||||||
|
|
||||||
public class TsvCsvImporter implements ReaderImporter,StreamImporter {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
|
||||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
|
||||||
|
|
||||||
String sep = options.getProperty("separator"); // auto-detect if not present
|
|
||||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
|
||||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
|
||||||
|
|
||||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
|
||||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
|
||||||
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
|
|
||||||
boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false);
|
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
|
||||||
|
|
||||||
try {
|
|
||||||
read(lnReader, project, sep,
|
|
||||||
limit, skip, ignoreLines, headerLines,
|
|
||||||
guessValueType, splitIntoColumns, ignoreQuotes
|
|
||||||
);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ImportException("Import failed",e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param lnReader
|
|
||||||
* LineNumberReader used to read file or string contents
|
|
||||||
* @param project
|
|
||||||
* The project into which the parsed data will be added
|
|
||||||
* @param sep
|
|
||||||
* The character used to denote different the break between data points
|
|
||||||
* @param limit
|
|
||||||
* The maximum number of rows of data to import
|
|
||||||
* @param skip
|
|
||||||
* The number of initial data rows to skip
|
|
||||||
* @param ignoreLines
|
|
||||||
* The number of initial lines within the data source which should be ignored entirely
|
|
||||||
* @param headerLines
|
|
||||||
* The number of lines in the data source which describe each column
|
|
||||||
* @param guessValueType
|
|
||||||
* Whether the parser should try and guess the type of the value being parsed
|
|
||||||
* @param splitIntoColumns
|
|
||||||
* Whether the parser should try and split the data source into columns
|
|
||||||
* @param ignoreQuotes
|
|
||||||
* Quotation marks are ignored, and all separators and newlines treated as such regardless of whether they are within quoted values
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes ) throws IOException{
|
|
||||||
CSVParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
|
|
||||||
new CSVParser(sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators.
|
|
||||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
|
||||||
(char) 0, // escape character
|
|
||||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
|
||||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
|
||||||
ignoreQuotes) : null;
|
|
||||||
List<String> columnNames = new ArrayList<String>();
|
|
||||||
String line = null;
|
|
||||||
int rowsWithData = 0;
|
|
||||||
|
|
||||||
while ((line = lnReader.readLine()) != null) {
|
|
||||||
if (ignoreLines > 0) {
|
|
||||||
ignoreLines--;
|
|
||||||
continue;
|
|
||||||
} else if (StringUtils.isBlank(line)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//guess separator
|
|
||||||
if (parser == null) {
|
|
||||||
int tab = line.indexOf('\t');
|
|
||||||
if (tab >= 0) {
|
|
||||||
parser = new CSVParser('\t',
|
|
||||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
|
||||||
(char) 0, // escape character
|
|
||||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
|
||||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
|
||||||
ignoreQuotes);
|
|
||||||
} else {
|
|
||||||
parser = new CSVParser(',',
|
|
||||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
|
||||||
(char) 0, // escape character
|
|
||||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
|
||||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
|
||||||
ignoreQuotes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (headerLines > 0) {
|
|
||||||
//column headers
|
|
||||||
headerLines--;
|
|
||||||
|
|
||||||
ArrayList<String> cells = getCells(line, parser, lnReader, splitIntoColumns);
|
|
||||||
|
|
||||||
for (int c = 0; c < cells.size(); c++) {
|
|
||||||
String cell = cells.get(c).trim();
|
|
||||||
//add column even if cell is blank
|
|
||||||
ImporterUtilities.appendColumnName(columnNames, c, cell);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
//data
|
|
||||||
Row row = new Row(columnNames.size());
|
|
||||||
|
|
||||||
ArrayList<String> cells = getCells(line, parser, lnReader, splitIntoColumns);
|
|
||||||
|
|
||||||
if( cells != null && cells.size() > 0 )
|
|
||||||
rowsWithData++;
|
|
||||||
|
|
||||||
if (skip <=0 || rowsWithData > skip){
|
|
||||||
//add parsed data to row
|
|
||||||
for(String s : cells){
|
|
||||||
if (ExpressionUtils.isNonBlankData(s)) {
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
}else{
|
|
||||||
row.cells.add(null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
project.rows.add(row);
|
|
||||||
project.columnModel.setMaxCellIndex(row.cells.size());
|
|
||||||
|
|
||||||
ImporterUtilities.ensureColumnsInRowExist(columnNames, row);
|
|
||||||
|
|
||||||
if (limit > 0 && project.rows.size() >= limit) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ImporterUtilities.setupColumns(project, columnNames);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected ArrayList<String> getCells(String line, CSVParser parser, LineNumberReader lnReader, boolean splitIntoColumns) throws IOException{
|
|
||||||
ArrayList<String> cells = new ArrayList<String>();
|
|
||||||
if(splitIntoColumns){
|
|
||||||
String[] tokens = parser.parseLineMulti(line);
|
|
||||||
for(String s : tokens){
|
|
||||||
cells.add(s);
|
|
||||||
}
|
|
||||||
while(parser.isPending()){
|
|
||||||
tokens = parser.parseLineMulti(lnReader.readLine());
|
|
||||||
for(String s : tokens){
|
|
||||||
cells.add(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}else{
|
|
||||||
cells.add(line);
|
|
||||||
}
|
|
||||||
return cells;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void read(InputStream inputStream, Project project,
|
|
||||||
ProjectMetadata metadata, Properties options) throws ImportException {
|
|
||||||
read(new InputStreamReader(inputStream), project, metadata, options);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
|
||||||
if (contentType != null) {
|
|
||||||
contentType = contentType.toLowerCase().trim();
|
|
||||||
return
|
|
||||||
"text/plain".equals(contentType) ||
|
|
||||||
"text/csv".equals(contentType) ||
|
|
||||||
"text/x-csv".equals(contentType) ||
|
|
||||||
"text/tab-separated-value".equals(contentType);
|
|
||||||
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (fileName.endsWith(".tsv")) {
|
|
||||||
return true;
|
|
||||||
}else if (fileName.endsWith(".csv")){
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
@ -33,99 +33,274 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.PushbackInputStream;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import javax.servlet.ServletException;
|
||||||
import org.slf4j.LoggerFactory;
|
import javax.xml.stream.XMLInputFactory;
|
||||||
|
import javax.xml.stream.XMLStreamConstants;
|
||||||
|
import javax.xml.stream.XMLStreamException;
|
||||||
|
import javax.xml.stream.XMLStreamReader;
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
import com.google.refine.importers.parsers.XmlParser;
|
import com.google.refine.importers.tree.TreeReader;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class XmlImporter implements StreamImporter {
|
public class XmlImporter extends TreeImportingParserBase {
|
||||||
|
public XmlImporter() {
|
||||||
|
super(true);
|
||||||
|
}
|
||||||
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
static private class PreviewParsingState {
|
||||||
|
int tokenCount;
|
||||||
|
}
|
||||||
|
|
||||||
public static final int BUFFER_SIZE = 64 * 1024;
|
final static private int PREVIEW_PARSING_LIMIT = 1000;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void read(
|
public JSONObject createParserUIInitializationData(
|
||||||
InputStream inputStream,
|
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||||
Project project,
|
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||||
ProjectMetadata metadata, Properties options
|
try {
|
||||||
) throws ImportException {
|
JSONObject firstFileRecord = fileRecords.get(0);
|
||||||
logger.trace("XmlImporter.read");
|
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
InputStream is = new FileInputStream(file);
|
||||||
|
try {
|
||||||
|
XMLStreamReader parser = createXMLStreamReader(is);
|
||||||
|
PreviewParsingState state = new PreviewParsingState();
|
||||||
|
|
||||||
String[] recordPath = null;
|
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||||
{
|
int tokenType = parser.next();
|
||||||
byte[] buffer = new byte[BUFFER_SIZE];
|
state.tokenCount++;
|
||||||
int bytes_read = 0;
|
if (tokenType == XMLStreamConstants.START_ELEMENT) {
|
||||||
try {//fill the buffer with data
|
JSONObject rootElement = descendElement(parser, state);
|
||||||
while (bytes_read < BUFFER_SIZE) {
|
if (rootElement != null) {
|
||||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
JSONUtilities.safePut(options, "dom", rootElement);
|
||||||
if (c == -1) break;
|
break;
|
||||||
bytes_read +=c ;
|
|
||||||
}
|
|
||||||
pis.unread(buffer, 0, bytes_read);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ImportException("Read error",e);
|
|
||||||
}
|
|
||||||
|
|
||||||
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
|
|
||||||
TreeParser parser = new XmlParser(iStream);
|
|
||||||
if (options.containsKey("importer-record-tag")) {
|
|
||||||
try{
|
|
||||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
|
||||||
parser,
|
|
||||||
options.getProperty("importer-record-tag"));
|
|
||||||
}catch(Exception e){
|
|
||||||
// silent
|
|
||||||
// e.printStackTrace();
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
recordPath = XmlImportUtilities.detectRecordElement(parser);
|
// ignore everything else
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
// Ignore
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
|
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
final static private JSONObject descendElement(XMLStreamReader parser, PreviewParsingState state) throws XMLStreamException {
|
||||||
|
JSONObject result = new JSONObject();
|
||||||
|
{
|
||||||
|
String name = parser.getLocalName();
|
||||||
|
JSONUtilities.safePut(result, "n", name);
|
||||||
|
|
||||||
|
String prefix = parser.getPrefix();
|
||||||
|
if (prefix != null) {
|
||||||
|
JSONUtilities.safePut(result, "p", prefix);
|
||||||
|
}
|
||||||
|
String nsUri = parser.getNamespaceURI();
|
||||||
|
if (nsUri != null) {
|
||||||
|
JSONUtilities.safePut(result, "uri", nsUri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (recordPath == null)
|
int namespaceCount = parser.getNamespaceCount();
|
||||||
return;
|
if (namespaceCount > 0) {
|
||||||
|
JSONArray namespaces = new JSONArray();
|
||||||
|
JSONUtilities.safePut(result, "ns", namespaces);
|
||||||
|
|
||||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
for (int i = 0; i < namespaceCount; i++) {
|
||||||
XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup);
|
JSONObject namespace = new JSONObject();
|
||||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
JSONUtilities.append(namespaces, namespace);
|
||||||
|
JSONUtilities.safePut(namespace, "p", parser.getNamespacePrefix(i));
|
||||||
|
JSONUtilities.safePut(namespace, "uri", parser.getNamespaceURI(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
project.columnModel.update();
|
int attributeCount = parser.getAttributeCount();
|
||||||
|
if (attributeCount > 0) {
|
||||||
|
JSONArray attributes = new JSONArray();
|
||||||
|
JSONUtilities.safePut(result, "a", attributes);
|
||||||
|
|
||||||
|
for (int i = 0; i < attributeCount; i++) {
|
||||||
|
JSONObject attribute = new JSONObject();
|
||||||
|
JSONUtilities.append(attributes, attribute);
|
||||||
|
JSONUtilities.safePut(attribute, "n", parser.getAttributeLocalName(i));
|
||||||
|
JSONUtilities.safePut(attribute, "v", parser.getAttributeValue(i));
|
||||||
|
String prefix = parser.getAttributePrefix(i);
|
||||||
|
if (prefix != null) {
|
||||||
|
JSONUtilities.safePut(attribute, "p", prefix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONArray children = new JSONArray();
|
||||||
|
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||||
|
int tokenType = parser.next();
|
||||||
|
state.tokenCount++;
|
||||||
|
if (tokenType == XMLStreamConstants.END_ELEMENT) {
|
||||||
|
break;
|
||||||
|
} else if (tokenType == XMLStreamConstants.START_ELEMENT) {
|
||||||
|
JSONObject childElement = descendElement(parser, state);
|
||||||
|
if (childElement != null) {
|
||||||
|
JSONUtilities.append(children, childElement);
|
||||||
|
}
|
||||||
|
} else if (tokenType == XMLStreamConstants.CHARACTERS ||
|
||||||
|
tokenType == XMLStreamConstants.CDATA ||
|
||||||
|
tokenType == XMLStreamConstants.SPACE) {
|
||||||
|
JSONObject childElement = new JSONObject();
|
||||||
|
JSONUtilities.safePut(childElement, "t", parser.getText());
|
||||||
|
JSONUtilities.append(children, childElement);
|
||||||
|
} else {
|
||||||
|
// ignore everything else
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (children.length() > 0) {
|
||||||
|
JSONUtilities.safePut(result, "c", children);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean canImportData(String contentType, String fileName) {
|
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||||
if (contentType != null) {
|
ImportingJob job, String fileSource, InputStream inputStream,
|
||||||
contentType = contentType.toLowerCase().trim();
|
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
|
||||||
|
List<Exception> exceptions) {
|
||||||
|
|
||||||
if("application/xml".equals(contentType) ||
|
try {
|
||||||
"text/xml".equals(contentType) ||
|
parseOneFile(project, metadata, job, fileSource,
|
||||||
"application/rss+xml".equals(contentType) ||
|
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
|
||||||
"application/atom+xml".equals(contentType)) {
|
} catch (XMLStreamException e) {
|
||||||
return true;
|
exceptions.add(e);
|
||||||
}
|
|
||||||
} else if (fileName != null) {
|
|
||||||
fileName = fileName.toLowerCase();
|
|
||||||
if (
|
|
||||||
fileName.endsWith(".xml") ||
|
|
||||||
fileName.endsWith(".atom") ||
|
|
||||||
fileName.endsWith(".rss")
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
static public class XmlParser implements TreeReader {
|
||||||
|
final protected XMLStreamReader parser;
|
||||||
|
|
||||||
|
public XmlParser(InputStream inputStream) throws XMLStreamException {
|
||||||
|
parser = createXMLStreamReader(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Token next() throws ServletException {
|
||||||
|
try {
|
||||||
|
if (!parser.hasNext()) {
|
||||||
|
throw new ServletException("End of XML stream");
|
||||||
|
}
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
int currentToken = -1;
|
||||||
|
try {
|
||||||
|
currentToken = parser.next();
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mapToToken(currentToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Token mapToToken(int token) throws ServletException {
|
||||||
|
switch(token){
|
||||||
|
case XMLStreamConstants.START_ELEMENT: return Token.StartEntity;
|
||||||
|
case XMLStreamConstants.END_ELEMENT: return Token.EndEntity;
|
||||||
|
case XMLStreamConstants.CHARACTERS: return Token.Value;
|
||||||
|
case XMLStreamConstants.START_DOCUMENT: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.END_DOCUMENT: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.SPACE: return Token.Value;
|
||||||
|
case XMLStreamConstants.PROCESSING_INSTRUCTION: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.NOTATION_DECLARATION: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.NAMESPACE: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.ENTITY_REFERENCE: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.DTD: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.COMMENT: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.CDATA: return Token.Ignorable;
|
||||||
|
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
|
||||||
|
default:
|
||||||
|
return Token.Ignorable;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Token current() throws ServletException{
|
||||||
|
return this.mapToToken(parser.getEventType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() throws ServletException{
|
||||||
|
try {
|
||||||
|
return parser.hasNext();
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFieldName() throws ServletException{
|
||||||
|
try{
|
||||||
|
return parser.getLocalName();
|
||||||
|
}catch(IllegalStateException e){
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getPrefix(){
|
||||||
|
return parser.getPrefix();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getFieldValue(){
|
||||||
|
return parser.getText();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getAttributeCount(){
|
||||||
|
return parser.getAttributeCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAttributeValue(int index){
|
||||||
|
return parser.getAttributeValue(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAttributePrefix(int index){
|
||||||
|
return parser.getAttributePrefix(index);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAttributeLocalName(int index){
|
||||||
|
return parser.getAttributeLocalName(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final static private XMLStreamReader createXMLStreamReader(InputStream inputStream) throws XMLStreamException {
|
||||||
|
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||||
|
factory.setProperty(XMLInputFactory.IS_COALESCING, true);
|
||||||
|
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
|
||||||
|
|
||||||
|
return factory.createXMLStreamReader(inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,210 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
|
||||||
import org.codehaus.jackson.JsonFactory;
|
|
||||||
import org.codehaus.jackson.JsonParseException;
|
|
||||||
import org.codehaus.jackson.JsonParser;
|
|
||||||
import org.codehaus.jackson.JsonToken;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
public class JSONParser implements TreeParser{
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
|
||||||
|
|
||||||
JsonFactory factory = new JsonFactory();
|
|
||||||
JsonParser parser = null;
|
|
||||||
|
|
||||||
//The following is a workaround for inconsistent Jackson JsonParser
|
|
||||||
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
|
||||||
Boolean thisTokenIsAFieldName = false;
|
|
||||||
String lastFieldName = null;
|
|
||||||
//end of workaround
|
|
||||||
|
|
||||||
public JSONParser(InputStream inputStream){
|
|
||||||
try {
|
|
||||||
parser = factory.createJsonParser(inputStream);
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does nothing. All Json is treated as elements
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int getAttributeCount() {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does nothing. All Json is treated as elements
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getAttributeLocalName(int index) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does nothing. All Json is treated as elements
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getAttributePrefix(int index) {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does nothing. All Json is treated as elements
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getAttributeValue(int index) {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TreeParserToken getEventType() throws ServletException {
|
|
||||||
return this.mapToTreeParserToken(parser.getCurrentToken());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getLocalName() throws ServletException{
|
|
||||||
try {
|
|
||||||
String text = parser.getCurrentName();
|
|
||||||
|
|
||||||
//The following is a workaround for inconsistent Jackson JsonParser
|
|
||||||
if(text == null){
|
|
||||||
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
|
|
||||||
text = this.lastFieldName;
|
|
||||||
else
|
|
||||||
text = "__anonymous__";
|
|
||||||
}
|
|
||||||
//end of workaround
|
|
||||||
|
|
||||||
return text;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does nothing. Json does not have prefixes
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public String getPrefix() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getText() throws ServletException {
|
|
||||||
try {
|
|
||||||
return parser.getText();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() throws ServletException {
|
|
||||||
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TreeParserToken next() throws ServletException {
|
|
||||||
JsonToken next;
|
|
||||||
try {
|
|
||||||
next = parser.nextToken();
|
|
||||||
} catch (JsonParseException e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(next == null)
|
|
||||||
throw new ServletException("No more Json Tokens in stream");
|
|
||||||
|
|
||||||
//The following is a workaround for inconsistent Jackson JsonParser
|
|
||||||
if(next == JsonToken.FIELD_NAME){
|
|
||||||
try {
|
|
||||||
this.thisTokenIsAFieldName = true;
|
|
||||||
this.lastFieldName = parser.getCurrentName();
|
|
||||||
} catch (Exception e) {
|
|
||||||
//silent
|
|
||||||
}
|
|
||||||
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
|
|
||||||
if(this.thisTokenIsAFieldName){
|
|
||||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
|
|
||||||
this.thisTokenIsAFieldName = false;
|
|
||||||
}else{
|
|
||||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
|
||||||
this.lastFieldName = null;
|
|
||||||
}
|
|
||||||
}else{
|
|
||||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
|
||||||
this.lastFieldName = null;
|
|
||||||
this.thisTokenIsAFieldName = false;
|
|
||||||
}
|
|
||||||
//end of workaround
|
|
||||||
|
|
||||||
return mapToTreeParserToken(next);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected TreeParserToken mapToTreeParserToken(JsonToken token){
|
|
||||||
switch(token){
|
|
||||||
case START_ARRAY: return TreeParserToken.StartEntity;
|
|
||||||
case END_ARRAY: return TreeParserToken.EndEntity;
|
|
||||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
|
||||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
|
||||||
case VALUE_STRING: return TreeParserToken.Value;
|
|
||||||
case FIELD_NAME: return TreeParserToken.Ignorable; //returned by the getLocalName function()
|
|
||||||
case VALUE_NUMBER_INT: return TreeParserToken.Value;
|
|
||||||
//Json does not have START_DOCUMENT token type (so ignored as default)
|
|
||||||
//Json does not have END_DOCUMENT token type (so ignored as default)
|
|
||||||
case VALUE_TRUE : return TreeParserToken.Value;
|
|
||||||
case VALUE_NUMBER_FLOAT : return TreeParserToken.Value;
|
|
||||||
case VALUE_NULL : return TreeParserToken.Value;
|
|
||||||
case VALUE_FALSE : return TreeParserToken.Value;
|
|
||||||
case VALUE_EMBEDDED_OBJECT : return TreeParserToken.Ignorable;
|
|
||||||
case NOT_AVAILABLE : return TreeParserToken.Ignorable;
|
|
||||||
default: return TreeParserToken.Ignorable;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,70 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
|
||||||
|
|
||||||
import java.io.LineNumberReader;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import com.google.refine.importers.ImporterUtilities;
|
|
||||||
import com.google.refine.model.Cell;
|
|
||||||
import com.google.refine.model.Row;
|
|
||||||
|
|
||||||
public class NonSplitRowParser extends RowParser {
|
|
||||||
|
|
||||||
public List<String> split(String line, LineNumberReader lineReader) {
|
|
||||||
List<String> results = new ArrayList<String>(1);
|
|
||||||
|
|
||||||
results.add(line.trim());
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
|
||||||
if (line.trim().isEmpty()) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(line) : line;
|
|
||||||
if (value != null) {
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
row.cells.add(null);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,85 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
|
||||||
|
|
||||||
import java.io.LineNumberReader;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
|
|
||||||
import com.google.refine.expr.ExpressionUtils;
|
|
||||||
import com.google.refine.importers.ImporterUtilities;
|
|
||||||
import com.google.refine.model.Cell;
|
|
||||||
import com.google.refine.model.Row;
|
|
||||||
|
|
||||||
public class SeparatorRowParser extends RowParser {
|
|
||||||
|
|
||||||
String sep;
|
|
||||||
|
|
||||||
public SeparatorRowParser(String sep) {
|
|
||||||
this.sep = sep;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> split(String line, LineNumberReader lineReader) {
|
|
||||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
|
||||||
|
|
||||||
List<String> results = new ArrayList<String>();
|
|
||||||
for (int c = 0; c < cells.length; c++) {
|
|
||||||
results.add(cells[c]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
|
||||||
boolean hasData = false;
|
|
||||||
|
|
||||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
|
||||||
for (int c = 0; c < cells.length; c++) {
|
|
||||||
String text = cells[c];
|
|
||||||
|
|
||||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(text) : text;
|
|
||||||
if (ExpressionUtils.isNonBlankData(value)) {
|
|
||||||
row.cells.add(new Cell(value, null));
|
|
||||||
hasData = true;
|
|
||||||
} else {
|
|
||||||
row.cells.add(null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return hasData;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,160 +0,0 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
* Redistributions in binary form must reproduce the above
|
|
||||||
copyright notice, this list of conditions and the following disclaimer
|
|
||||||
in the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
* Neither the name of Google Inc. nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
||||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
||||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
||||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
|
||||||
|
|
||||||
import java.io.InputStream;
|
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
|
||||||
import javax.xml.stream.FactoryConfigurationError;
|
|
||||||
import javax.xml.stream.XMLInputFactory;
|
|
||||||
import javax.xml.stream.XMLStreamConstants;
|
|
||||||
import javax.xml.stream.XMLStreamException;
|
|
||||||
import javax.xml.stream.XMLStreamReader;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
public class XmlParser implements TreeParser{
|
|
||||||
final static Logger logger = LoggerFactory.getLogger("XmlParser");
|
|
||||||
|
|
||||||
XMLStreamReader parser = null;
|
|
||||||
|
|
||||||
public XmlParser(InputStream inputStream){
|
|
||||||
try {
|
|
||||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
|
||||||
factory.setProperty(XMLInputFactory.IS_COALESCING, true);
|
|
||||||
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
|
|
||||||
parser = factory.createXMLStreamReader(inputStream);
|
|
||||||
} catch (XMLStreamException e) {
|
|
||||||
// silent
|
|
||||||
// e.printStackTrace();
|
|
||||||
} catch (FactoryConfigurationError e) {
|
|
||||||
// silent
|
|
||||||
// e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TreeParserToken next() throws ServletException{
|
|
||||||
try {
|
|
||||||
if(!parser.hasNext())
|
|
||||||
throw new ServletException("End of XML stream");
|
|
||||||
} catch (XMLStreamException e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
int currentToken = -1;
|
|
||||||
try {
|
|
||||||
currentToken = parser.next();
|
|
||||||
} catch (XMLStreamException e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return mapToTreeParserToken(currentToken);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected TreeParserToken mapToTreeParserToken(int token) throws ServletException {
|
|
||||||
switch(token){
|
|
||||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
|
||||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
|
||||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
|
||||||
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.SPACE: return TreeParserToken.Value;
|
|
||||||
case XMLStreamConstants.PROCESSING_INSTRUCTION: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.NOTATION_DECLARATION: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.NAMESPACE: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.ENTITY_REFERENCE: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.DTD: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.COMMENT: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.CDATA: return TreeParserToken.Ignorable;
|
|
||||||
case XMLStreamConstants.ATTRIBUTE: return TreeParserToken.Ignorable;
|
|
||||||
default:
|
|
||||||
return TreeParserToken.Ignorable;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public TreeParserToken getEventType() throws ServletException{
|
|
||||||
return this.mapToTreeParserToken(parser.getEventType());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() throws ServletException{
|
|
||||||
try {
|
|
||||||
return parser.hasNext();
|
|
||||||
} catch (XMLStreamException e) {
|
|
||||||
throw new ServletException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getLocalName() throws ServletException{
|
|
||||||
try{
|
|
||||||
return parser.getLocalName();
|
|
||||||
}catch(IllegalStateException e){
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getPrefix(){
|
|
||||||
return parser.getPrefix();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getText(){
|
|
||||||
return parser.getText();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getAttributeCount(){
|
|
||||||
return parser.getAttributeCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getAttributeValue(int index){
|
|
||||||
return parser.getAttributeValue(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getAttributePrefix(int index){
|
|
||||||
return parser.getAttributePrefix(index);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getAttributeLocalName(int index){
|
|
||||||
return parser.getAttributeLocalName(index);
|
|
||||||
}
|
|
||||||
}
|
|
23
main/src/com/google/refine/importers/tree/ImportColumn.java
Normal file
23
main/src/com/google/refine/importers/tree/ImportColumn.java
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A column is used to describe a branch-terminating element in a tree structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ImportColumn extends ImportVertical {
|
||||||
|
public int cellIndex;
|
||||||
|
public int nextRowIndex;
|
||||||
|
public boolean blankOnFirstRow;
|
||||||
|
|
||||||
|
public ImportColumn() {}
|
||||||
|
|
||||||
|
public ImportColumn(String name) { //required for testing
|
||||||
|
super.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void tabulate() {
|
||||||
|
// already done the tabulation elsewhere
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A column group describes a branch in tree structured data
|
||||||
|
*/
|
||||||
|
public class ImportColumnGroup extends ImportVertical {
|
||||||
|
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
|
||||||
|
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
|
||||||
|
public int nextRowIndex;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void tabulate() {
|
||||||
|
for (ImportColumn c : columns.values()) {
|
||||||
|
c.tabulate();
|
||||||
|
nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
|
||||||
|
}
|
||||||
|
for (ImportColumnGroup g : subgroups.values()) {
|
||||||
|
g.tabulate();
|
||||||
|
nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("name=%s, columns={%s}, subgroups={{%s}}",
|
||||||
|
name,StringUtils.join(columns.keySet(), ','),
|
||||||
|
StringUtils.join(subgroups.keySet(),','));
|
||||||
|
}
|
||||||
|
}
|
14
main/src/com/google/refine/importers/tree/ImportRecord.java
Normal file
14
main/src/com/google/refine/importers/tree/ImportRecord.java
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.google.refine.model.Cell;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A record describes a data element in a tree-structure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class ImportRecord {
|
||||||
|
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
/**
 * Common base for the named pieces of an import layout. It carries a name and
 * a non-blank count, and leaves the tabulation step to subclasses.
 */
abstract class ImportVertical {
    /** Name of this piece; empty string until assigned. */
    public String name = "";

    /** Non-blank count maintained by subclasses and their callers. */
    public int nonBlankCount;

    /** Subclass-specific tabulation step. */
    abstract void tabulate();
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
 * An element which holds sub-elements we shall import as records,
 * identified by its path and carrying a count.
 */
class RecordElementCandidate {
    String[] path;
    int count;

    /** Renders the path array in {@code Arrays.toString} form. */
    public String toString() {
        String rendered = Arrays.toString(path);
        return rendered;
    }
}
|
@ -31,22 +31,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.refine.importers.ImporterUtilities;
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
@ -54,83 +50,6 @@ import com.google.refine.model.Project;
|
|||||||
public abstract class TreeImportUtilities {
|
public abstract class TreeImportUtilities {
|
||||||
final static Logger logger = LoggerFactory.getLogger("TreeImportUtilities");
|
final static Logger logger = LoggerFactory.getLogger("TreeImportUtilities");
|
||||||
|
|
||||||
/**
|
|
||||||
* An element which holds sub-elements we
|
|
||||||
* shall import as records
|
|
||||||
*/
|
|
||||||
static protected class RecordElementCandidate {
|
|
||||||
String[] path;
|
|
||||||
int count;
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return Arrays.toString(path);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static protected abstract class ImportVertical {
|
|
||||||
public String name = "";
|
|
||||||
public int nonBlankCount;
|
|
||||||
|
|
||||||
abstract void tabulate();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A column group describes a branch in tree structured data
|
|
||||||
*/
|
|
||||||
static public class ImportColumnGroup extends ImportVertical {
|
|
||||||
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
|
|
||||||
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
|
|
||||||
public int nextRowIndex;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
void tabulate() {
|
|
||||||
for (ImportColumn c : columns.values()) {
|
|
||||||
c.tabulate();
|
|
||||||
nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
|
|
||||||
}
|
|
||||||
for (ImportColumnGroup g : subgroups.values()) {
|
|
||||||
g.tabulate();
|
|
||||||
nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("name=%s, columns={%s}, subgroups={{%s}}",
|
|
||||||
name,StringUtils.join(columns.keySet(), ','),
|
|
||||||
StringUtils.join(subgroups.keySet(),','));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A column is used to describe a branch-terminating element in a tree structure
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static public class ImportColumn extends ImportVertical {
|
|
||||||
public int cellIndex;
|
|
||||||
public int nextRowIndex;
|
|
||||||
public boolean blankOnFirstRow;
|
|
||||||
|
|
||||||
public ImportColumn() {}
|
|
||||||
|
|
||||||
public ImportColumn(String name) { //required for testing
|
|
||||||
super.name = name;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
void tabulate() {
|
|
||||||
// already done the tabulation elsewhere
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A record describes a data element in a tree-structure
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static public class ImportRecord {
|
|
||||||
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
|
||||||
}
|
|
||||||
|
|
||||||
static protected void sortRecordElementCandidates(List<RecordElementCandidate> list) {
|
static protected void sortRecordElementCandidates(List<RecordElementCandidate> list) {
|
||||||
Collections.sort(list, new Comparator<RecordElementCandidate>() {
|
Collections.sort(list, new Comparator<RecordElementCandidate>() {
|
||||||
public int compare(RecordElementCandidate o1, RecordElementCandidate o2) {
|
public int compare(RecordElementCandidate o1, RecordElementCandidate o2) {
|
@ -0,0 +1,169 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.NotImplementedException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importers.ImporterUtilities;
|
||||||
|
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingParser;
|
||||||
|
import com.google.refine.importing.ImportingUtilities;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
|
abstract public class TreeImportingParserBase implements ImportingParser {
|
||||||
|
final protected boolean useInputStream;
|
||||||
|
|
||||||
|
protected TreeImportingParserBase(boolean useInputStream) {
|
||||||
|
this.useInputStream = useInputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||||
|
List<JSONObject> fileRecords, String format) {
|
||||||
|
JSONObject options = new JSONObject();
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void parse(Project project, ProjectMetadata metadata,
|
||||||
|
ImportingJob job, List<JSONObject> fileRecords, String format,
|
||||||
|
int limit, JSONObject options, List<Exception> exceptions) {
|
||||||
|
|
||||||
|
MultiFileReadingProgress progress = ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);
|
||||||
|
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||||
|
|
||||||
|
for (JSONObject fileRecord : fileRecords) {
|
||||||
|
try {
|
||||||
|
parseOneFile(project, metadata, job, fileRecord, rootColumnGroup, limit, options, exceptions, progress);
|
||||||
|
} catch (IOException e) {
|
||||||
|
exceptions.add(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (limit > 0 && project.rows.size() >= limit) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||||
|
project.columnModel.update();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
JSONObject fileRecord,
|
||||||
|
ImportColumnGroup rootColumnGroup,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions,
|
||||||
|
final MultiFileReadingProgress progress
|
||||||
|
) throws IOException {
|
||||||
|
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||||
|
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||||
|
|
||||||
|
progress.startFile(fileSource);
|
||||||
|
try {
|
||||||
|
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||||
|
try {
|
||||||
|
if (useInputStream) {
|
||||||
|
parseOneFile(project, metadata, job, fileSource, inputStream,
|
||||||
|
rootColumnGroup, limit, options, exceptions);
|
||||||
|
} else {
|
||||||
|
Reader reader = ImportingUtilities.getFileReader(file, fileRecord);
|
||||||
|
parseOneFile(project, metadata, job, fileSource, reader,
|
||||||
|
rootColumnGroup, limit, options, exceptions);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
inputStream.close();
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
progress.endFile(fileSource, file.length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
Reader reader,
|
||||||
|
ImportColumnGroup rootColumnGroup,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
InputStream inputStream,
|
||||||
|
ImportColumnGroup rootColumnGroup,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
throw new NotImplementedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseOneFile(
|
||||||
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
String fileSource,
|
||||||
|
TreeReader treeParser,
|
||||||
|
ImportColumnGroup rootColumnGroup,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
) {
|
||||||
|
String[] recordPath = JSONUtilities.getStringArray(options, "recordPath");
|
||||||
|
|
||||||
|
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit);
|
||||||
|
}
|
||||||
|
}
|
@ -31,17 +31,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
public interface TreeReader {
|
||||||
|
public enum Token {
|
||||||
|
Ignorable,
|
||||||
|
StartEntity,
|
||||||
|
EndEntity,
|
||||||
|
Value
|
||||||
|
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
|
||||||
|
}
|
||||||
|
|
||||||
public interface TreeParser {
|
public Token current() throws Exception; //aka getCurrentToken
|
||||||
public TreeParserToken next() throws ServletException;
|
|
||||||
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
|
public boolean hasNext() throws Exception;
|
||||||
public boolean hasNext() throws ServletException;
|
public Token next() throws Exception;
|
||||||
public String getLocalName() throws ServletException; //aka getFieldName
|
|
||||||
|
public String getFieldName() throws Exception; //aka getFieldName
|
||||||
public String getPrefix();
|
public String getPrefix();
|
||||||
public String getText() throws ServletException;
|
public String getFieldValue() throws Exception;
|
||||||
|
|
||||||
public int getAttributeCount();
|
public int getAttributeCount();
|
||||||
public String getAttributeValue(int index);
|
public String getAttributeValue(int index);
|
||||||
public String getAttributePrefix(int index);
|
public String getAttributePrefix(int index);
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importers.tree;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -40,13 +40,10 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.tree.TreeReader.Token;
|
||||||
import com.google.refine.importers.parsers.TreeParserToken;
|
|
||||||
import com.google.refine.model.Cell;
|
import com.google.refine.model.Cell;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
@ -54,11 +51,11 @@ import com.google.refine.model.Row;
|
|||||||
public class XmlImportUtilities extends TreeImportUtilities {
|
public class XmlImportUtilities extends TreeImportUtilities {
|
||||||
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
|
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
|
||||||
|
|
||||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
static public String[] detectPathFromTag(TreeReader parser, String tag) {
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
List<String> path = detectRecordElement(parser, tag);
|
List<String> path = detectRecordElement(parser, tag);
|
||||||
if (path != null) {
|
if (path != null) {
|
||||||
String[] path2 = new String[path.size()];
|
String[] path2 = new String[path.size()];
|
||||||
@ -90,14 +87,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
* null if the tag is not found.
|
* null if the tag is not found.
|
||||||
* @throws ServletException
|
* @throws ServletException
|
||||||
*/
|
*/
|
||||||
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws Exception {
|
||||||
try{
|
try{
|
||||||
if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT)
|
if(parser.current() == Token.Ignorable)//XMLStreamConstants.START_DOCUMENT)
|
||||||
parser.next();
|
parser.next();
|
||||||
|
|
||||||
String localName = parser.getLocalName();
|
String localName = parser.getFieldName();
|
||||||
String fullName = composeName(parser.getPrefix(), localName);
|
String fullName = composeName(parser.getPrefix(), localName);
|
||||||
if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) {
|
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
|
||||||
List<String> path = new LinkedList<String>();
|
List<String> path = new LinkedList<String>();
|
||||||
path.add(localName);
|
path.add(localName);
|
||||||
|
|
||||||
@ -105,10 +102,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||||
break;
|
break;
|
||||||
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
List<String> path = detectRecordElement(parser, tag);
|
List<String> path = detectRecordElement(parser, tag);
|
||||||
if (path != null) {
|
if (path != null) {
|
||||||
path.add(0, localName);
|
path.add(0, localName);
|
||||||
@ -116,7 +113,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch(ServletException e){
|
} catch (Exception e) {
|
||||||
// silent
|
// silent
|
||||||
// e.printStackTrace();
|
// e.printStackTrace();
|
||||||
}
|
}
|
||||||
@ -136,18 +133,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
* The path to the most numerous of the possible candidates.
|
* The path to the most numerous of the possible candidates.
|
||||||
* null if no candidates were found (less than 6 recurrences)
|
* null if no candidates were found (less than 6 recurrences)
|
||||||
*/
|
*/
|
||||||
static public String[] detectRecordElement(TreeParser parser) {
|
static public String[] detectRecordElement(TreeReader parser) {
|
||||||
logger.trace("detectRecordElement(inputStream)");
|
logger.trace("detectRecordElement(inputStream)");
|
||||||
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {
|
if (eventType == Token.StartEntity) {
|
||||||
RecordElementCandidate candidate =
|
RecordElementCandidate candidate =
|
||||||
detectRecordElement(
|
detectRecordElement(
|
||||||
parser,
|
parser,
|
||||||
new String[] { parser.getLocalName() });
|
new String[] { parser.getFieldName() });
|
||||||
|
|
||||||
if (candidate != null) {
|
if (candidate != null) {
|
||||||
candidates.add(candidate);
|
candidates.add(candidate);
|
||||||
@ -168,8 +165,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
|
static protected RecordElementCandidate detectRecordElement(TreeReader parser, String[] path) {
|
||||||
logger.trace("detectRecordElement(TreeParser, String[])");
|
logger.trace("detectRecordElement(TreeReader, String[])");
|
||||||
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
||||||
|
|
||||||
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
||||||
@ -178,21 +175,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.EndEntity ) {
|
if (eventType == Token.EndEntity ) {
|
||||||
break;
|
break;
|
||||||
} else if (eventType == TreeParserToken.Value) {
|
} else if (eventType == Token.Value) {
|
||||||
try{
|
try{
|
||||||
if (parser.getText().trim().length() > 0) {
|
if (parser.getFieldValue().trim().length() > 0) {
|
||||||
textNodeCount++;
|
textNodeCount++;
|
||||||
}
|
}
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
//silent
|
//silent
|
||||||
}
|
}
|
||||||
} else if (eventType == TreeParserToken.StartEntity) {
|
} else if (eventType == Token.StartEntity) {
|
||||||
childElementNodeCount++;
|
childElementNodeCount++;
|
||||||
|
|
||||||
String tagName = parser.getLocalName();
|
String tagName = parser.getFieldName();
|
||||||
|
|
||||||
immediateChildCandidateMap.put(
|
immediateChildCandidateMap.put(
|
||||||
tagName,
|
tagName,
|
||||||
@ -261,17 +258,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
|
|
||||||
|
|
||||||
static public void importTreeData(
|
static public void importTreeData(
|
||||||
TreeParser parser,
|
TreeReader parser,
|
||||||
Project project,
|
Project project,
|
||||||
String[] recordPath,
|
String[] recordPath,
|
||||||
ImportColumnGroup rootColumnGroup
|
ImportColumnGroup rootColumnGroup,
|
||||||
|
int limit
|
||||||
) {
|
) {
|
||||||
logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)");
|
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
|
||||||
try {
|
try {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {
|
if (eventType == Token.StartEntity) {
|
||||||
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@ -292,26 +290,30 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
*/
|
*/
|
||||||
static protected void findRecord(
|
static protected void findRecord(
|
||||||
Project project,
|
Project project,
|
||||||
TreeParser parser,
|
TreeReader parser,
|
||||||
String[] recordPath,
|
String[] recordPath,
|
||||||
int pathIndex,
|
int pathIndex,
|
||||||
ImportColumnGroup rootColumnGroup
|
ImportColumnGroup rootColumnGroup,
|
||||||
) throws ServletException {
|
int limit
|
||||||
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
|
) throws Exception {
|
||||||
|
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup");
|
||||||
|
|
||||||
if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){
|
if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){
|
||||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
String tagName = parser.getLocalName();
|
String recordPathSegment = recordPath[pathIndex];
|
||||||
if (tagName.equals(recordPath[pathIndex])) {
|
|
||||||
|
String localName = parser.getFieldName();
|
||||||
|
String fullName = composeName(parser.getPrefix(), localName);
|
||||||
|
if (recordPathSegment.equals(localName) || recordPathSegment.equals(fullName)) {
|
||||||
if (pathIndex < recordPath.length - 1) {
|
if (pathIndex < recordPath.length - 1) {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {
|
if (eventType == Token.StartEntity) {
|
||||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
|
||||||
} else if (eventType == TreeParserToken.EndEntity ) {
|
} else if (eventType == Token.EndEntity ) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -323,12 +325,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static protected void skip(TreeParser parser) throws ServletException {
|
static protected void skip(TreeReader parser) throws Exception {
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||||
skip(parser);
|
skip(parser);
|
||||||
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
} else if (eventType == Token.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -344,10 +346,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
*/
|
*/
|
||||||
static protected void processRecord(
|
static protected void processRecord(
|
||||||
Project project,
|
Project project,
|
||||||
TreeParser parser,
|
TreeReader parser,
|
||||||
ImportColumnGroup rootColumnGroup
|
ImportColumnGroup rootColumnGroup
|
||||||
) throws ServletException {
|
) throws Exception {
|
||||||
logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)");
|
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
|
||||||
ImportRecord record = new ImportRecord();
|
ImportRecord record = new ImportRecord();
|
||||||
|
|
||||||
processSubRecord(project, parser, rootColumnGroup, record);
|
processSubRecord(project, parser, rootColumnGroup, record);
|
||||||
@ -382,19 +384,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
*/
|
*/
|
||||||
static protected void processSubRecord(
|
static protected void processSubRecord(
|
||||||
Project project,
|
Project project,
|
||||||
TreeParser parser,
|
TreeReader parser,
|
||||||
ImportColumnGroup columnGroup,
|
ImportColumnGroup columnGroup,
|
||||||
ImportRecord record
|
ImportRecord record
|
||||||
) throws ServletException {
|
) throws Exception {
|
||||||
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
|
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)");
|
||||||
|
|
||||||
if(parser.getEventType() == TreeParserToken.Ignorable)
|
if(parser.current() == Token.Ignorable)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
||||||
project,
|
project,
|
||||||
columnGroup,
|
columnGroup,
|
||||||
composeName(parser.getPrefix(), parser.getLocalName()));
|
composeName(parser.getPrefix(), parser.getFieldName()));
|
||||||
|
|
||||||
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
|
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
|
||||||
|
|
||||||
@ -413,8 +415,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while (parser.hasNext()) {
|
while (parser.hasNext()) {
|
||||||
TreeParserToken eventType = parser.next();
|
Token eventType = parser.next();
|
||||||
if (eventType == TreeParserToken.StartEntity) {
|
if (eventType == Token.StartEntity) {
|
||||||
processSubRecord(
|
processSubRecord(
|
||||||
project,
|
project,
|
||||||
parser,
|
parser,
|
||||||
@ -422,9 +424,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
record
|
record
|
||||||
);
|
);
|
||||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
|
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||||
String text = parser.getText();
|
String text = parser.getFieldValue();
|
||||||
String colName = parser.getLocalName();
|
String colName = parser.getFieldName();
|
||||||
if(text != null){
|
if(text != null){
|
||||||
text = text.trim();
|
text = text.trim();
|
||||||
if (text.length() > 0) {
|
if (text.length() > 0) {
|
||||||
@ -437,7 +439,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (eventType == TreeParserToken.EndEntity) {
|
} else if (eventType == Token.EndEntity) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -451,8 +453,4 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
|||||||
}
|
}
|
||||||
thisColumnGroup.nextRowIndex = nextRowIndex;
|
thisColumnGroup.nextRowIndex = nextRowIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
@ -0,0 +1,264 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
import com.google.refine.commands.HttpUtilities;
|
||||||
|
import com.google.refine.importing.ImportingManager.Format;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
import com.google.refine.util.ParsingUtilities;
|
||||||
|
|
||||||
|
public class DefaultImportingController implements ImportingController {
|
||||||
|
|
||||||
|
protected RefineServlet servlet;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(RefineServlet servlet) {
|
||||||
|
this.servlet = servlet;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
// TODO Auto-generated method stub
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The uploaded file is in the POST body as a "file part". If
|
||||||
|
* we call request.getParameter() then the POST body will get
|
||||||
|
* read and we won't have a chance to parse the body ourselves.
|
||||||
|
* This is why we have to parse the URL for parameters ourselves.
|
||||||
|
*/
|
||||||
|
Properties parameters = ParsingUtilities.parseUrlParameters(request);
|
||||||
|
String subCommand = parameters.getProperty("subCommand");
|
||||||
|
if ("load-raw-data".equals(subCommand)) {
|
||||||
|
doLoadRawData(request, response, parameters);
|
||||||
|
} else if ("update-file-selection".equals(subCommand)) {
|
||||||
|
doUpdateFileSelection(request, response, parameters);
|
||||||
|
} else if ("initialize-parser-ui".equals(subCommand)) {
|
||||||
|
doInitializeParserUI(request, response, parameters);
|
||||||
|
} else if ("update-format-and-options".equals(subCommand)) {
|
||||||
|
doUpdateFormatAndOptions(request, response, parameters);
|
||||||
|
} else if ("create-project".equals(subCommand)) {
|
||||||
|
doCreateProject(request, response, parameters);
|
||||||
|
} else {
|
||||||
|
HttpUtilities.respond(response, "error", "No such sub command");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doLoadRawData(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
final JSONObject config = getConfig(job);
|
||||||
|
if (!("new".equals(config.getString("state")))) {
|
||||||
|
HttpUtilities.respond(response, "error", "Job already started; cannot load more data");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ImportingUtilities.loadDataAndPrepareJob(
|
||||||
|
request, response, parameters, job, config);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doUpdateFileSelection(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
JSONObject config = getConfig(job);
|
||||||
|
if (!("ready".equals(config.getString("state")))) {
|
||||||
|
HttpUtilities.respond(response, "error", "Job not ready");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONArray fileSelectionArray = ParsingUtilities.evaluateJsonStringToArray(
|
||||||
|
request.getParameter("fileSelection"));
|
||||||
|
|
||||||
|
ImportingUtilities.updateJobWithNewFileSelection(job, fileSelectionArray);
|
||||||
|
|
||||||
|
replyWithJobData(request, response, job);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doUpdateFormatAndOptions(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
JSONObject config = getConfig(job);
|
||||||
|
if (!("ready".equals(config.getString("state")))) {
|
||||||
|
HttpUtilities.respond(response, "error", "Job not ready");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String format = request.getParameter("format");
|
||||||
|
JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
|
||||||
|
request.getParameter("options"));
|
||||||
|
|
||||||
|
List<Exception> exceptions = new LinkedList<Exception>();
|
||||||
|
|
||||||
|
ImportingUtilities.previewParse(job, format, optionObj, exceptions);
|
||||||
|
|
||||||
|
HttpUtilities.respond(response, "ok", "done");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doInitializeParserUI(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String format = request.getParameter("format");
|
||||||
|
Format formatRecord = ImportingManager.formatToRecord.get(format);
|
||||||
|
if (formatRecord != null && formatRecord.parser != null) {
|
||||||
|
JSONObject options = formatRecord.parser.createParserUIInitializationData(
|
||||||
|
job, ImportingUtilities.getSelectedFileRecords(job), format);
|
||||||
|
JSONObject result = new JSONObject();
|
||||||
|
JSONUtilities.safePut(result, "status", "ok");
|
||||||
|
JSONUtilities.safePut(result, "options", options);
|
||||||
|
|
||||||
|
HttpUtilities.respond(response, result.toString());
|
||||||
|
} else {
|
||||||
|
HttpUtilities.respond(response, "error", "Unrecognized format or format has no parser");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doCreateProject(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||||
|
ImportingJob job = ImportingManager.getJob(jobID);
|
||||||
|
if (job == null) {
|
||||||
|
HttpUtilities.respond(response, "error", "No such import job");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
JSONObject config = getConfig(job);
|
||||||
|
if (!("ready".equals(config.getString("state")))) {
|
||||||
|
HttpUtilities.respond(response, "error", "Job not ready");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String format = request.getParameter("format");
|
||||||
|
JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
|
||||||
|
request.getParameter("options"));
|
||||||
|
|
||||||
|
List<Exception> exceptions = new LinkedList<Exception>();
|
||||||
|
|
||||||
|
ImportingUtilities.createProject(job, format, optionObj, exceptions);
|
||||||
|
|
||||||
|
HttpUtilities.respond(response, "ok", "done");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private JSONObject getConfig(ImportingJob job) {
|
||||||
|
if (job.config == null) {
|
||||||
|
job.config = new JSONObject();
|
||||||
|
JSONUtilities.safePut(job.config, "state", "new");
|
||||||
|
JSONUtilities.safePut(job.config, "hasData", false);
|
||||||
|
}
|
||||||
|
return job.config;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void replyWithJobData(HttpServletRequest request, HttpServletResponse response, ImportingJob job)
|
||||||
|
throws ServletException, IOException {
|
||||||
|
|
||||||
|
Writer w = response.getWriter();
|
||||||
|
JSONWriter writer = new JSONWriter(w);
|
||||||
|
try {
|
||||||
|
writer.object();
|
||||||
|
writer.key("code"); writer.value("ok");
|
||||||
|
writer.key("job"); job.write(writer, new Properties());
|
||||||
|
writer.endObject();
|
||||||
|
} catch (JSONException e) {
|
||||||
|
throw new ServletException(e);
|
||||||
|
} finally {
|
||||||
|
w.flush();
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
Copyright 2011, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -31,13 +31,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers.parsers;
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
public enum TreeParserToken {
|
public interface FormatGuesser {
|
||||||
Ignorable,
|
public String guess(File file, String encoding, String seedFormat);
|
||||||
StartEntity,
|
|
||||||
EndEntity,
|
|
||||||
Value
|
|
||||||
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
|
|
||||||
}
|
}
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
Copyright 2011, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -31,17 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import com.google.refine.HttpResponder;
|
||||||
|
|
||||||
public interface Importer {
|
public interface ImportingController extends HttpResponder {
|
||||||
|
|
||||||
/**
|
|
||||||
* Determine whether importer can handle given contentType and filename.
|
|
||||||
*
|
|
||||||
* @param contentType
|
|
||||||
* @param fileName
|
|
||||||
* @return true if the importer can handle this
|
|
||||||
*/
|
|
||||||
public boolean canImportData(String contentType, String fileName);
|
|
||||||
}
|
}
|
106
main/src/com/google/refine/importing/ImportingJob.java
Normal file
106
main/src/com/google/refine/importing/ImportingJob.java
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.google.refine.Jsonizable;
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
|
|
||||||
|
public class ImportingJob implements Jsonizable {
|
||||||
|
final public long id;
|
||||||
|
final public File dir; // Temporary directory where the data about this job is stored
|
||||||
|
|
||||||
|
public long lastTouched;
|
||||||
|
public JSONObject config = null;
|
||||||
|
|
||||||
|
public Project project;
|
||||||
|
public ProjectMetadata metadata;
|
||||||
|
public boolean canceled;
|
||||||
|
|
||||||
|
public ImportingJob(long id, File dir) {
|
||||||
|
this.id = id;
|
||||||
|
this.dir = dir;
|
||||||
|
|
||||||
|
dir.mkdirs();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void touch() {
|
||||||
|
lastTouched = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void prepareNewProject() {
|
||||||
|
if (project != null) {
|
||||||
|
project.dispose();
|
||||||
|
}
|
||||||
|
project = new Project();
|
||||||
|
metadata = new ProjectMetadata();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void dispose() {
|
||||||
|
if (project != null) {
|
||||||
|
project.dispose();
|
||||||
|
project = null;
|
||||||
|
}
|
||||||
|
metadata = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
FileUtils.deleteDirectory(dir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public File getRawDataDir() {
|
||||||
|
File dir2 = new File(dir, "raw-data");
|
||||||
|
dir2.mkdirs();
|
||||||
|
return dir2;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(JSONWriter writer, Properties options)
|
||||||
|
throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
writer.key("config"); writer.value(config);
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
}
|
257
main/src/com/google/refine/importing/ImportingManager.java
Normal file
257
main/src/com/google/refine/importing/ImportingManager.java
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
|
import com.google.refine.RefineServlet;
|
||||||
|
|
||||||
|
import edu.mit.simile.butterfly.ButterflyModule;
|
||||||
|
|
||||||
|
public class ImportingManager {
|
||||||
|
static public class Format {
|
||||||
|
final public String id;
|
||||||
|
final public String label;
|
||||||
|
final public boolean download;
|
||||||
|
final public String uiClass;
|
||||||
|
final public ImportingParser parser;
|
||||||
|
|
||||||
|
private Format(
|
||||||
|
String id,
|
||||||
|
String label,
|
||||||
|
boolean download,
|
||||||
|
String uiClass,
|
||||||
|
ImportingParser parser
|
||||||
|
) {
|
||||||
|
this.id = id;
|
||||||
|
this.label = label;
|
||||||
|
this.download = download;
|
||||||
|
this.uiClass = uiClass;
|
||||||
|
this.parser = parser;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static private RefineServlet servlet;
|
||||||
|
static private File importDir;
|
||||||
|
final static private Map<Long, ImportingJob> jobs = new HashMap<Long, ImportingJob>();
|
||||||
|
|
||||||
|
// Mapping from format to label, e.g., "text" to "Text files", "text/xml" to "XML files"
|
||||||
|
final static public Map<String, Format> formatToRecord = new HashMap<String, Format>();
|
||||||
|
|
||||||
|
// Mapping from format to guessers
|
||||||
|
final static public Map<String, List<FormatGuesser>> formatToGuessers = new HashMap<String, List<FormatGuesser>>();
|
||||||
|
|
||||||
|
// Mapping from file extension to format, e.g., ".xml" to "text/xml"
|
||||||
|
final static public Map<String, String> extensionToFormat = new HashMap<String, String>();
|
||||||
|
|
||||||
|
// Mapping from mime type to format, e.g., "application/json" to "text/json"
|
||||||
|
final static public Map<String, String> mimeTypeToFormat = new HashMap<String, String>();
|
||||||
|
|
||||||
|
// URL rewriters
|
||||||
|
final static public Set<UrlRewriter> urlRewriters = new HashSet<UrlRewriter>();
|
||||||
|
|
||||||
|
// Mapping from controller name to controller
|
||||||
|
final static public Map<String, ImportingController> controllers = new HashMap<String, ImportingController>();
|
||||||
|
|
||||||
|
static public void initialize(RefineServlet servlet) {
|
||||||
|
ImportingManager.servlet = servlet;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerFormat(String format, String label) {
|
||||||
|
registerFormat(format, label, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerFormat(String format, String label, String uiClass, ImportingParser parser) {
|
||||||
|
formatToRecord.put(format, new Format(format, label, true, uiClass, parser));
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerFormat(
|
||||||
|
String format, String label, boolean download, String uiClass, ImportingParser parser) {
|
||||||
|
formatToRecord.put(format, new Format(format, label, download, uiClass, parser));
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerFormatGuesser(String format, FormatGuesser guesser) {
|
||||||
|
List<FormatGuesser> guessers = formatToGuessers.get(format);
|
||||||
|
if (guessers == null) {
|
||||||
|
guessers = new LinkedList<FormatGuesser>();
|
||||||
|
formatToGuessers.put(format, guessers);
|
||||||
|
}
|
||||||
|
guessers.add(0, guesser); // prepend so that newer guessers take priority
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerExtension(String extension, String format) {
|
||||||
|
extensionToFormat.put(extension.startsWith(".") ? extension : ("." + extension), format);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerMimeType(String mimeType, String format) {
|
||||||
|
mimeTypeToFormat.put(mimeType, format);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerUrlRewriter(UrlRewriter urlRewriter) {
|
||||||
|
urlRewriters.add(urlRewriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void registerController(ButterflyModule module, String name, ImportingController controller) {
|
||||||
|
String key = module.getName() + "/" + name;
|
||||||
|
controllers.put(key, controller);
|
||||||
|
|
||||||
|
controller.init(servlet);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public File getImportDir() {
|
||||||
|
if (importDir == null) {
|
||||||
|
File tempDir = servlet.getTempDir();
|
||||||
|
importDir = tempDir == null ? new File(".import-temp") : new File(tempDir, "import");
|
||||||
|
|
||||||
|
if (importDir.exists()) {
|
||||||
|
try {
|
||||||
|
// start fresh
|
||||||
|
FileUtils.deleteDirectory(importDir);
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
importDir.mkdirs();
|
||||||
|
}
|
||||||
|
return importDir;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public ImportingJob createJob() {
|
||||||
|
long id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
|
||||||
|
File jobDir = new File(getImportDir(), Long.toString(id));
|
||||||
|
|
||||||
|
ImportingJob job = new ImportingJob(id, jobDir);
|
||||||
|
jobs.put(id, job);
|
||||||
|
|
||||||
|
return job;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public ImportingJob getJob(long id) {
|
||||||
|
return jobs.get(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void disposeJob(long id) {
|
||||||
|
ImportingJob job = getJob(id);
|
||||||
|
if (job != null) {
|
||||||
|
job.dispose();
|
||||||
|
jobs.remove(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void writeConfiguration(JSONWriter writer, Properties options) throws JSONException {
|
||||||
|
writer.object();
|
||||||
|
|
||||||
|
writer.key("formats");
|
||||||
|
writer.object();
|
||||||
|
for (String format : formatToRecord.keySet()) {
|
||||||
|
Format record = formatToRecord.get(format);
|
||||||
|
|
||||||
|
writer.key(format);
|
||||||
|
writer.object();
|
||||||
|
writer.key("id"); writer.value(record.id);
|
||||||
|
writer.key("label"); writer.value(record.label);
|
||||||
|
writer.key("download"); writer.value(record.download);
|
||||||
|
writer.key("uiClass"); writer.value(record.uiClass);
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
|
||||||
|
writer.key("mimeTypeToFormat");
|
||||||
|
writer.object();
|
||||||
|
for (String mimeType : mimeTypeToFormat.keySet()) {
|
||||||
|
writer.key(mimeType);
|
||||||
|
writer.value(mimeTypeToFormat.get(mimeType));
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
|
||||||
|
writer.key("extensionToFormat");
|
||||||
|
writer.object();
|
||||||
|
for (String extension : extensionToFormat.keySet()) {
|
||||||
|
writer.key(extension);
|
||||||
|
writer.value(extensionToFormat.get(extension));
|
||||||
|
}
|
||||||
|
writer.endObject();
|
||||||
|
|
||||||
|
writer.endObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getFormatFromFileName(String fileName) {
|
||||||
|
int start = 0;
|
||||||
|
while (true) {
|
||||||
|
int dot = fileName.indexOf('.', start);
|
||||||
|
if (dot < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
String extension = fileName.substring(dot);
|
||||||
|
String format = extensionToFormat.get(extension);
|
||||||
|
if (format != null) {
|
||||||
|
return format;
|
||||||
|
} else {
|
||||||
|
start = dot + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getFormatFromMimeType(String mimeType) {
|
||||||
|
return mimeTypeToFormat.get(mimeType);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getFormat(String fileName, String mimeType) {
|
||||||
|
String fileNameFormat = getFormatFromFileName(fileName);
|
||||||
|
String mimeTypeFormat = mimeType == null ? null : getFormatFromMimeType(mimeType);
|
||||||
|
if (mimeTypeFormat == null) {
|
||||||
|
return fileNameFormat;
|
||||||
|
} else if (fileNameFormat == null) {
|
||||||
|
return mimeTypeFormat;
|
||||||
|
} else if (fileNameFormat.startsWith(mimeTypeFormat)) {
|
||||||
|
// file name-based format is more specific
|
||||||
|
return fileNameFormat;
|
||||||
|
} else {
|
||||||
|
return mimeTypeFormat;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
|
|
||||||
Copyright 2010, Google Inc.
|
Copyright 2011, Google Inc.
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
@ -31,33 +31,51 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.google.refine.importers;
|
package com.google.refine.importing;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.ProjectMetadata;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
/**
|
public interface ImportingParser {
|
||||||
* Interface for importers which take a Reader as input.
|
/**
|
||||||
|
* Create data sufficient for the parser UI on the client side to do its work.
|
||||||
|
* For example, an XML parser UI would need to know some sample elements so it
|
||||||
|
* can let the user pick the path to the record elements.
|
||||||
|
*
|
||||||
|
* @param job
|
||||||
|
* @param fileRecords
|
||||||
|
* @param format
|
||||||
|
* @return JSONObject options
|
||||||
*/
|
*/
|
||||||
public interface ReaderImporter extends Importer {
|
public JSONObject createParserUIInitializationData(
|
||||||
|
ImportingJob job,
|
||||||
|
List<JSONObject> fileRecords,
|
||||||
|
String format
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read data from a input reader into project.
|
|
||||||
*
|
*
|
||||||
* @param reader
|
|
||||||
* reader to import data from. It is assumed to be positioned at
|
|
||||||
* the correct point and ready to go.
|
|
||||||
* @param project
|
* @param project
|
||||||
* project which will contain data
|
|
||||||
* @param metadata
|
* @param metadata
|
||||||
* metadata of new project
|
* @param fileRecords
|
||||||
* @param options
|
* @param format
|
||||||
* set of properties with import options
|
* @param limit maximum number of rows to create
|
||||||
* @throws ImportException
|
* @param options custom options put together by the UI corresponding to this parser,
|
||||||
|
* which the parser should understand
|
||||||
|
* @param exceptions
|
||||||
*/
|
*/
|
||||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options)
|
public void parse(
|
||||||
throws ImportException;
|
Project project,
|
||||||
|
ProjectMetadata metadata,
|
||||||
|
ImportingJob job,
|
||||||
|
List<JSONObject> fileRecords,
|
||||||
|
String format,
|
||||||
|
int limit,
|
||||||
|
JSONObject options,
|
||||||
|
List<Exception> exceptions
|
||||||
|
);
|
||||||
}
|
}
|
895
main/src/com/google/refine/importing/ImportingUtilities.java
Normal file
895
main/src/com/google/refine/importing/ImportingUtilities.java
Normal file
@ -0,0 +1,895 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.google.refine.importing;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLConnection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Properties;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
import java.util.zip.ZipEntry;
|
||||||
|
import java.util.zip.ZipInputStream;
|
||||||
|
|
||||||
|
import javax.servlet.ServletException;
|
||||||
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import org.apache.commons.fileupload.FileItem;
|
||||||
|
import org.apache.commons.fileupload.FileUploadException;
|
||||||
|
import org.apache.commons.fileupload.ProgressListener;
|
||||||
|
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||||
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
|
import org.apache.commons.fileupload.util.Streams;
|
||||||
|
import org.apache.commons.io.FileCleaningTracker;
|
||||||
|
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||||
|
import org.apache.tools.tar.TarEntry;
|
||||||
|
import org.apache.tools.tar.TarInputStream;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectManager;
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importing.ImportingManager.Format;
|
||||||
|
import com.google.refine.importing.UrlRewriter.Result;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
import com.ibm.icu.text.NumberFormat;
|
||||||
|
|
||||||
|
public class ImportingUtilities {
|
||||||
|
// Shared logger for the importing utilities, obtained under the name "importing-utilities".
final static protected Logger logger = LoggerFactory.getLogger("importing-utilities");
|
||||||
|
|
||||||
|
static public interface Progress {
|
||||||
|
public void setProgress(String message, int percent);
|
||||||
|
public boolean isCanceled();
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void loadDataAndPrepareJob(
|
||||||
|
HttpServletRequest request,
|
||||||
|
HttpServletResponse response,
|
||||||
|
Properties parameters,
|
||||||
|
final ImportingJob job,
|
||||||
|
JSONObject config) throws IOException, ServletException {
|
||||||
|
|
||||||
|
JSONObject retrievalRecord = new JSONObject();
|
||||||
|
JSONUtilities.safePut(config, "retrievalRecord", retrievalRecord);
|
||||||
|
JSONUtilities.safePut(config, "state", "loading-raw-data");
|
||||||
|
|
||||||
|
final JSONObject progress = new JSONObject();
|
||||||
|
JSONUtilities.safePut(config, "progress", progress);
|
||||||
|
try {
|
||||||
|
ImportingUtilities.retrieveContentFromPostRequest(
|
||||||
|
request,
|
||||||
|
parameters,
|
||||||
|
job.getRawDataDir(),
|
||||||
|
retrievalRecord,
|
||||||
|
new Progress() {
|
||||||
|
@Override
|
||||||
|
public void setProgress(String message, int percent) {
|
||||||
|
if (message != null) {
|
||||||
|
JSONUtilities.safePut(progress, "message", message);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(progress, "percent", percent);
|
||||||
|
}
|
||||||
|
public boolean isCanceled() {
|
||||||
|
return job.canceled;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
} catch (FileUploadException e) {
|
||||||
|
JSONUtilities.safePut(config, "state", "error");
|
||||||
|
JSONUtilities.safePut(config, "error", "Error uploading data");
|
||||||
|
|
||||||
|
throw new ServletException(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONArray fileSelectionIndexes = new JSONArray();
|
||||||
|
JSONUtilities.safePut(config, "fileSelection", fileSelectionIndexes);
|
||||||
|
|
||||||
|
String bestFormat = ImportingUtilities.autoSelectFiles(job, retrievalRecord, fileSelectionIndexes);
|
||||||
|
bestFormat = ImportingUtilities.guessBetterFormat(job, bestFormat);
|
||||||
|
|
||||||
|
JSONArray rankedFormats = new JSONArray();
|
||||||
|
JSONUtilities.safePut(config, "rankedFormats", rankedFormats);
|
||||||
|
ImportingUtilities.rankFormats(job, bestFormat, rankedFormats);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(config, "state", "ready");
|
||||||
|
JSONUtilities.safePut(config, "hasData", true);
|
||||||
|
config.remove("progress");
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void updateJobWithNewFileSelection(ImportingJob job, JSONArray fileSelectionArray) {
|
||||||
|
JSONUtilities.safePut(job.config, "fileSelection", fileSelectionArray);
|
||||||
|
|
||||||
|
String bestFormat = ImportingUtilities.getCommonFormatForSelectedFiles(job, fileSelectionArray);
|
||||||
|
bestFormat = ImportingUtilities.guessBetterFormat(job, bestFormat);
|
||||||
|
|
||||||
|
JSONArray rankedFormats = new JSONArray();
|
||||||
|
JSONUtilities.safePut(job.config, "rankedFormats", rankedFormats);
|
||||||
|
ImportingUtilities.rankFormats(job, bestFormat, rankedFormats);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void retrieveContentFromPostRequest(
|
||||||
|
HttpServletRequest request,
|
||||||
|
Properties parameters,
|
||||||
|
File rawDataDir,
|
||||||
|
JSONObject retrievalRecord,
|
||||||
|
final Progress progress
|
||||||
|
) throws FileUploadException, IOException {
|
||||||
|
JSONArray fileRecords = new JSONArray();
|
||||||
|
JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
|
||||||
|
|
||||||
|
int clipboardCount = 0;
|
||||||
|
int uploadCount = 0;
|
||||||
|
int downloadCount = 0;
|
||||||
|
int archiveCount = 0;
|
||||||
|
|
||||||
|
// This tracks the total progress, which involves uploading data from the client
|
||||||
|
// as well as downloading data from URLs.
|
||||||
|
final SavingUpdate update = new SavingUpdate() {
|
||||||
|
@Override
|
||||||
|
public void savedMore() {
|
||||||
|
progress.setProgress(null, calculateProgressPercent(totalExpectedSize, totalRetrievedSize));
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public boolean isCanceled() {
|
||||||
|
return progress.isCanceled();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
DiskFileItemFactory fileItemFactory = new DiskFileItemFactory();
|
||||||
|
fileItemFactory.setFileCleaningTracker(new FileCleaningTracker());
|
||||||
|
|
||||||
|
ServletFileUpload upload = new ServletFileUpload(fileItemFactory);
|
||||||
|
upload.setProgressListener(new ProgressListener() {
|
||||||
|
boolean setContentLength = false;
|
||||||
|
long lastBytesRead = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void update(long bytesRead, long contentLength, int itemCount) {
|
||||||
|
if (!setContentLength) {
|
||||||
|
// Only try to set the content length if we really know it.
|
||||||
|
if (contentLength >= 0) {
|
||||||
|
update.totalExpectedSize += contentLength;
|
||||||
|
setContentLength = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (setContentLength) {
|
||||||
|
update.totalRetrievedSize += (bytesRead - lastBytesRead);
|
||||||
|
lastBytesRead = bytesRead;
|
||||||
|
|
||||||
|
update.savedMore();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
progress.setProgress("Uploading data ...", -1);
|
||||||
|
for (Object obj : upload.parseRequest(request)) {
|
||||||
|
if (progress.isCanceled()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileItem fileItem = (FileItem) obj;
|
||||||
|
InputStream stream = fileItem.getInputStream();
|
||||||
|
|
||||||
|
String name = fileItem.getFieldName().toLowerCase();
|
||||||
|
if (fileItem.isFormField()) {
|
||||||
|
if (name.equals("clipboard")) {
|
||||||
|
File file = allocateFile(rawDataDir, "clipboard.txt");
|
||||||
|
|
||||||
|
JSONObject fileRecord = new JSONObject();
|
||||||
|
JSONUtilities.safePut(fileRecord, "origin", "clipboard");
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord, "format", "text");
|
||||||
|
JSONUtilities.safePut(fileRecord, "fileName", "(clipboard)");
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||||
|
|
||||||
|
progress.setProgress("Uploading pasted clipboard text",
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||||
|
|
||||||
|
clipboardCount++;
|
||||||
|
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord);
|
||||||
|
} else if (name.equals("download")) {
|
||||||
|
String urlString = Streams.asString(stream);
|
||||||
|
URL url = new URL(urlString);
|
||||||
|
|
||||||
|
JSONObject fileRecord = new JSONObject();
|
||||||
|
JSONUtilities.safePut(fileRecord, "origin", "download");
|
||||||
|
JSONUtilities.safePut(fileRecord, "url", urlString);
|
||||||
|
|
||||||
|
for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
|
||||||
|
Result result = rewriter.rewrite(urlString);
|
||||||
|
if (result != null) {
|
||||||
|
urlString = result.rewrittenUrl;
|
||||||
|
url = new URL(urlString);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "url", urlString);
|
||||||
|
JSONUtilities.safePut(fileRecord, "format", result.format);
|
||||||
|
if (!result.download) {
|
||||||
|
downloadCount++;
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
URLConnection urlConnection = url.openConnection();
|
||||||
|
InputStream stream2 = urlConnection.getInputStream();
|
||||||
|
try {
|
||||||
|
String fileName = url.getFile();
|
||||||
|
File file = allocateFile(rawDataDir, fileName);
|
||||||
|
|
||||||
|
int contentLength = urlConnection.getContentLength();
|
||||||
|
if (contentLength >= 0) {
|
||||||
|
update.totalExpectedSize += contentLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
|
||||||
|
JSONUtilities.safePut(fileRecord, "fileName", fileName);
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||||
|
|
||||||
|
progress.setProgress("Downloading " + urlString,
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
|
long actualLength = saveStreamToFile(stream, file, update);
|
||||||
|
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||||
|
if (contentLength >= 0) {
|
||||||
|
update.totalExpectedSize += (actualLength - contentLength);
|
||||||
|
} else {
|
||||||
|
update.totalExpectedSize += actualLength;
|
||||||
|
}
|
||||||
|
progress.setProgress("Saving " + urlString + " locally",
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
|
if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
|
||||||
|
archiveCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
downloadCount++;
|
||||||
|
} finally {
|
||||||
|
stream2.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { // is file content
|
||||||
|
String fileName = fileItem.getName();
|
||||||
|
if (fileName.length() > 0) {
|
||||||
|
long fileSize = fileItem.getSize();
|
||||||
|
|
||||||
|
File file = allocateFile(rawDataDir, fileName);
|
||||||
|
|
||||||
|
JSONObject fileRecord = new JSONObject();
|
||||||
|
JSONUtilities.safePut(fileRecord, "origin", "upload");
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", fileItem.getContentType());
|
||||||
|
JSONUtilities.safePut(fileRecord, "fileName", fileName);
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||||
|
|
||||||
|
progress.setProgress(
|
||||||
|
"Saving file " + fileName + " locally (" + formatBytes(fileSize) + " bytes)",
|
||||||
|
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||||
|
if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
|
||||||
|
archiveCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
uploadCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
|
||||||
|
JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
|
||||||
|
JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
|
||||||
|
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getRelativePath(File file, File dir) {
|
||||||
|
String location = file.getAbsolutePath().substring(dir.getAbsolutePath().length());
|
||||||
|
return (location.startsWith(File.separator)) ? location.substring(1) : location;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public File allocateFile(File dir, String name) {
|
||||||
|
File file = new File(dir, name);
|
||||||
|
|
||||||
|
int dot = name.indexOf('.');
|
||||||
|
String prefix = dot < 0 ? name : name.substring(0, dot);
|
||||||
|
String suffix = dot < 0 ? "" : name.substring(dot);
|
||||||
|
int index = 2;
|
||||||
|
while (file.exists()) {
|
||||||
|
file = new File(dir, prefix + "-" + index++ + suffix);
|
||||||
|
}
|
||||||
|
|
||||||
|
file.getParentFile().mkdirs();
|
||||||
|
|
||||||
|
return file;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Reader getFileReader(ImportingJob job, JSONObject fileRecord)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
|
||||||
|
return getFileReader(getFile(job, JSONUtilities.getString(fileRecord, "location", "")), fileRecord);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Reader getFileReader(File file, JSONObject fileRecord) throws FileNotFoundException {
|
||||||
|
return getReaderFromStream(new FileInputStream(file), fileRecord);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public Reader getReaderFromStream(InputStream inputStream, JSONObject fileRecord) {
|
||||||
|
String encoding = getEncoding(fileRecord);
|
||||||
|
if (encoding != null) {
|
||||||
|
try {
|
||||||
|
return new InputStreamReader(inputStream, encoding);
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
// Ignore and fall through
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new InputStreamReader(inputStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public File getFile(ImportingJob job, JSONObject fileRecord) {
|
||||||
|
return getFile(job, JSONUtilities.getString(fileRecord, "location", ""));
|
||||||
|
}
|
||||||
|
|
||||||
|
static public File getFile(ImportingJob job, String location) {
|
||||||
|
return new File(job.getRawDataDir(), location);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getFileSource(JSONObject fileRecord) {
|
||||||
|
return JSONUtilities.getString(
|
||||||
|
fileRecord,
|
||||||
|
"url",
|
||||||
|
JSONUtilities.getString(fileRecord, "fileName", "unknown")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
static private abstract class SavingUpdate {
|
||||||
|
public long totalExpectedSize = 0;
|
||||||
|
public long totalRetrievedSize = 0;
|
||||||
|
|
||||||
|
abstract public void savedMore();
|
||||||
|
abstract public boolean isCanceled();
|
||||||
|
}
|
||||||
|
static public long saveStreamToFile(InputStream stream, File file, SavingUpdate update) throws IOException {
|
||||||
|
long length = 0;
|
||||||
|
FileOutputStream fos = new FileOutputStream(file);
|
||||||
|
try {
|
||||||
|
byte[] bytes = new byte[4096];
|
||||||
|
int c;
|
||||||
|
while ((update == null || !update.isCanceled()) && (c = stream.read(bytes)) > 0) {
|
||||||
|
fos.write(bytes, 0, c);
|
||||||
|
length += c;
|
||||||
|
|
||||||
|
if (update != null) {
|
||||||
|
update.totalRetrievedSize += c;
|
||||||
|
update.savedMore();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
} finally {
|
||||||
|
fos.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public boolean postProcessRetrievedFile(
|
||||||
|
File file, JSONObject fileRecord, JSONArray fileRecords, final Progress progress) {
|
||||||
|
|
||||||
|
String mimeType = JSONUtilities.getString(fileRecord, "declaredMimeType", null);
|
||||||
|
File rawDataDir = file.getParentFile();
|
||||||
|
|
||||||
|
InputStream archiveIS = tryOpenAsArchive(file, mimeType);
|
||||||
|
if (archiveIS != null) {
|
||||||
|
try {
|
||||||
|
if (explodeArchive(rawDataDir, archiveIS, fileRecord, fileRecords, progress)) {
|
||||||
|
file.delete();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
archiveIS.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO: what to do?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
InputStream uncompressedIS = tryOpenAsCompressedFile(file, mimeType);
|
||||||
|
if (uncompressedIS != null) {
|
||||||
|
try {
|
||||||
|
File file2 = uncompressFile(rawDataDir, uncompressedIS, fileRecord, progress);
|
||||||
|
|
||||||
|
file.delete();
|
||||||
|
file = file2;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO: what to do?
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
archiveIS.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO: what to do?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
postProcessSingleRetrievedFile(file, fileRecord);
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void postProcessSingleRetrievedFile(File file, JSONObject fileRecord) {
|
||||||
|
if (!fileRecord.has("format")) {
|
||||||
|
JSONUtilities.safePut(fileRecord, "format",
|
||||||
|
ImportingManager.getFormat(
|
||||||
|
file.getName(),
|
||||||
|
JSONUtilities.getString(fileRecord, "declaredMimeType", null)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public InputStream tryOpenAsArchive(File file, String mimeType) {
|
||||||
|
String fileName = file.getName();
|
||||||
|
try {
|
||||||
|
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
||||||
|
return new TarInputStream(new GZIPInputStream(new FileInputStream(file)));
|
||||||
|
} else if (fileName.endsWith(".tar.bz2")) {
|
||||||
|
return new TarInputStream(new CBZip2InputStream(new FileInputStream(file)));
|
||||||
|
} else if (fileName.endsWith(".tar")) {
|
||||||
|
return new TarInputStream(new FileInputStream(file));
|
||||||
|
} else if (fileName.endsWith(".zip")) {
|
||||||
|
return new ZipInputStream(new FileInputStream(file));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public boolean explodeArchive(
|
||||||
|
File rawDataDir,
|
||||||
|
InputStream archiveIS,
|
||||||
|
JSONObject archiveFileRecord,
|
||||||
|
JSONArray fileRecords,
|
||||||
|
final Progress progress
|
||||||
|
) {
|
||||||
|
if (archiveIS instanceof TarInputStream) {
|
||||||
|
TarInputStream tis = (TarInputStream) archiveIS;
|
||||||
|
try {
|
||||||
|
TarEntry te;
|
||||||
|
while (!progress.isCanceled() && (te = tis.getNextEntry()) != null) {
|
||||||
|
if (!te.isDirectory()) {
|
||||||
|
String fileName2 = te.getName();
|
||||||
|
File file2 = allocateFile(rawDataDir, fileName2);
|
||||||
|
|
||||||
|
progress.setProgress("Extracting " + fileName2, -1);
|
||||||
|
|
||||||
|
JSONObject fileRecord2 = new JSONObject();
|
||||||
|
JSONUtilities.safePut(fileRecord2, "origin", JSONUtilities.getString(archiveFileRecord, "origin", null));
|
||||||
|
JSONUtilities.safePut(fileRecord2, "declaredEncoding", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "declaredMimeType", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "fileName", fileName2);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "archiveFileName", JSONUtilities.getString(archiveFileRecord, "fileName", null));
|
||||||
|
JSONUtilities.safePut(fileRecord2, "location", getRelativePath(file2, rawDataDir));
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord2, "size", saveStreamToFile(tis, file2, null));
|
||||||
|
postProcessSingleRetrievedFile(file2, fileRecord2);
|
||||||
|
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO: what to do?
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else if (archiveIS instanceof ZipInputStream) {
|
||||||
|
ZipInputStream zis = (ZipInputStream) archiveIS;
|
||||||
|
try {
|
||||||
|
ZipEntry ze;
|
||||||
|
while (!progress.isCanceled() && (ze = zis.getNextEntry()) != null) {
|
||||||
|
if (!ze.isDirectory()) {
|
||||||
|
String fileName2 = ze.getName();
|
||||||
|
File file2 = allocateFile(rawDataDir, fileName2);
|
||||||
|
|
||||||
|
progress.setProgress("Extracting " + fileName2, -1);
|
||||||
|
|
||||||
|
JSONObject fileRecord2 = new JSONObject();
|
||||||
|
JSONUtilities.safePut(fileRecord2, "origin", JSONUtilities.getString(archiveFileRecord, "origin", null));
|
||||||
|
JSONUtilities.safePut(fileRecord2, "declaredEncoding", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "declaredMimeType", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "fileName", fileName2);
|
||||||
|
JSONUtilities.safePut(fileRecord2, "archiveFileName", JSONUtilities.getString(archiveFileRecord, "fileName", null));
|
||||||
|
JSONUtilities.safePut(fileRecord2, "location", getRelativePath(file2, rawDataDir));
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord2, "size", saveStreamToFile(zis, file2, null));
|
||||||
|
postProcessSingleRetrievedFile(file2, fileRecord2);
|
||||||
|
|
||||||
|
JSONUtilities.append(fileRecords, fileRecord2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// TODO: what to do?
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public InputStream tryOpenAsCompressedFile(File file, String mimeType) {
|
||||||
|
String fileName = file.getName();
|
||||||
|
try {
|
||||||
|
if (fileName.endsWith(".gz")) {
|
||||||
|
return new GZIPInputStream(new FileInputStream(file));
|
||||||
|
} else if (fileName.endsWith(".bz2")) {
|
||||||
|
return new CBZip2InputStream(new FileInputStream(file));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public File uncompressFile(
|
||||||
|
File rawDataDir,
|
||||||
|
InputStream uncompressedIS,
|
||||||
|
JSONObject fileRecord,
|
||||||
|
final Progress progress
|
||||||
|
) throws IOException {
|
||||||
|
String fileName = JSONUtilities.getString(fileRecord, "fileName", "unknown");
|
||||||
|
File file2 = allocateFile(rawDataDir, fileName);
|
||||||
|
|
||||||
|
progress.setProgress("Uncompressing " + fileName, -1);
|
||||||
|
|
||||||
|
saveStreamToFile(uncompressedIS, file2, null);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredEncoding", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
|
||||||
|
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file2, rawDataDir));
|
||||||
|
|
||||||
|
return file2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static private int calculateProgressPercent(long totalExpectedSize, long totalRetrievedSize) {
|
||||||
|
return totalExpectedSize == 0 ? -1 : (int) (totalRetrievedSize * 100 / totalExpectedSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
static private String formatBytes(long bytes) {
|
||||||
|
return NumberFormat.getIntegerInstance().format(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getEncoding(JSONObject fileRecord) {
|
||||||
|
String encoding = JSONUtilities.getString(fileRecord, "encoding", null);
|
||||||
|
if (encoding == null) {
|
||||||
|
encoding = JSONUtilities.getString(fileRecord, "declaredEncoding", null);
|
||||||
|
}
|
||||||
|
return encoding;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String autoSelectFiles(ImportingJob job, JSONObject retrievalRecord, JSONArray fileSelectionIndexes) {
|
||||||
|
final Map<String, Integer> formatToCount = new HashMap<String, Integer>();
|
||||||
|
List<String> formats = new ArrayList<String>();
|
||||||
|
|
||||||
|
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||||
|
int count = fileRecords.length();
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, i);
|
||||||
|
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||||
|
if (format != null) {
|
||||||
|
if (formatToCount.containsKey(format)) {
|
||||||
|
formatToCount.put(format, formatToCount.get(format) + 1);
|
||||||
|
} else {
|
||||||
|
formatToCount.put(format, 1);
|
||||||
|
formats.add(format);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collections.sort(formats, new Comparator<String>() {
|
||||||
|
@Override
|
||||||
|
public int compare(String o1, String o2) {
|
||||||
|
return formatToCount.get(o2) - formatToCount.get(o1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
String bestFormat = formats.size() > 0 ? formats.get(0) : null;
|
||||||
|
if (JSONUtilities.getInt(retrievalRecord, "archiveCount", 0) == 0) {
|
||||||
|
// If there's no archive, then select everything
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
JSONUtilities.append(fileSelectionIndexes, i);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Otherwise, select files matching the best format
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, i);
|
||||||
|
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||||
|
if (format != null && format.equals(bestFormat)) {
|
||||||
|
JSONUtilities.append(fileSelectionIndexes, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bestFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public String getCommonFormatForSelectedFiles(ImportingJob job, JSONArray fileSelectionIndexes) {
|
||||||
|
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||||
|
|
||||||
|
final Map<String, Integer> formatToCount = new HashMap<String, Integer>();
|
||||||
|
List<String> formats = new ArrayList<String>();
|
||||||
|
|
||||||
|
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||||
|
int count = fileSelectionIndexes.length();
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
int index = JSONUtilities.getIntElement(fileSelectionIndexes, i, -1);
|
||||||
|
if (index >= 0 && index < fileRecords.length()) {
|
||||||
|
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, index);
|
||||||
|
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||||
|
if (format != null) {
|
||||||
|
if (formatToCount.containsKey(format)) {
|
||||||
|
formatToCount.put(format, formatToCount.get(format) + 1);
|
||||||
|
} else {
|
||||||
|
formatToCount.put(format, 1);
|
||||||
|
formats.add(format);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Collections.sort(formats, new Comparator<String>() {
|
||||||
|
@Override
|
||||||
|
public int compare(String o1, String o2) {
|
||||||
|
return formatToCount.get(o2) - formatToCount.get(o1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return formats.size() > 0 ? formats.get(0) : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String guessBetterFormat(ImportingJob job, String bestFormat) {
|
||||||
|
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||||
|
return retrievalRecord != null ? guessBetterFormat(job, retrievalRecord, bestFormat) : bestFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String guessBetterFormat(ImportingJob job, JSONObject retrievalRecord, String bestFormat) {
|
||||||
|
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||||
|
return fileRecords != null ? guessBetterFormat(job, fileRecords, bestFormat) : bestFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String guessBetterFormat(ImportingJob job, JSONArray fileRecords, String bestFormat) {
|
||||||
|
if (bestFormat != null && fileRecords != null && fileRecords.length() > 0) {
|
||||||
|
JSONObject firstFileRecord = JSONUtilities.getObjectElement(fileRecords, 0);
|
||||||
|
String encoding = getEncoding(firstFileRecord);
|
||||||
|
String location = JSONUtilities.getString(firstFileRecord, "location", null);
|
||||||
|
|
||||||
|
if (location != null) {
|
||||||
|
File file = new File(job.getRawDataDir(), location);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
String betterFormat = null;
|
||||||
|
|
||||||
|
List<FormatGuesser> guessers = ImportingManager.formatToGuessers.get(bestFormat);
|
||||||
|
if (guessers != null) {
|
||||||
|
for (FormatGuesser guesser : guessers) {
|
||||||
|
betterFormat = guesser.guess(file, encoding, bestFormat);
|
||||||
|
if (betterFormat != null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (betterFormat != null && !betterFormat.equals(bestFormat)) {
|
||||||
|
bestFormat = betterFormat;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bestFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void rankFormats(ImportingJob job, final String bestFormat, JSONArray rankedFormats) {
|
||||||
|
final Map<String, String[]> formatToSegments = new HashMap<String, String[]>();
|
||||||
|
|
||||||
|
boolean download = bestFormat == null ? true :
|
||||||
|
ImportingManager.formatToRecord.get(bestFormat).download;
|
||||||
|
|
||||||
|
List<String> formats = new ArrayList<String>(ImportingManager.formatToRecord.keySet().size());
|
||||||
|
for (String format : ImportingManager.formatToRecord.keySet()) {
|
||||||
|
Format record = ImportingManager.formatToRecord.get(format);
|
||||||
|
if (record.uiClass != null && record.parser != null && record.download == download) {
|
||||||
|
formats.add(format);
|
||||||
|
formatToSegments.put(format, format.split("/"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bestFormat == null) {
|
||||||
|
Collections.sort(formats);
|
||||||
|
} else {
|
||||||
|
Collections.sort(formats, new Comparator<String>() {
|
||||||
|
@Override
|
||||||
|
public int compare(String format1, String format2) {
|
||||||
|
if (format1.equals(bestFormat)) {
|
||||||
|
return -1;
|
||||||
|
} else if (format2.equals(bestFormat)) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return compareBySegments(format1, format2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int compareBySegments(String format1, String format2) {
|
||||||
|
int c = commonSegments(format2) - commonSegments(format1);
|
||||||
|
return c != 0 ? c : format1.compareTo(format2);
|
||||||
|
}
|
||||||
|
|
||||||
|
int commonSegments(String format) {
|
||||||
|
String[] bestSegments = formatToSegments.get(bestFormat);
|
||||||
|
String[] segments = formatToSegments.get(format);
|
||||||
|
if (bestSegments == null || segments == null) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < bestSegments.length && i < segments.length; i++) {
|
||||||
|
if (!bestSegments[i].equals(segments[i])) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String format : formats) {
|
||||||
|
JSONUtilities.append(rankedFormats, format);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public List<JSONObject> getSelectedFileRecords(ImportingJob job) {
|
||||||
|
List<JSONObject> results = new ArrayList<JSONObject>();
|
||||||
|
|
||||||
|
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||||
|
if (retrievalRecord != null) {
|
||||||
|
JSONArray fileRecordArray = JSONUtilities.getArray(retrievalRecord, "files");
|
||||||
|
if (fileRecordArray != null) {
|
||||||
|
JSONArray fileSelectionArray = JSONUtilities.getArray(job.config, "fileSelection");
|
||||||
|
if (fileSelectionArray != null) {
|
||||||
|
for (int i = 0; i < fileSelectionArray.length(); i++) {
|
||||||
|
int index = JSONUtilities.getIntElement(fileSelectionArray, i, -1);
|
||||||
|
if (index >= 0 && index < fileRecordArray.length()) {
|
||||||
|
results.add(JSONUtilities.getObjectElement(fileRecordArray, index));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void previewParse(ImportingJob job, String format, JSONObject optionObj, List<Exception> exceptions) {
|
||||||
|
Format record = ImportingManager.formatToRecord.get(format);
|
||||||
|
if (record == null || record.parser == null) {
|
||||||
|
// TODO: what to do?
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
job.prepareNewProject();
|
||||||
|
|
||||||
|
record.parser.parse(
|
||||||
|
job.project,
|
||||||
|
job.metadata,
|
||||||
|
job,
|
||||||
|
getSelectedFileRecords(job),
|
||||||
|
format,
|
||||||
|
100,
|
||||||
|
optionObj,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
|
||||||
|
job.project.update(); // update all internal models, indexes, caches, etc.
|
||||||
|
}
|
||||||
|
|
||||||
|
static public long createProject(
|
||||||
|
final ImportingJob job,
|
||||||
|
final String format,
|
||||||
|
final JSONObject optionObj,
|
||||||
|
final List<Exception> exceptions) {
|
||||||
|
final Format record = ImportingManager.formatToRecord.get(format);
|
||||||
|
if (record == null || record.parser == null) {
|
||||||
|
// TODO: what to do?
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
JSONUtilities.safePut(job.config, "state", "creating-project");
|
||||||
|
|
||||||
|
final Project project = new Project();
|
||||||
|
new Thread() {
|
||||||
|
public void run() {
|
||||||
|
ProjectMetadata pm = new ProjectMetadata();
|
||||||
|
pm.setName(JSONUtilities.getString(optionObj, "projectName", "Untitled"));
|
||||||
|
pm.setEncoding(JSONUtilities.getString(optionObj, "encoding", "UTF-8"));
|
||||||
|
|
||||||
|
record.parser.parse(
|
||||||
|
project,
|
||||||
|
pm,
|
||||||
|
job,
|
||||||
|
getSelectedFileRecords(job),
|
||||||
|
format,
|
||||||
|
-1,
|
||||||
|
optionObj,
|
||||||
|
exceptions
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!job.canceled) {
|
||||||
|
project.update(); // update all internal models, indexes, caches, etc.
|
||||||
|
|
||||||
|
ProjectManager.singleton.registerProject(project, pm);
|
||||||
|
|
||||||
|
JSONUtilities.safePut(job.config, "projectID", project.id);
|
||||||
|
JSONUtilities.safePut(job.config, "state", "created-project");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}.start();
|
||||||
|
|
||||||
|
return project.id;
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void setCreatingProjectProgress(ImportingJob job, String message, int percent) {
|
||||||
|
JSONObject progress = JSONUtilities.getObject(job.config, "progress");
|
||||||
|
if (progress == null) {
|
||||||
|
progress = new JSONObject();
|
||||||
|
JSONUtilities.safePut(job.config, "progress", progress);
|
||||||
|
}
|
||||||
|
JSONUtilities.safePut(progress, "message", message);
|
||||||
|
JSONUtilities.safePut(progress, "percent", percent);
|
||||||
|
}
|
||||||
|
}
|
@ -31,23 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
var theImportJob = {};
|
package com.google.refine.importing;
|
||||||
var ui = {};
|
|
||||||
|
|
||||||
var Refine = {
|
public interface UrlRewriter {
|
||||||
};
|
static public class Result {
|
||||||
|
public String rewrittenUrl;
|
||||||
function resize() {
|
public String format;
|
||||||
var header = $("#header");
|
public boolean download;
|
||||||
|
}
|
||||||
var leftPanelWidth = 300;
|
|
||||||
var width = $(window).width();
|
|
||||||
var top = $("#header").outerHeight();
|
|
||||||
var height = $(window).height() - top;
|
|
||||||
|
|
||||||
|
public Result rewrite(String url);
|
||||||
}
|
}
|
||||||
|
|
||||||
function onLoad() {
|
|
||||||
$(window).bind("resize", resize);
|
|
||||||
}
|
|
||||||
$(onLoad);
|
|
@ -1,62 +0,0 @@
|
|||||||
package com.google.refine.model.meta;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
|
|
||||||
import org.apache.commons.fileupload.FileItemIterator;
|
|
||||||
import org.apache.commons.fileupload.FileItemStream;
|
|
||||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
|
||||||
import org.json.JSONException;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.json.JSONWriter;
|
|
||||||
|
|
||||||
import com.google.refine.commands.importing.ImportJob;
|
|
||||||
|
|
||||||
public class FileUploadImportSource extends ImportSource {
|
|
||||||
public String originalFileName;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void customWrite(JSONWriter writer, Properties options)
|
|
||||||
throws JSONException {
|
|
||||||
writer.key("originalFileName"); writer.value(originalFileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
|
||||||
if (obj.has("originalFileName")) {
|
|
||||||
originalFileName = obj.getString("originalFileName");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
|
||||||
ServletFileUpload upload = new ServletFileUpload();
|
|
||||||
FileItemIterator iter = upload.getItemIterator(request);
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
FileItemStream item = iter.next();
|
|
||||||
if (!item.isFormField()) {
|
|
||||||
String fileName = item.getName();
|
|
||||||
if (fileName.length() > 0) {
|
|
||||||
InputStream stream = item.openStream();
|
|
||||||
try {
|
|
||||||
File file = new File(job.dir, "data");
|
|
||||||
|
|
||||||
this.accessTime = new Date();
|
|
||||||
this.contentType = item.getContentType();
|
|
||||||
this.encoding = request.getCharacterEncoding();
|
|
||||||
this.originalFileName = fileName;
|
|
||||||
this.size = saveStreamToFileOrDir(
|
|
||||||
item.openStream(), file, this.contentType, fileName, job, request.getContentLength());
|
|
||||||
this.isArchive = file.isDirectory();
|
|
||||||
} finally {
|
|
||||||
stream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,5 +0,0 @@
|
|||||||
package com.google.refine.model.meta;
|
|
||||||
|
|
||||||
/**
 * Placeholder type with no members.
 */
public class ImportConfig {
}
|
|
@ -1,167 +0,0 @@
|
|||||||
package com.google.refine.model.meta;
|
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.zip.GZIPInputStream;
|
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
|
|
||||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
|
||||||
import org.apache.tools.tar.TarEntry;
|
|
||||||
import org.apache.tools.tar.TarInputStream;
|
|
||||||
import org.json.JSONException;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.json.JSONWriter;
|
|
||||||
|
|
||||||
import com.google.refine.Jsonizable;
|
|
||||||
import com.google.refine.commands.importing.ImportJob;
|
|
||||||
import com.google.refine.commands.importing.ImportManager;
|
|
||||||
import com.google.refine.util.ParsingUtilities;
|
|
||||||
|
|
||||||
abstract public class ImportSource implements Jsonizable {
|
|
||||||
public Date accessTime;
|
|
||||||
public long size;
|
|
||||||
public boolean isArchive = false;
|
|
||||||
|
|
||||||
public String contentType;
|
|
||||||
public String encoding;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void write(JSONWriter writer, Properties options)
|
|
||||||
throws JSONException {
|
|
||||||
writer.object();
|
|
||||||
writer.key("type"); writer.value(ImportManager.getImportSourceClassName(this.getClass()));
|
|
||||||
writer.key("accessTime"); writer.value(ParsingUtilities.dateToString(accessTime));
|
|
||||||
writer.key("size"); writer.value(size);
|
|
||||||
writer.key("isArchive"); writer.value(isArchive);
|
|
||||||
writer.key("contentType"); writer.value(contentType);
|
|
||||||
writer.key("encoding"); writer.value(encoding);
|
|
||||||
writer.endObject();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reconstruct(JSONObject obj) throws JSONException {
|
|
||||||
if (obj.has("accessTime")) {
|
|
||||||
accessTime = ParsingUtilities.stringToDate(obj.getString("accessTime"));
|
|
||||||
}
|
|
||||||
if (obj.has("size")) {
|
|
||||||
size = obj.getLong("size");
|
|
||||||
}
|
|
||||||
if (obj.has("isArchive")) {
|
|
||||||
isArchive = obj.getBoolean("isArchive");
|
|
||||||
}
|
|
||||||
if (obj.has("contentType")) {
|
|
||||||
contentType = obj.getString("contentType");
|
|
||||||
}
|
|
||||||
if (obj.has("encoding")) {
|
|
||||||
encoding = obj.getString("encoding");
|
|
||||||
}
|
|
||||||
customReconstruct(obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
abstract public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job)
|
|
||||||
throws Exception;
|
|
||||||
|
|
||||||
abstract protected void customWrite(JSONWriter writer, Properties options) throws JSONException;
|
|
||||||
abstract protected void customReconstruct(JSONObject obj) throws JSONException;
|
|
||||||
|
|
||||||
static protected long saveStreamToFileOrDir(
|
|
||||||
InputStream is,
|
|
||||||
File file,
|
|
||||||
String contentType,
|
|
||||||
String fileNameOrUrl,
|
|
||||||
ImportJob job,
|
|
||||||
long expectedSize
|
|
||||||
) throws IOException {
|
|
||||||
InputStream archiveIS = null;
|
|
||||||
if (fileNameOrUrl != null) {
|
|
||||||
try {
|
|
||||||
if (fileNameOrUrl.endsWith(".tar.gz") ||
|
|
||||||
fileNameOrUrl.endsWith(".tar.gz.gz") ||
|
|
||||||
fileNameOrUrl.endsWith(".tgz")) {
|
|
||||||
archiveIS = new TarInputStream(new GZIPInputStream(is));
|
|
||||||
} else if (fileNameOrUrl.endsWith(".tar.bz2")) {
|
|
||||||
archiveIS = new TarInputStream(new CBZip2InputStream(is));
|
|
||||||
} else if (fileNameOrUrl.endsWith(".tar")) {
|
|
||||||
archiveIS = new TarInputStream(is);
|
|
||||||
} else if (fileNameOrUrl.endsWith(".zip")) {
|
|
||||||
archiveIS = new ZipInputStream(is);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
archiveIS = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
job.bytesSaved = 0;
|
|
||||||
if (archiveIS == null) {
|
|
||||||
saveStreamToFile(is, file, job, true, expectedSize);
|
|
||||||
} else {
|
|
||||||
job.retrievingProgress = -1;
|
|
||||||
|
|
||||||
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
|
||||||
// archive-like input streams so while both TarInputStream and ZipInputStream
|
|
||||||
// behave precisely the same, there is no polymorphic behavior so we have
|
|
||||||
// to treat each instance explicitly... one of those times you wish you had
|
|
||||||
// closures
|
|
||||||
|
|
||||||
if (archiveIS instanceof TarInputStream) {
|
|
||||||
TarInputStream tis = (TarInputStream) archiveIS;
|
|
||||||
TarEntry te;
|
|
||||||
while ((te = tis.getNextEntry()) != null) {
|
|
||||||
if (!te.isDirectory()) {
|
|
||||||
saveStreamToFile(tis, new File(file, te.getName()), job, false, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (archiveIS instanceof ZipInputStream) {
|
|
||||||
ZipInputStream zis = (ZipInputStream) archiveIS;
|
|
||||||
ZipEntry ze;
|
|
||||||
long compressedSize = 0;
|
|
||||||
while ((ze = zis.getNextEntry()) != null) {
|
|
||||||
if (!ze.isDirectory()) {
|
|
||||||
saveStreamToFile(zis, new File(file, ze.getName()), job, false, 0);
|
|
||||||
|
|
||||||
compressedSize += ze.getCompressedSize(); // this might be negative if not known
|
|
||||||
if (compressedSize > 0) {
|
|
||||||
job.retrievingProgress = (int) (compressedSize * 100 / expectedSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return job.bytesSaved;
|
|
||||||
}
|
|
||||||
|
|
||||||
static private void saveStreamToFile(
|
|
||||||
InputStream is,
|
|
||||||
File file,
|
|
||||||
ImportJob job,
|
|
||||||
boolean updateProgress,
|
|
||||||
long expectedSize
|
|
||||||
) throws IOException {
|
|
||||||
byte data[] = new byte[4096];
|
|
||||||
|
|
||||||
file.getParentFile().mkdirs();
|
|
||||||
|
|
||||||
FileOutputStream fos = new FileOutputStream(file);
|
|
||||||
BufferedOutputStream bos = new BufferedOutputStream(fos, data.length);
|
|
||||||
|
|
||||||
int count;
|
|
||||||
while ((count = is.read(data, 0, data.length)) != -1) {
|
|
||||||
bos.write(data, 0, count);
|
|
||||||
|
|
||||||
job.bytesSaved += count;
|
|
||||||
if (updateProgress) {
|
|
||||||
job.retrievingProgress = (int) (job.bytesSaved * 100 / expectedSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bos.flush();
|
|
||||||
bos.close();
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,28 +0,0 @@
|
|||||||
package com.google.refine.model.meta;
|
|
||||||
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
|
|
||||||
import org.json.JSONException;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.json.JSONWriter;
|
|
||||||
|
|
||||||
import com.google.refine.commands.importing.ImportJob;
|
|
||||||
|
|
||||||
public class TextImportSource extends ImportSource {
|
|
||||||
@Override
|
|
||||||
protected void customWrite(JSONWriter writer, Properties options)
|
|
||||||
throws JSONException {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,34 +0,0 @@
|
|||||||
package com.google.refine.model.meta;
|
|
||||||
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
|
|
||||||
import org.json.JSONException;
|
|
||||||
import org.json.JSONObject;
|
|
||||||
import org.json.JSONWriter;
|
|
||||||
|
|
||||||
import com.google.refine.commands.importing.ImportJob;
|
|
||||||
|
|
||||||
public class WebImportSource extends ImportSource {
|
|
||||||
public String url;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void customWrite(JSONWriter writer, Properties options)
|
|
||||||
throws JSONException {
|
|
||||||
writer.key("url"); writer.value(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
|
||||||
if (obj.has("url")) {
|
|
||||||
url = obj.getString("url");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
@ -35,8 +35,10 @@ package com.google.refine.util;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
@ -44,6 +46,14 @@ import org.json.JSONObject;
|
|||||||
import org.json.JSONWriter;
|
import org.json.JSONWriter;
|
||||||
|
|
||||||
public class JSONUtilities {
|
public class JSONUtilities {
|
||||||
|
static public JSONObject getObject(JSONObject obj, String key) {
|
||||||
|
try {
|
||||||
|
return obj.getJSONObject(key);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public String getString(JSONObject obj, String key, String def) {
|
static public String getString(JSONObject obj, String key, String def) {
|
||||||
try {
|
try {
|
||||||
return obj.getString(key);
|
return obj.getString(key);
|
||||||
@ -94,6 +104,14 @@ public class JSONUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public JSONArray getArray(JSONObject obj, String key) {
|
||||||
|
try {
|
||||||
|
return obj.getJSONArray(key);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public int[] getIntArray(JSONObject obj, String key) {
|
static public int[] getIntArray(JSONObject obj, String key) {
|
||||||
try {
|
try {
|
||||||
JSONArray a = obj.getJSONArray(key);
|
JSONArray a = obj.getJSONArray(key);
|
||||||
@ -144,6 +162,14 @@ public class JSONUtilities {
|
|||||||
writer.endArray();
|
writer.endArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public void writeStringArray(JSONWriter writer, String[] strings) throws JSONException {
|
||||||
|
writer.array();
|
||||||
|
for (String s : strings) {
|
||||||
|
writer.value(s);
|
||||||
|
}
|
||||||
|
writer.endArray();
|
||||||
|
}
|
||||||
|
|
||||||
static public void putField(JSONObject obj, String key, Object value) throws JSONException {
|
static public void putField(JSONObject obj, String key, Object value) throws JSONException {
|
||||||
if (value instanceof Integer) {
|
if (value instanceof Integer) {
|
||||||
obj.put(key, ((Integer) value).intValue());
|
obj.put(key, ((Integer) value).intValue());
|
||||||
@ -164,6 +190,135 @@ public class JSONUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public JSONObject getObjectElement(JSONArray a, int i) {
|
||||||
|
try {
|
||||||
|
return a.getJSONObject(i);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public int getIntElement(JSONArray a, int i, int def) {
|
||||||
|
try {
|
||||||
|
return a.getInt(i);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
return def;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, JSONObject element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, Object element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, int element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, long element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, double element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, boolean element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void append(JSONArray a, String element) {
|
||||||
|
try {
|
||||||
|
a.put(a.length(), element);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, int value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, long value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, double value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, boolean value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, String value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, Collection<?> value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, Map<?, ?> value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void safePut(JSONObject obj, String key, Object value) {
|
||||||
|
try {
|
||||||
|
obj.put(key, value);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static public Object[] toArray(JSONArray a) throws JSONException {
|
static public Object[] toArray(JSONArray a) throws JSONException {
|
||||||
int l = a.length();
|
int l = a.length();
|
||||||
|
|
||||||
|
64
main/src/com/google/refine/util/TrackingInputStream.java
Normal file
64
main/src/com/google/refine/util/TrackingInputStream.java
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
package com.google.refine.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
/**
 * An {@link InputStream} wrapper that delegates every operation to another
 * stream while counting how many bytes have been read or skipped through it.
 */
public class TrackingInputStream extends InputStream {
    final private InputStream is;
    protected long bytesRead;

    /**
     * @param is the stream to delegate to
     */
    public TrackingInputStream(InputStream is) {
        this.is = is;
    }

    /**
     * @return the number of bytes read or skipped so far
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /**
     * Reads a single byte and counts it as one byte.
     *
     * @return the byte read (0-255), or -1 at end of stream
     */
    @Override
    public int read() throws IOException {
        int b = is.read();
        // The delegate returns the byte VALUE here, not a count: record exactly
        // one byte consumed (including a zero byte) unless the stream has ended.
        if (b != -1) {
            track(1);
        }
        return b;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return (int) track(is.read(b));
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        return (int) track(is.read(b, off, len));
    }

    @Override
    public long skip(long n) throws IOException {
        return track(is.skip(n));
    }

    @Override
    public void mark(int readlimit) {
        is.mark(readlimit);
    }

    /**
     * Resets the underlying stream. The byte count is left as it was, so bytes
     * re-read after a reset are counted again.
     */
    @Override
    public void reset() throws IOException {
        is.reset();
    }

    @Override
    public boolean markSupported() {
        return is.markSupported();
    }

    @Override
    public void close() throws IOException {
        is.close();
    }

    /**
     * Adds a byte count to the running total when it is positive.
     *
     * @param bytesRead number of bytes just consumed; zero or negative values
     *                  (such as the -1 end-of-stream marker) are not added
     * @return {@code bytesRead} unchanged, so calls can wrap a delegate result
     */
    protected long track(long bytesRead) {
        if (bytesRead > 0) {
            this.bytesRead += bytesRead;
        }
        return bytesRead;
    }
}
|
@ -33,6 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.tests;
|
package com.google.refine.tests;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.times;
|
||||||
|
import static org.mockito.Mockito.verify;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONException;
|
||||||
|
import org.json.JSONObject;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeSuite;
|
import org.testng.annotations.BeforeSuite;
|
||||||
@ -41,6 +48,7 @@ import com.google.refine.model.Cell;
|
|||||||
import com.google.refine.model.Column;
|
import com.google.refine.model.Column;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
|
import com.google.refine.util.JSONUtilities;
|
||||||
|
|
||||||
public class RefineTest {
|
public class RefineTest {
|
||||||
|
|
||||||
@ -82,4 +90,41 @@ public class RefineTest {
|
|||||||
logger.info(sb.toString());
|
logger.info(sb.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//----helpers----
|
||||||
|
|
||||||
|
static public void whenGetBooleanOption(String name, JSONObject options, Boolean def){
|
||||||
|
when(options.has(name)).thenReturn(true);
|
||||||
|
when(JSONUtilities.getBoolean(options, name, def)).thenReturn(def);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void whenGetIntegerOption(String name, JSONObject options, int def){
|
||||||
|
when(options.has(name)).thenReturn(true);
|
||||||
|
when(JSONUtilities.getInt(options, name, def)).thenReturn(def);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void whenGetStringOption(String name, JSONObject options, String def){
|
||||||
|
when(options.has(name)).thenReturn(true);
|
||||||
|
when(JSONUtilities.getString(options, name, def)).thenReturn(def);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void whenGetObjectOption(String name, JSONObject options, JSONObject def){
|
||||||
|
when(options.has(name)).thenReturn(true);
|
||||||
|
when(JSONUtilities.getObject(options, name)).thenReturn(def);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void whenGetArrayOption(String name, JSONObject options, JSONArray def){
|
||||||
|
when(options.has(name)).thenReturn(true);
|
||||||
|
when(JSONUtilities.getArray(options, name)).thenReturn(def);
|
||||||
|
}
|
||||||
|
|
||||||
|
static public void verifyGetOption(String name, JSONObject options){
|
||||||
|
verify(options, times(1)).has(name);
|
||||||
|
try {
|
||||||
|
verify(options, times(1)).get(name);
|
||||||
|
} catch (JSONException e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,14 +1,12 @@
|
|||||||
package com.google.refine.tests.importers;
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
|
||||||
import static org.mockito.Mockito.times;
|
import static org.mockito.Mockito.times;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
import static org.mockito.Mockito.when;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
|
import org.json.JSONArray;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.AfterMethod;
|
import org.testng.annotations.AfterMethod;
|
||||||
@ -16,13 +14,10 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.importers.FixedWidthImporter;
|
import com.google.refine.importers.FixedWidthImporter;
|
||||||
import com.google.refine.importers.ImportException;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.model.Project;
|
|
||||||
import com.google.refine.tests.RefineTest;
|
|
||||||
|
|
||||||
public class FixedWidthImporterTests extends RefineTest {
|
public class FixedWidthImporterTests extends ImporterTest {
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void init() {
|
public void init() {
|
||||||
logger = LoggerFactory.getLogger(this.getClass());
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
@ -30,45 +25,20 @@ public class FixedWidthImporterTests extends RefineTest {
|
|||||||
|
|
||||||
//constants
|
//constants
|
||||||
String SAMPLE_ROW = "NDB_NoShrt_DescWater";
|
String SAMPLE_ROW = "NDB_NoShrt_DescWater";
|
||||||
String SAMPLE_ROW_WIDTHS = "6,9,5";
|
|
||||||
|
|
||||||
//System Under Test
|
//System Under Test
|
||||||
FixedWidthImporter SUT = null;
|
FixedWidthImporter SUT = null;
|
||||||
|
|
||||||
//mock dependencies
|
|
||||||
Project project = null;
|
|
||||||
Properties properties = null;
|
|
||||||
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void SetUp(){
|
public void SetUp(){
|
||||||
|
super.SetUp();
|
||||||
SUT = new FixedWidthImporter();
|
SUT = new FixedWidthImporter();
|
||||||
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
|
||||||
properties = mock(Properties.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterMethod
|
@AfterMethod
|
||||||
public void TearDown(){
|
public void TearDown(){
|
||||||
SUT = null;
|
SUT = null;
|
||||||
project = null;
|
super.TearDown();
|
||||||
properties = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO a lot of these tests are very similar to the TsvCsvImporterTests. It might be possible to overlap them
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void canParseSeparator(){
|
|
||||||
int[] i = null;
|
|
||||||
try {
|
|
||||||
i = SUT.getColumnWidthsFromString("1,2,3");
|
|
||||||
} catch (ImportException e) {
|
|
||||||
Assert.fail(e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
Assert.assertNotNull(i);
|
|
||||||
Assert.assertEquals(i[0], 1);
|
|
||||||
Assert.assertEquals(i[1], 2);
|
|
||||||
Assert.assertEquals(i[2], 3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------read tests------------------------
|
//---------------------read tests------------------------
|
||||||
@ -76,19 +46,23 @@ public class FixedWidthImporterTests extends RefineTest {
|
|||||||
public void readFixedWidth(){
|
public void readFixedWidth(){
|
||||||
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
|
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
|
||||||
|
|
||||||
when(properties.getProperty("fixed-column-widths")).thenReturn(SAMPLE_ROW_WIDTHS);
|
JSONArray columnWidths = new JSONArray();
|
||||||
whenGetIntegerOption("ignore",properties,0);
|
JSONUtilities.append(columnWidths, 6);
|
||||||
whenGetIntegerOption("header-lines",properties,0);
|
JSONUtilities.append(columnWidths, 9);
|
||||||
whenGetIntegerOption("limit",properties,-1);
|
JSONUtilities.append(columnWidths, 5);
|
||||||
whenGetIntegerOption("skip",properties,0);
|
|
||||||
|
whenGetArrayOption("columnWidths", options, columnWidths);
|
||||||
|
whenGetIntegerOption("ignoreLines", options, 0);
|
||||||
|
whenGetIntegerOption("headerLines", options, 0);
|
||||||
|
whenGetIntegerOption("skipDataLines", options, 0);
|
||||||
|
whenGetIntegerOption("limit", options, -1);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
parseOneFile(SUT, reader);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail(e.getMessage());
|
Assert.fail(e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Assert.assertEquals(project.rows.size(), 2);
|
Assert.assertEquals(project.rows.size(), 2);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
||||||
@ -99,27 +73,10 @@ public class FixedWidthImporterTests extends RefineTest {
|
|||||||
Assert.assertEquals((String)project.rows.get(1).cells.get(1).value, "rt");
|
Assert.assertEquals((String)project.rows.get(1).cells.get(1).value, "rt");
|
||||||
Assert.assertNull(project.rows.get(1).cells.get(2));
|
Assert.assertNull(project.rows.get(1).cells.get(2));
|
||||||
|
|
||||||
verify(properties, times(1)).getProperty("fixed-column-widths");
|
JSONUtilities.getIntArray(verify(options, times(1)), "columnWidths");
|
||||||
verifyGetOption("ignore",properties);
|
verifyGetOption("ignore", options);
|
||||||
verifyGetOption("header-lines",properties);
|
verifyGetOption("header-lines", options);
|
||||||
verifyGetOption("limit",properties);
|
verifyGetOption("limit", options);
|
||||||
verifyGetOption("skip",properties);
|
verifyGetOption("skip", options);
|
||||||
}
|
|
||||||
|
|
||||||
//----helpers----
|
|
||||||
|
|
||||||
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
|
|
||||||
when(properties.containsKey(name)).thenReturn(true);
|
|
||||||
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void whenGetIntegerOption(String name, Properties properties, int def){
|
|
||||||
when(properties.containsKey(name)).thenReturn(true);
|
|
||||||
when(properties.getProperty(name)).thenReturn(Integer.toString(def));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void verifyGetOption(String name, Properties properties){
|
|
||||||
verify(properties, times(1)).containsKey(name);
|
|
||||||
verify(properties, times(1)).getProperty(name);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,109 @@
|
|||||||
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import com.google.refine.ProjectMetadata;
|
||||||
|
import com.google.refine.importers.ImportingParserBase;
|
||||||
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
|
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||||
|
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||||
|
import com.google.refine.importing.ImportingJob;
|
||||||
|
import com.google.refine.importing.ImportingManager;
|
||||||
|
import com.google.refine.model.Project;
|
||||||
|
import com.google.refine.tests.RefineTest;
|
||||||
|
|
||||||
|
abstract class ImporterTest extends RefineTest {
|
||||||
|
//mock dependencies
|
||||||
|
protected Project project;
|
||||||
|
protected ProjectMetadata metadata;
|
||||||
|
protected ImportingJob job;
|
||||||
|
|
||||||
|
protected JSONObject options;
|
||||||
|
|
||||||
|
public void SetUp(){
|
||||||
|
//FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||||
|
project = new Project();
|
||||||
|
metadata = new ProjectMetadata();
|
||||||
|
job = ImportingManager.createJob();
|
||||||
|
|
||||||
|
options = mock(JSONObject.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void TearDown(){
|
||||||
|
project = null;
|
||||||
|
metadata = null;
|
||||||
|
|
||||||
|
ImportingManager.disposeJob(job.id);
|
||||||
|
job = null;
|
||||||
|
|
||||||
|
options = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseOneFile(ImportingParserBase parser, Reader reader) {
|
||||||
|
parser.parseOneFile(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
"file-source",
|
||||||
|
reader,
|
||||||
|
-1,
|
||||||
|
options,
|
||||||
|
new ArrayList<Exception>()
|
||||||
|
);
|
||||||
|
project.update();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseOneFile(ImportingParserBase parser, InputStream inputStream) {
|
||||||
|
parser.parseOneFile(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
"file-source",
|
||||||
|
inputStream,
|
||||||
|
-1,
|
||||||
|
options,
|
||||||
|
new ArrayList<Exception>()
|
||||||
|
);
|
||||||
|
project.update();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseOneFile(TreeImportingParserBase parser, Reader reader) {
|
||||||
|
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||||
|
parser.parseOneFile(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
"file-source",
|
||||||
|
reader,
|
||||||
|
rootColumnGroup,
|
||||||
|
-1,
|
||||||
|
options,
|
||||||
|
new ArrayList<Exception>()
|
||||||
|
);
|
||||||
|
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||||
|
project.columnModel.update();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void parseOneFile(TreeImportingParserBase parser, InputStream inputStream) {
|
||||||
|
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||||
|
parser.parseOneFile(
|
||||||
|
project,
|
||||||
|
metadata,
|
||||||
|
job,
|
||||||
|
"file-source",
|
||||||
|
inputStream,
|
||||||
|
rootColumnGroup,
|
||||||
|
-1,
|
||||||
|
options,
|
||||||
|
new ArrayList<Exception>()
|
||||||
|
);
|
||||||
|
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||||
|
project.columnModel.update();
|
||||||
|
}
|
||||||
|
}
|
@ -33,12 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.tests.importers;
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -47,15 +45,12 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.importers.JsonImporter;
|
import com.google.refine.importers.JsonImporter;
|
||||||
import com.google.refine.importers.parsers.JSONParser;
|
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||||
import com.google.refine.importers.parsers.TreeParserToken;
|
import com.google.refine.importers.tree.TreeReader.Token;
|
||||||
import com.google.refine.model.Project;
|
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.tests.RefineTest;
|
|
||||||
|
|
||||||
public class JsonImporterTests extends RefineTest {
|
public class JsonImporterTests extends ImporterTest {
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void init() {
|
public void init() {
|
||||||
logger = LoggerFactory.getLogger(this.getClass());
|
logger = LoggerFactory.getLogger(this.getClass());
|
||||||
@ -63,29 +58,30 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
|
|
||||||
|
|
||||||
//dependencies
|
//dependencies
|
||||||
Project project = null;
|
|
||||||
Properties options = null;
|
|
||||||
ByteArrayInputStream inputStream = null;
|
ByteArrayInputStream inputStream = null;
|
||||||
|
|
||||||
//System Under Test
|
//System Under Test
|
||||||
JsonImporter SUT = null;
|
JsonImporter SUT = null;
|
||||||
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void SetUp(){
|
public void SetUp(){
|
||||||
|
super.SetUp();
|
||||||
SUT = new JsonImporter();
|
SUT = new JsonImporter();
|
||||||
project = new Project();
|
|
||||||
options = mock(Properties.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterMethod
|
@AfterMethod
|
||||||
public void TearDown() throws IOException{
|
public void TearDown() {
|
||||||
SUT = null;
|
SUT = null;
|
||||||
project = null;
|
if (inputStream != null) {
|
||||||
options = null;
|
try {
|
||||||
if (inputStream != null) inputStream.close();
|
inputStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
inputStream = null;
|
inputStream = null;
|
||||||
}
|
}
|
||||||
|
super.TearDown();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void canParseSample(){
|
public void canParseSample(){
|
||||||
@ -181,8 +177,8 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
String sampleJson2 = "{\"field\":{}}";
|
String sampleJson2 = "{\"field\":{}}";
|
||||||
String sampleJson3 = "{\"field\":[{},{}]}";
|
String sampleJson3 = "{\"field\":[{},{}]}";
|
||||||
|
|
||||||
JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) ));
|
JSONTreeReader parser = new JSONTreeReader(new StringReader(sampleJson));
|
||||||
TreeParserToken token = TreeParserToken.Ignorable;
|
Token token = Token.Ignorable;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
try{
|
try{
|
||||||
while(token != null){
|
while(token != null){
|
||||||
@ -191,8 +187,8 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
break;
|
break;
|
||||||
i++;
|
i++;
|
||||||
if(i == 3){
|
if(i == 3){
|
||||||
Assert.assertEquals(TreeParserToken.Value, token);
|
Assert.assertEquals(Token.Value, token);
|
||||||
Assert.assertEquals("field", parser.getLocalName());
|
Assert.assertEquals("field", parser.getFieldName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
@ -200,8 +196,8 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) );
|
parser = new JSONTreeReader(new StringReader(sampleJson2));
|
||||||
token = TreeParserToken.Ignorable;
|
token = Token.Ignorable;
|
||||||
i = 0;
|
i = 0;
|
||||||
try{
|
try{
|
||||||
while(token != null){
|
while(token != null){
|
||||||
@ -210,16 +206,16 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
break;
|
break;
|
||||||
i++;
|
i++;
|
||||||
if(i == 3){
|
if(i == 3){
|
||||||
Assert.assertEquals(TreeParserToken.StartEntity, token);
|
Assert.assertEquals(Token.StartEntity, token);
|
||||||
Assert.assertEquals(parser.getLocalName(), "field");
|
Assert.assertEquals(parser.getFieldName(), "field");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
//silent
|
//silent
|
||||||
}
|
}
|
||||||
|
|
||||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) );
|
parser = new JSONTreeReader(new StringReader(sampleJson3));
|
||||||
token = TreeParserToken.Ignorable;
|
token = Token.Ignorable;
|
||||||
i = 0;
|
i = 0;
|
||||||
try{
|
try{
|
||||||
while(token != null){
|
while(token != null){
|
||||||
@ -228,16 +224,16 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
break;
|
break;
|
||||||
i++;
|
i++;
|
||||||
if(i == 3){
|
if(i == 3){
|
||||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
Assert.assertEquals(token, Token.StartEntity);
|
||||||
Assert.assertEquals(parser.getLocalName(), "field");
|
Assert.assertEquals(parser.getFieldName(), "field");
|
||||||
}
|
}
|
||||||
if(i == 4){
|
if(i == 4){
|
||||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
Assert.assertEquals(token, Token.StartEntity);
|
||||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
|
||||||
}
|
}
|
||||||
if(i == 6){
|
if(i == 6){
|
||||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
Assert.assertEquals(token, Token.StartEntity);
|
||||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}catch(Exception e){
|
}catch(Exception e){
|
||||||
@ -352,7 +348,7 @@ public class JsonImporterTests extends RefineTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(inputStream, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, inputStream);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
package com.google.refine.tests.importers;
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -42,13 +41,10 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.importers.RdfTripleImporter;
|
import com.google.refine.importers.RdfTripleImporter;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.tests.RefineTest;
|
|
||||||
|
|
||||||
|
public class RdfTripleImporterTests extends ImporterTest {
|
||||||
public class RdfTripleImporterTests extends RefineTest {
|
|
||||||
|
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void init() {
|
public void init() {
|
||||||
@ -58,15 +54,12 @@ public class RdfTripleImporterTests extends RefineTest {
|
|||||||
|
|
||||||
//System Under Test
|
//System Under Test
|
||||||
RdfTripleImporter SUT = null;
|
RdfTripleImporter SUT = null;
|
||||||
Project project = null;
|
|
||||||
Properties options = null;
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void SetUp(){
|
public void SetUp(){
|
||||||
|
super.SetUp();
|
||||||
SUT = new RdfTripleImporter();
|
SUT = new RdfTripleImporter();
|
||||||
project = new Project();
|
JSONUtilities.safePut(options, "base-url", "http://rdf.freebase.com");
|
||||||
options = new Properties();
|
|
||||||
options.put("base-url", "http://rdf.freebase.com");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled=false)
|
@Test(enabled=false)
|
||||||
@ -75,8 +68,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
|||||||
StringReader reader = new StringReader(sampleRdf);
|
StringReader reader = new StringReader(sampleRdf);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, reader);
|
||||||
project.update();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
@ -98,8 +90,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
|||||||
StringReader reader = new StringReader(sampleRdf);
|
StringReader reader = new StringReader(sampleRdf);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, reader);
|
||||||
project.update();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
@ -140,8 +131,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
|||||||
StringReader reader = new StringReader(sampleRdf);
|
StringReader reader = new StringReader(sampleRdf);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, reader);
|
||||||
project.update();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
@ -175,8 +165,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
|||||||
StringReader reader = new StringReader(sampleRdf);
|
StringReader reader = new StringReader(sampleRdf);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, reader);
|
||||||
project.update();
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
@ -33,15 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.tests.importers;
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
|
||||||
import static org.mockito.Mockito.times;
|
import static org.mockito.Mockito.times;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.LineNumberReader;
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -51,12 +47,10 @@ import org.testng.annotations.BeforeTest;
|
|||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
import com.google.refine.importers.SeparatorBasedImporter;
|
||||||
import com.google.refine.importers.TsvCsvImporter;
|
import com.google.refine.util.JSONUtilities;
|
||||||
import com.google.refine.model.Project;
|
|
||||||
import com.google.refine.tests.RefineTest;
|
|
||||||
|
|
||||||
public class TsvCsvImporterTests extends RefineTest {
|
public class TsvCsvImporterTests extends ImporterTest {
|
||||||
|
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void init() {
|
public void init() {
|
||||||
@ -67,25 +61,18 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
|
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
|
||||||
|
|
||||||
//System Under Test
|
//System Under Test
|
||||||
TsvCsvImporter SUT = null;
|
SeparatorBasedImporter SUT = null;
|
||||||
|
|
||||||
//mock dependencies
|
|
||||||
Project project = null;
|
|
||||||
Properties properties = null;
|
|
||||||
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void SetUp(){
|
public void SetUp() {
|
||||||
SUT = new TsvCsvImporter();
|
super.SetUp();
|
||||||
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
SUT = new SeparatorBasedImporter();
|
||||||
properties = mock(Properties.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterMethod
|
@AfterMethod
|
||||||
public void TearDown(){
|
public void TearDown(){
|
||||||
SUT = null;
|
SUT = null;
|
||||||
project = null;
|
super.TearDown();
|
||||||
properties = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||||
@ -94,11 +81,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3";
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
@ -113,11 +99,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = "value1" + inputSeparator + "value2" + inputSeparator + "value3";
|
String input = "value1" + inputSeparator + "value2" + inputSeparator + "value3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, false, false);
|
prepareOptions(sep, -1, 0, 0, 0, false, false, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 1);
|
Assert.assertEquals(project.columnModel.columns.size(), 1);
|
||||||
@ -135,10 +120,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,13 +145,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"data1" + inputSeparator + "234" + inputSeparator + "data3";
|
"data1" + inputSeparator + "234" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, true, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, true, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -185,13 +169,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = "data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
String input = "data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2");
|
||||||
@ -209,13 +192,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 ";
|
String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 ";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
@ -230,13 +212,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = " data1" + inputSeparator + " 12" + inputSeparator + " data3";
|
String input = " data1" + inputSeparator + " 12" + inputSeparator + " data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
|
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
@ -251,13 +232,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = " data1" + inputSeparator + inputSeparator + " data3";
|
String input = " data1" + inputSeparator + inputSeparator + " data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
|
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
@ -274,13 +254,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"sub1" + inputSeparator + "sub2" + inputSeparator + "sub3\n" +
|
"sub1" + inputSeparator + "sub2" + inputSeparator + "sub3\n" +
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 2, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 2, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
||||||
@ -299,13 +278,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3" + inputSeparator + "data4" + inputSeparator + "data5" + inputSeparator + "data6";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3" + inputSeparator + "data4" + inputSeparator + "data5" + inputSeparator + "data6";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 6);
|
Assert.assertEquals(project.columnModel.columns.size(), 6);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -330,13 +308,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\"" + inputSeparator + "data2";
|
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\"" + inputSeparator + "data2";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -355,13 +332,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
"col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 1, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 1, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -381,13 +357,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"skip1\n" +
|
"skip1\n" +
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 1, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 1, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -411,13 +386,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"skip1\n" +
|
"skip1\n" +
|
||||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 1, 3, 2, false, true, false);
|
prepareOptions(sep, -1, 1, 3, 2, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
||||||
@ -444,10 +418,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
"data-row2-cell1" + inputSeparator + "data-row2-cell2" + inputSeparator + "\n" + //missing last data point of this row on purpose
|
"data-row2-cell1" + inputSeparator + "data-row2-cell2" + inputSeparator + "\n" + //missing last data point of this row on purpose
|
||||||
"data-row3-cell1" + inputSeparator + "data-row3-cell2" + inputSeparator + "data-row1-cell3";
|
"data-row3-cell1" + inputSeparator + "data-row3-cell2" + inputSeparator + "data-row1-cell3";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, 2, 2, 3, 2, false, true, false);
|
prepareOptions(sep, 2, 2, 3, 2, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
@ -471,13 +445,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||||
String input = "data1" + inputSeparator + "data2\"" + inputSeparator + "data3" + inputSeparator + "data4";
|
String input = "data1" + inputSeparator + "data2\"" + inputSeparator + "data3" + inputSeparator + "data4";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, true);
|
prepareOptions(sep, -1, 0, 0, 0, false, true, true);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||||
@ -493,13 +466,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\"" + inputSeparator + "data2";
|
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\"" + inputSeparator + "data2";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -517,13 +489,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||||
"\"A line with many \n\n\n\n\n empty lines\"" + inputSeparator + "data2";
|
"\"A line with many \n\n\n\n\n empty lines\"" + inputSeparator + "data2";
|
||||||
|
|
||||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
|
||||||
try {
|
try {
|
||||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||||
} catch (IOException e) {
|
parseOneFile(SUT, new StringReader(input));
|
||||||
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||||
@ -536,35 +507,34 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
|
|
||||||
//---------------------read tests------------------------
|
//---------------------read tests------------------------
|
||||||
@Test
|
@Test
|
||||||
public void readCsvWithProperties(){
|
public void readCsvWithProperties() {
|
||||||
StringReader reader = new StringReader(SAMPLE_ROW);
|
StringReader reader = new StringReader(SAMPLE_ROW);
|
||||||
|
|
||||||
when(properties.getProperty("separator")).thenReturn(",");
|
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
|
||||||
whenGetIntegerOption("ignore",properties,0);
|
whenGetIntegerOption("ignore", options, 0);
|
||||||
whenGetIntegerOption("header-lines",properties,0);
|
whenGetIntegerOption("header-lines", options, 0);
|
||||||
whenGetIntegerOption("limit",properties,-1);
|
whenGetIntegerOption("limit", options, -1);
|
||||||
whenGetIntegerOption("skip",properties,0);
|
whenGetIntegerOption("skip", options, 0);
|
||||||
whenGetIntegerOption("ignore-quotes",properties,0);
|
whenGetIntegerOption("ignore-quotes", options, 0);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
parseOneFile(SUT, reader);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc");
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water");
|
||||||
|
|
||||||
verify(properties, times(1)).getProperty("separator");
|
JSONUtilities.getString(verify(options, times(1)), "separator", null);
|
||||||
verifyGetOption("ignore",properties);
|
verifyGetOption("ignore", options);
|
||||||
verifyGetOption("header-lines",properties);
|
verifyGetOption("header-lines", options);
|
||||||
verifyGetOption("limit",properties);
|
verifyGetOption("limit", options);
|
||||||
verifyGetOption("skip",properties);
|
verifyGetOption("skip", options);
|
||||||
verifyGetOption("ignore-quotes",properties);
|
verifyGetOption("ignore-quotes", options);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -572,20 +542,19 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
String input = "data1,data2\",data3,data4";
|
String input = "data1,data2\",data3,data4";
|
||||||
StringReader reader = new StringReader(input);
|
StringReader reader = new StringReader(input);
|
||||||
|
|
||||||
when(properties.getProperty("separator")).thenReturn(",");
|
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
|
||||||
whenGetIntegerOption("ignore",properties,0);
|
whenGetIntegerOption("ignore", options, 0);
|
||||||
whenGetIntegerOption("header-lines",properties,0);
|
whenGetIntegerOption("header-lines", options, 0);
|
||||||
whenGetIntegerOption("limit",properties,-1);
|
whenGetIntegerOption("limit", options, -1);
|
||||||
whenGetIntegerOption("skip",properties,0);
|
whenGetIntegerOption("skip", options, 0);
|
||||||
whenGetBooleanOption("ignore-quotes",properties,true);
|
whenGetBooleanOption("ignore-quotes", options, true);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
parseOneFile(SUT, reader);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Assert.assertEquals(project.rows.size(), 1);
|
Assert.assertEquals(project.rows.size(), 1);
|
||||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1");
|
||||||
@ -593,12 +562,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3");
|
||||||
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4");
|
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4");
|
||||||
|
|
||||||
verify(properties, times(1)).getProperty("separator");
|
JSONUtilities.getString(verify(options, times(1)), "separator", null);
|
||||||
verifyGetOption("ignore",properties);
|
verifyGetOption("ignore", options);
|
||||||
verifyGetOption("header-lines",properties);
|
verifyGetOption("header-lines", options);
|
||||||
verifyGetOption("limit",properties);
|
verifyGetOption("limit", options);
|
||||||
verifyGetOption("skip",properties);
|
verifyGetOption("skip", options);
|
||||||
verifyGetOption("ignore-quotes",properties);
|
verifyGetOption("ignore-quotes", options);
|
||||||
}
|
}
|
||||||
|
|
||||||
//--helpers--
|
//--helpers--
|
||||||
@ -612,19 +581,16 @@ public class TsvCsvImporterTests extends RefineTest {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
|
private void prepareOptions(
|
||||||
when(properties.containsKey(name)).thenReturn(true);
|
String sep, int limit, int skip, int ignoreLines,
|
||||||
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
|
int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes) {
|
||||||
|
JSONUtilities.safePut(options, "separator", sep);
|
||||||
|
JSONUtilities.safePut(options, "limit", limit);
|
||||||
|
JSONUtilities.safePut(options, "skipDataLines", skip);
|
||||||
|
JSONUtilities.safePut(options, "ignoreLines", ignoreLines);
|
||||||
|
JSONUtilities.safePut(options, "headerLines", headerLines);
|
||||||
|
JSONUtilities.safePut(options, "guessCellValueTypes", guessValueType);
|
||||||
|
JSONUtilities.safePut(options, "splitIntoColumns", splitIntoColumns);
|
||||||
|
JSONUtilities.safePut(options, "processQuotes", !ignoreQuotes);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void whenGetIntegerOption(String name, Properties properties, int def){
|
|
||||||
when(properties.containsKey(name)).thenReturn(true);
|
|
||||||
when(properties.getProperty(name)).thenReturn(Integer.toString(def));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void verifyGetOption(String name, Properties properties){
|
|
||||||
verify(properties, times(1)).containsKey(name);
|
|
||||||
verify(properties, times(1)).getProperty(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -35,27 +35,27 @@ package com.google.refine.tests.importers;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
|
import com.google.refine.importers.tree.ImportRecord;
|
||||||
import com.google.refine.importers.XmlImportUtilities;
|
import com.google.refine.importers.tree.TreeReader;
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
|
|
||||||
public class XmlImportUtilitiesStub extends XmlImportUtilities {
|
public class XmlImportUtilitiesStub extends XmlImportUtilities {
|
||||||
|
|
||||||
public List<String> detectRecordElementWrapper(TreeParser parser, String tag) throws ServletException{
|
public List<String> detectRecordElementWrapper(TreeReader parser, String tag) throws Exception{
|
||||||
return super.detectRecordElement(parser, tag);
|
return super.detectRecordElement(parser, tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void ProcessSubRecordWrapper(Project project, TreeParser parser, ImportColumnGroup columnGroup, ImportRecord record) throws ServletException{
|
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws Exception{
|
||||||
super.processSubRecord(project, parser, columnGroup, record);
|
super.processSubRecord(project, parser, columnGroup, record);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void findRecordWrapper(Project project, TreeParser parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws ServletException{
|
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup);
|
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void processRecordWrapper(Project project, TreeParser parser, ImportColumnGroup rootColumnGroup) throws ServletException{
|
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||||
super.processRecord(project, parser, rootColumnGroup);
|
super.processRecord(project, parser, rootColumnGroup);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,11 +35,12 @@ package com.google.refine.tests.importers;
|
|||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import javax.servlet.ServletException;
|
import javax.xml.stream.XMLStreamException;
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -48,13 +49,12 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.importers.TreeImportUtilities.ImportColumn;
|
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
import com.google.refine.importers.XmlImporter.XmlParser;
|
||||||
import com.google.refine.importers.TreeImportUtilities.ImportRecord;
|
import com.google.refine.importers.tree.ImportColumn;
|
||||||
import com.google.refine.importers.parsers.JSONParser;
|
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||||
import com.google.refine.importers.parsers.TreeParser;
|
import com.google.refine.importers.tree.ImportRecord;
|
||||||
import com.google.refine.importers.parsers.TreeParserToken;
|
import com.google.refine.importers.tree.TreeReader;
|
||||||
import com.google.refine.importers.parsers.XmlParser;
|
|
||||||
import com.google.refine.model.Project;
|
import com.google.refine.model.Project;
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.tests.RefineTest;
|
import com.google.refine.tests.RefineTest;
|
||||||
@ -69,7 +69,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
//dependencies
|
//dependencies
|
||||||
Project project;
|
Project project;
|
||||||
TreeParser parser;
|
TreeReader parser;
|
||||||
ImportColumnGroup columnGroup;
|
ImportColumnGroup columnGroup;
|
||||||
ImportRecord record;
|
ImportRecord record;
|
||||||
ByteArrayInputStream inputStream;
|
ByteArrayInputStream inputStream;
|
||||||
@ -134,7 +134,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
List<String> response = new ArrayList<String>();
|
List<String> response = new ArrayList<String>();
|
||||||
try {
|
try {
|
||||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail(e.getMessage());
|
Assert.fail(e.getMessage());
|
||||||
}
|
}
|
||||||
Assert.assertNotNull(response);
|
Assert.assertNotNull(response);
|
||||||
@ -152,7 +152,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
List<String> response = new ArrayList<String>();
|
List<String> response = new ArrayList<String>();
|
||||||
try {
|
try {
|
||||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail(e.getMessage());
|
Assert.fail(e.getMessage());
|
||||||
}
|
}
|
||||||
Assert.assertNotNull(response);
|
Assert.assertNotNull(response);
|
||||||
@ -171,7 +171,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
List<String> response = new ArrayList<String>();
|
List<String> response = new ArrayList<String>();
|
||||||
try {
|
try {
|
||||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail(e.getMessage());
|
Assert.fail(e.getMessage());
|
||||||
}
|
}
|
||||||
Assert.assertNull(response);
|
Assert.assertNull(response);
|
||||||
@ -181,7 +181,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
public void detectRecordElementRegressionXmlTest(){
|
public void detectRecordElementRegressionXmlTest(){
|
||||||
loadSampleXml();
|
loadSampleXml();
|
||||||
|
|
||||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
|
String[] path = XmlImportUtilitiesStub.detectRecordElement(createXmlParser());
|
||||||
Assert.assertNotNull(path);
|
Assert.assertNotNull(path);
|
||||||
Assert.assertEquals(path.length, 2);
|
Assert.assertEquals(path.length, 2);
|
||||||
Assert.assertEquals(path[0], "library");
|
Assert.assertEquals(path[0], "library");
|
||||||
@ -192,7 +192,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
public void detectRecordElementRegressionJsonTest(){
|
public void detectRecordElementRegressionJsonTest(){
|
||||||
loadSampleJson();
|
loadSampleJson();
|
||||||
|
|
||||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream));
|
String[] path = XmlImportUtilitiesStub.detectRecordElement(
|
||||||
|
new JSONTreeReader(new InputStreamReader(inputStream)));
|
||||||
Assert.assertNotNull(path);
|
Assert.assertNotNull(path);
|
||||||
Assert.assertEquals(path.length, 2);
|
Assert.assertEquals(path.length, 2);
|
||||||
Assert.assertEquals(path[0], "__anonymous__");
|
Assert.assertEquals(path[0], "__anonymous__");
|
||||||
@ -204,7 +205,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
loadSampleXml();
|
loadSampleXml();
|
||||||
|
|
||||||
String[] recordPath = new String[]{"library","book"};
|
String[] recordPath = new String[]{"library","book"};
|
||||||
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup );
|
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||||
|
|
||||||
log(project);
|
log(project);
|
||||||
assertProjectCreated(project, 0, 6);
|
assertProjectCreated(project, 0, 6);
|
||||||
@ -224,7 +225,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
loadData(XmlImporterTests.getSampleWithVaryingStructure());
|
loadData(XmlImporterTests.getSampleWithVaryingStructure());
|
||||||
|
|
||||||
String[] recordPath = new String[]{"library", "book"};
|
String[] recordPath = new String[]{"library", "book"};
|
||||||
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
|
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||||
|
|
||||||
log(project);
|
log(project);
|
||||||
assertProjectCreated(project, 0, 6);
|
assertProjectCreated(project, 0, 6);
|
||||||
@ -278,7 +279,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
|
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -297,7 +298,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
log(project);
|
log(project);
|
||||||
@ -318,7 +319,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
log(project);
|
log(project);
|
||||||
@ -343,7 +344,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
log(project);
|
log(project);
|
||||||
@ -367,7 +368,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
|
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
|
||||||
} catch (ServletException e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
log(project);
|
log(project);
|
||||||
@ -429,18 +430,24 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
|||||||
|
|
||||||
public void ParserSkip(){
|
public void ParserSkip(){
|
||||||
try {
|
try {
|
||||||
if(parser.getEventType() == TreeParserToken.Ignorable){
|
if (parser.current() == TreeReader.Token.Ignorable){
|
||||||
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
||||||
}
|
}
|
||||||
} catch (ServletException e1) {
|
} catch (Exception e1) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void createXmlParser(){
|
public TreeReader createXmlParser(){
|
||||||
|
try {
|
||||||
parser = new XmlParser(inputStream);
|
parser = new XmlParser(inputStream);
|
||||||
|
return parser;
|
||||||
|
} catch (XMLStreamException e) {
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
public void createJsonParser(){
|
}
|
||||||
parser = new JSONParser(inputStream);
|
public TreeReader createJsonParser(){
|
||||||
|
parser = new JSONTreeReader(new InputStreamReader(inputStream));
|
||||||
|
return parser;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
package com.google.refine.tests.importers;
|
package com.google.refine.tests.importers;
|
||||||
|
|
||||||
import static org.mockito.Mockito.mock;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
@ -47,14 +44,11 @@ import org.testng.annotations.BeforeMethod;
|
|||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.refine.ProjectMetadata;
|
|
||||||
import com.google.refine.importers.XmlImporter;
|
import com.google.refine.importers.XmlImporter;
|
||||||
import com.google.refine.model.Project;
|
|
||||||
import com.google.refine.model.Row;
|
import com.google.refine.model.Row;
|
||||||
import com.google.refine.tests.RefineTest;
|
|
||||||
|
|
||||||
|
|
||||||
public class XmlImporterTests extends RefineTest {
|
public class XmlImporterTests extends ImporterTest {
|
||||||
|
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void init() {
|
public void init() {
|
||||||
@ -62,29 +56,30 @@ public class XmlImporterTests extends RefineTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//dependencies
|
//dependencies
|
||||||
Project project = null;
|
|
||||||
Properties options = null;
|
|
||||||
ByteArrayInputStream inputStream = null;
|
ByteArrayInputStream inputStream = null;
|
||||||
|
|
||||||
//System Under Test
|
//System Under Test
|
||||||
XmlImporter SUT = null;
|
XmlImporter SUT = null;
|
||||||
|
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void SetUp(){
|
public void SetUp(){
|
||||||
|
super.SetUp();
|
||||||
SUT = new XmlImporter();
|
SUT = new XmlImporter();
|
||||||
project = new Project();
|
|
||||||
options = mock(Properties.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterMethod
|
@AfterMethod
|
||||||
public void TearDown() throws IOException{
|
public void TearDown() {
|
||||||
SUT = null;
|
SUT = null;
|
||||||
project = null;
|
if (inputStream != null) {
|
||||||
options = null;
|
try {
|
||||||
if (inputStream != null) inputStream.close();
|
inputStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Ignore
|
||||||
|
}
|
||||||
inputStream = null;
|
inputStream = null;
|
||||||
}
|
}
|
||||||
|
super.TearDown();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void canParseSample(){
|
public void canParseSample(){
|
||||||
@ -309,11 +304,9 @@ public class XmlImporterTests extends RefineTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SUT.read(inputStream, project, new ProjectMetadata(), options);
|
parseOneFile(SUT, inputStream);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Assert.fail();
|
Assert.fail();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -50,9 +50,10 @@ function registerCommands() {
|
|||||||
|
|
||||||
RS.registerCommand(module, "get-version", new Packages.com.google.refine.commands.GetVersionCommand());
|
RS.registerCommand(module, "get-version", new Packages.com.google.refine.commands.GetVersionCommand());
|
||||||
|
|
||||||
RS.registerCommand(module, "create-import-job", new Packages.com.google.refine.commands.importing.CreateImportJobCommand());
|
RS.registerCommand(module, "get-importing-configuration", new Packages.com.google.refine.commands.importing.GetImportingConfigurationCommand());
|
||||||
RS.registerCommand(module, "retrieve-import-content", new Packages.com.google.refine.commands.importing.RetrieveImportContentCommand());
|
RS.registerCommand(module, "create-importing-job", new Packages.com.google.refine.commands.importing.CreateImportingJobCommand());
|
||||||
RS.registerCommand(module, "get-import-job-status", new Packages.com.google.refine.commands.importing.GetImportJobStatusCommand());
|
RS.registerCommand(module, "get-importing-job-status", new Packages.com.google.refine.commands.importing.GetImportingJobStatusCommand());
|
||||||
|
RS.registerCommand(module, "importing-controller", new Packages.com.google.refine.commands.importing.ImportingControllerCommand());
|
||||||
|
|
||||||
RS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand());
|
RS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand());
|
||||||
RS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand());
|
RS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand());
|
||||||
@ -120,12 +121,9 @@ function registerCommands() {
|
|||||||
|
|
||||||
RS.registerCommand(module, "get-expression-language-info", new Packages.com.google.refine.commands.expr.GetExpressionLanguageInfoCommand());
|
RS.registerCommand(module, "get-expression-language-info", new Packages.com.google.refine.commands.expr.GetExpressionLanguageInfoCommand());
|
||||||
RS.registerCommand(module, "get-expression-history", new Packages.com.google.refine.commands.expr.GetExpressionHistoryCommand());
|
RS.registerCommand(module, "get-expression-history", new Packages.com.google.refine.commands.expr.GetExpressionHistoryCommand());
|
||||||
RS.registerCommand(module, "get-starred-expressions", new Packages.com.google.refine.commands.expr.GetStarredExpressionsCommand());
|
|
||||||
RS.registerCommand(module, "toggle-starred-expression", new Packages.com.google.refine.commands.expr.ToggleStarredExpressionCommand());
|
|
||||||
RS.registerCommand(module, "log-expression", new Packages.com.google.refine.commands.expr.LogExpressionCommand());
|
RS.registerCommand(module, "log-expression", new Packages.com.google.refine.commands.expr.LogExpressionCommand());
|
||||||
RS.registerCommand(module, "preview-expression", new Packages.com.google.refine.commands.expr.PreviewExpressionCommand());
|
RS.registerCommand(module, "preview-expression", new Packages.com.google.refine.commands.expr.PreviewExpressionCommand());
|
||||||
|
|
||||||
|
|
||||||
RS.registerCommand(module, "get-preference", new Packages.com.google.refine.commands.GetPreferenceCommand());
|
RS.registerCommand(module, "get-preference", new Packages.com.google.refine.commands.GetPreferenceCommand());
|
||||||
RS.registerCommand(module, "get-all-preferences", new Packages.com.google.refine.commands.GetAllPreferencesCommand());
|
RS.registerCommand(module, "get-all-preferences", new Packages.com.google.refine.commands.GetAllPreferencesCommand());
|
||||||
RS.registerCommand(module, "set-preference", new Packages.com.google.refine.commands.SetPreferenceCommand());
|
RS.registerCommand(module, "set-preference", new Packages.com.google.refine.commands.SetPreferenceCommand());
|
||||||
@ -168,11 +166,98 @@ function registerOperations() {
|
|||||||
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
|
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
|
||||||
}
|
}
|
||||||
|
|
||||||
function registerImportSourceClasses() {
|
function registerImporting() {
|
||||||
var RM = Packages.com.google.refine.commands.importing.ImportManager;
|
var IM = Packages.com.google.refine.importing.ImportingManager;
|
||||||
RM.registerImportSourceClass("file-upload", Packages.com.google.refine.model.meta.FileUploadImportSource);
|
|
||||||
RM.registerImportSourceClass("text", Packages.com.google.refine.model.meta.TextImportSource);
|
/*
|
||||||
RM.registerImportSourceClass("web", Packages.com.google.refine.model.meta.WebImportSource);
|
* Formats and their UI class names and parsers:
|
||||||
|
* - UI class names are used on the client-side in Javascript to instantiate code that lets the user
|
||||||
|
* configure the parser's options.
|
||||||
|
* - Parsers are server-side code that do the actual parsing. Because they have access to the raw files,
|
||||||
|
* they also generate defaults for the client-side UIs to initialize.
|
||||||
|
*/
|
||||||
|
|
||||||
|
IM.registerFormat("text", "Text files"); // generic format, no parser to handle it
|
||||||
|
IM.registerFormat("text/line-based", "Line-based text files", "LineBasedParserUI",
|
||||||
|
new Packages.com.google.refine.importers.LineBasedImporter());
|
||||||
|
IM.registerFormat("text/line-based/*sv", "CSV / TSV / separator-based files", "SeparatorBasedParserUI",
|
||||||
|
new Packages.com.google.refine.importers.SeparatorBasedImporter());
|
||||||
|
IM.registerFormat("text/line-based/fixed-width", "Fixed-width field text files", "FixedWidthParserUI",
|
||||||
|
new Packages.com.google.refine.importers.FixedWidthImporter());
|
||||||
|
|
||||||
|
IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
|
||||||
|
IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
|
||||||
|
IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
|
||||||
|
IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
|
||||||
|
IM.registerFormat("text/marc", "MARC files");
|
||||||
|
|
||||||
|
IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
|
||||||
|
IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
|
||||||
|
|
||||||
|
IM.registerFormat("service", "Services"); // generic format, no parser to handle it
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extension to format mappings
|
||||||
|
*/
|
||||||
|
IM.registerExtension(".txt", "text/line-based");
|
||||||
|
IM.registerExtension(".csv", "text/line-based/*sv");
|
||||||
|
IM.registerExtension(".tsv", "text/line-based/*sv");
|
||||||
|
|
||||||
|
IM.registerExtension(".xml", "text/xml");
|
||||||
|
IM.registerExtension(".rdf", "text/xml/rdf");
|
||||||
|
|
||||||
|
IM.registerExtension(".json", "text/json");
|
||||||
|
IM.registerExtension(".js", "text/json");
|
||||||
|
|
||||||
|
IM.registerExtension(".xls", "binary/xls");
|
||||||
|
IM.registerExtension(".xlsx", "text/xml/xlsx");
|
||||||
|
|
||||||
|
IM.registerExtension(".marc", "text/marc");
|
||||||
|
IM.registerExtension(".mrc", "text/marc");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mime type to format mappings
|
||||||
|
*/
|
||||||
|
IM.registerMimeType("text/plain", "text/line-based");
|
||||||
|
IM.registerMimeType("text/csv", "text/line-based/*sv");
|
||||||
|
IM.registerMimeType("text/x-csv", "text/line-based/*sv");
|
||||||
|
IM.registerMimeType("text/tab-separated-value", "text/line-based/*sv");
|
||||||
|
|
||||||
|
IM.registerMimeType("text/fixed-width", "text/line-based/fixed-width");
|
||||||
|
|
||||||
|
IM.registerMimeType("application/msexcel", "binary/xls");
|
||||||
|
IM.registerMimeType("application/x-msexcel", "binary/xls");
|
||||||
|
IM.registerMimeType("application/x-ms-excel", "binary/xls");
|
||||||
|
IM.registerMimeType("application/vnd.ms-excel", "binary/xls");
|
||||||
|
IM.registerMimeType("application/x-excel", "binary/xls");
|
||||||
|
IM.registerMimeType("application/xls", "binary/xls");
|
||||||
|
IM.registerMimeType("application/x-xls", "text/xml/xlsx");
|
||||||
|
|
||||||
|
IM.registerMimeType("application/json", "text/json");
|
||||||
|
IM.registerMimeType("text/json", "text/json");
|
||||||
|
|
||||||
|
IM.registerMimeType("application/rdf+xml", "text/xml/rdf");
|
||||||
|
|
||||||
|
IM.registerMimeType("application/marc", "text/marc");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Format guessers: these take a format derived from extensions or mime-types,
|
||||||
|
* look at the actual files' content, and try to guess a better format.
|
||||||
|
*/
|
||||||
|
IM.registerFormatGuesser("text", new Packages.com.google.refine.importers.TextFormatGuesser());
|
||||||
|
IM.registerFormatGuesser("text/line-based", new Packages.com.google.refine.importers.LineBasedFormatGuesser());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Controllers: these implement high-level UI flows for importing data. For example, the default
|
||||||
|
* controller lets the user specify one or more source files, either local or remote or on the clipboard,
|
||||||
|
* lets the user select which files to actually import in case any of the original file is an archive
|
||||||
|
* containing several files, and then lets the user configure parsing options.
|
||||||
|
*/
|
||||||
|
IM.registerController(
|
||||||
|
module,
|
||||||
|
"default-importing-controller",
|
||||||
|
new Packages.com.google.refine.importing.DefaultImportingController()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -183,7 +268,7 @@ function init() {
|
|||||||
|
|
||||||
registerCommands();
|
registerCommands();
|
||||||
registerOperations();
|
registerOperations();
|
||||||
registerImportSourceClasses();
|
registerImporting();
|
||||||
|
|
||||||
var RC = Packages.com.google.refine.model.recon.ReconConfig;
|
var RC = Packages.com.google.refine.model.recon.ReconConfig;
|
||||||
RC.registerReconConfig(module, "standard-service", Packages.com.google.refine.model.recon.StandardReconConfig);
|
RC.registerReconConfig(module, "standard-service", Packages.com.google.refine.model.recon.StandardReconConfig);
|
||||||
@ -193,12 +278,36 @@ function init() {
|
|||||||
module,
|
module,
|
||||||
[
|
[
|
||||||
"externals/jquery-1.4.2.min.js",
|
"externals/jquery-1.4.2.min.js",
|
||||||
|
"externals/jquery.cookie.js",
|
||||||
|
"externals/jquery.eventstack-0.3.js",
|
||||||
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
|
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
|
||||||
"externals/date.js",
|
"externals/date.js",
|
||||||
|
|
||||||
|
"scripts/util/misc.js",
|
||||||
|
"scripts/util/url.js",
|
||||||
"scripts/util/string.js",
|
"scripts/util/string.js",
|
||||||
|
"scripts/util/ajax.js",
|
||||||
|
"scripts/util/menu.js",
|
||||||
|
"scripts/util/dialog.js",
|
||||||
"scripts/util/dom.js",
|
"scripts/util/dom.js",
|
||||||
|
|
||||||
"scripts/index.js",
|
"scripts/index.js",
|
||||||
"scripts/index/import-sources.js"
|
"scripts/index/create-project-ui.js",
|
||||||
|
"scripts/index/open-project-ui.js",
|
||||||
|
"scripts/index/import-project-ui.js",
|
||||||
|
|
||||||
|
"scripts/index/default-importing-controller/controller.js",
|
||||||
|
"scripts/index/default-importing-controller/file-selection-panel.js",
|
||||||
|
"scripts/index/default-importing-controller/parsing-panel.js",
|
||||||
|
|
||||||
|
"scripts/index/default-importing-sources/sources.js",
|
||||||
|
"scripts/index/parser-interfaces/preview-table.js",
|
||||||
|
"scripts/index/parser-interfaces/separator-based-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/line-based-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/fixed-width-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||||
|
"scripts/index/parser-interfaces/json-parser-ui.js"
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -210,32 +319,20 @@ function init() {
|
|||||||
"styles/jquery-ui-overrides.less",
|
"styles/jquery-ui-overrides.less",
|
||||||
"styles/common.less",
|
"styles/common.less",
|
||||||
"styles/pure.css",
|
"styles/pure.css",
|
||||||
"styles/index.less"
|
"styles/index.less",
|
||||||
]
|
"styles/index/create-project-ui.less",
|
||||||
);
|
"styles/index/open-project-ui.less",
|
||||||
|
"styles/index/import-project-ui.less",
|
||||||
|
|
||||||
ClientSideResourceManager.addPaths(
|
"styles/index/default-importing-controller.less",
|
||||||
"import/scripts",
|
"styles/index/default-importing-file-selection-panel.less",
|
||||||
module,
|
"styles/index/default-importing-parsing-panel.less",
|
||||||
[
|
|
||||||
"externals/jquery-1.4.2.min.js",
|
|
||||||
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
|
|
||||||
"externals/date.js",
|
|
||||||
"scripts/util/string.js",
|
|
||||||
"scripts/util/dom.js",
|
|
||||||
"scripts/import.js"
|
|
||||||
]
|
|
||||||
);
|
|
||||||
|
|
||||||
ClientSideResourceManager.addPaths(
|
"styles/index/default-importing-sources.less",
|
||||||
"import/styles",
|
"styles/views/data-table-view.less", // for the preview table's styles
|
||||||
module,
|
"styles/index/fixed-width-parser-ui.less",
|
||||||
[
|
"styles/index/xml-parser-ui.less",
|
||||||
"externals/jquery-ui/css/ui-lightness/jquery-ui-1.8.custom.css",
|
"styles/index/json-parser-ui.less"
|
||||||
"styles/jquery-ui-overrides.less",
|
|
||||||
"styles/common.less",
|
|
||||||
"styles/pure.css",
|
|
||||||
"styles/import.less"
|
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -41,182 +41,33 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
$styleInjection
|
$styleInjection
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
#if($params.new && $params.new == "1")
|
<div id="header">
|
||||||
#set($newStyle = "")
|
<img alt="Google Refine" src="images/logo-googlerefine-30.png" width="129" height="29" />
|
||||||
#set($oldStyle = "display: none; ")
|
A power tool for working with messy data.
|
||||||
#else
|
|
||||||
#set($oldStyle = "")
|
|
||||||
#set($newStyle = "display: none; ")
|
|
||||||
#end
|
|
||||||
<div id="container">
|
|
||||||
<div id="logo"> </div>
|
|
||||||
<div id="header-home">
|
|
||||||
<img alt="Google Refine" src="images/logo-googlerefine-40.png" />
|
|
||||||
<h1>A power tool for working with messy data.</h1>
|
|
||||||
</div>
|
</div>
|
||||||
<div id="content-home">
|
|
||||||
|
<div id="left-panel" class="main-layout-panel"><div id="left-panel-body">
|
||||||
|
<ul id="action-area-tabs">
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<div id="project-links">
|
||||||
|
<div id="logo-container">
|
||||||
|
<img alt="Google Refine" src="images/logo-gem-40.png" />
|
||||||
|
<div id="google-refine-version"></div>
|
||||||
|
</div>
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help</a></li>
|
||||||
|
<li><a href="/about.html">About</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div></div> <!-- left-panel -->
|
||||||
|
|
||||||
|
<div id="right-panel" class="main-layout-panel"><div id="right-panel-body">
|
||||||
|
</div></div> <!-- right-panel -->
|
||||||
|
|
||||||
<div id="no-project-message" class="message" style="display: none;">
|
<div id="no-project-message" class="message" style="display: none;">
|
||||||
No existing project. Create one now!<br/>
|
No existing project. Create one now!<br/>
|
||||||
Try these <a href="http://code.google.com/p/google-refine/wiki/SampleDatasets" target="_blank">sample data sets »</a>
|
Try these <a href="http://code.google.com/p/google-refine/wiki/SampleDatasets" target="_blank">sample data sets »</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="project-open">
|
|
||||||
<h1>Open a Project</h1>
|
|
||||||
<div id="projects-container"></div>
|
|
||||||
<div class="content-block-footer"><a href="javascript:openWorkspaceDir()" class="secondary">Browse workspace directory</a></div>
|
|
||||||
</div>
|
|
||||||
<div id="project-create">
|
|
||||||
<h1 style="$newStyle">Create a New Project</h1>
|
|
||||||
<div style="$newStyle" id="import-panel"><table id="import-panel-layout">
|
|
||||||
<tr>
|
|
||||||
<td id="import-panel-tab-headers">
|
|
||||||
<div>Import data from</div>
|
|
||||||
</td>
|
|
||||||
<td id="import-panel-tab-bodies"></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td colspan="2" id="import-panel-message">
|
|
||||||
<h3>What kinds of data files can I import?</h3>
|
|
||||||
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
|
|
||||||
Google Spreadsheets are all supported. Support for other formats can
|
|
||||||
be added with Refine extensions.
|
|
||||||
</div>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</table></div>
|
|
||||||
|
|
||||||
<div style="$newStyle" id="import-progress-panel">
|
|
||||||
<div class="grid-layout layout-normal layout-full"><table>
|
|
||||||
<tr><td colspan="3" id="import-progress-message"></td></tr>
|
|
||||||
<tr><td colspan="3">
|
|
||||||
<div id="import-progress-bar-frame"><div id="import-progress-bar-body"></div></div>
|
|
||||||
</td></tr>
|
|
||||||
<tr>
|
|
||||||
<td id="import-progress-message-left"></td>
|
|
||||||
<td id="import-progress-message-center"></td>
|
|
||||||
<td id="import-progress-message-right"></td>
|
|
||||||
</tr>
|
|
||||||
<tr><td colspan="3">
|
|
||||||
<button class="button" id="import-progress-cancel-button">Cancel</button>
|
|
||||||
</td></tr>
|
|
||||||
</table></div>
|
|
||||||
<iframe id="import-iframe" name="import-iframe"></iframe>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div style="$newStyle" id="import-error-panel"><div class="grid-layout layout-normal layout-full"><table>
|
|
||||||
<tr><td id="import-error-message"></td></tr>
|
|
||||||
<tr><td id="import-error-stack"></td></tr>
|
|
||||||
<tr><td><button class="button button-primary" id="import-error-ok-button">OK</button></td></tr>
|
|
||||||
</table></div></div>
|
|
||||||
|
|
||||||
<form style="$oldStyle" id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/core/create-project-from-upload" accept-charset="UTF-8">
|
|
||||||
<h1>Create a New Project</h1>
|
|
||||||
<h2 id="project-toggle">
|
|
||||||
<a class="secondary" href="javascript:showHide('file-upload-form', 'project-upload-form')">or Import an Existing Project</a>
|
|
||||||
</h2>
|
|
||||||
<div class="project-create-basic">
|
|
||||||
<table class="form-table">
|
|
||||||
<tr>
|
|
||||||
<th><label for="project-file">Data file:</label></th>
|
|
||||||
<td><input type="file" id="project-file-input" name="project-file" /></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th><label for="project-url">or data file URL:</label></th>
|
|
||||||
<td><input type="text" id="project-url-input" name="project-url" size="40" /></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th><label for="project-name">Project name:</label></th>
|
|
||||||
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
|
|
||||||
<tr>
|
|
||||||
<td></td>
|
|
||||||
<td><input type="submit" value="Create Project" id="upload-file-button" class="button button-primary" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
<div class="project-create-advanced">
|
|
||||||
<h2>Advanced Options</h2>
|
|
||||||
<div class="project-create-option">
|
|
||||||
Limit load to:
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input type="text" id="limit-input" name="limit" size="5" /> rows (blank for all)
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="project-create-option">
|
|
||||||
Ignore:
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input type="text" id="ignore-input" name="ignore" size="5" value="0" /> initial non-blank lines
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="project-create-option">
|
|
||||||
Skip:
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input type="text" id="skip-input" name="skip" size="5" value="0" /> initial data rows
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div id="project-create-parsetext">
|
|
||||||
<div class="project-create-option">
|
|
||||||
When parsing text files:
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input id="split-into-columns-input" type="checkbox" checked="true" name="split-into-columns" />
|
|
||||||
Split into columns
|
|
||||||
</div>
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
Column separator:
|
|
||||||
<input type="text" id="separator-input" name="separator" size="2" /><br />
|
|
||||||
(leave blank to auto-detect)
|
|
||||||
</div>
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input type="checkbox" id="guess-value-type-input" name="guess-value-type" checked="true" />
|
|
||||||
Auto-detect value types<br />
|
|
||||||
(numbers, dates, etc)
|
|
||||||
</div>
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
Header lines: <input type="text" id="header-lines-input" name="header-lines" size="5" value="1" /><br />
|
|
||||||
(use 0 if your data has no header)
|
|
||||||
</div>
|
|
||||||
<div class="project-create-suboption">
|
|
||||||
<input type="checkbox" id="ignore-quotes-input" name="ignore-quotes" />
|
|
||||||
Ignore quotation marks
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</form>
|
|
||||||
|
|
||||||
<form style="display: none;" id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8" style="display:none;">
|
|
||||||
<h1>Import an Existing Project</h1>
|
|
||||||
<h2 id="project-toggle">
|
|
||||||
<a class="secondary" href="javascript:showHide('project-upload-form', 'file-upload-form')">or Create a New Project</a>
|
|
||||||
</h2>
|
|
||||||
<div class="project-create-basic">
|
|
||||||
<p>Import an existing Google Refine .tar or .tar.gz project file:</p>
|
|
||||||
<table class="form-table">
|
|
||||||
<tr>
|
|
||||||
<th><label for="project-file">Project file:</label></th>
|
|
||||||
<td><input type="file" id="project-tar-file-input" name="project-file" /></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th><label for="project-name">Project name (optional):</label></th>
|
|
||||||
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
|
|
||||||
<tr>
|
|
||||||
<td></td>
|
|
||||||
<td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div id="project-links">
|
|
||||||
<ul>
|
|
||||||
<li><a href="/about.html">About Google Refine</a></li>
|
|
||||||
<li><a href="https://code.google.com/p/google-refine/">Project Home Page</a></li>
|
|
||||||
<li><a href="http://code.google.com/p/google-refine/wiki/Screencasts">Screencasts</a></li>
|
|
||||||
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help Documentation</a></li>
|
|
||||||
</ul>
|
|
||||||
<div id="google-refine-version"></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -31,64 +31,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
function onClickUploadFileButton(evt) {
|
var GoogleRefineVersion;
|
||||||
var projectName = $("#project-name-input")[0].value;
|
|
||||||
var dataURL = $.trim($("#project-url-input")[0].value);
|
|
||||||
if (! $.trim(projectName).length) {
|
|
||||||
window.alert("You must specify a project name.");
|
|
||||||
|
|
||||||
} else if ($("#project-file-input")[0].files.length === 0 && ! dataURL.length) {
|
var Refine = {
|
||||||
window.alert("You must specify a data file to upload or a URL to retrieve.");
|
actionAreas: []
|
||||||
|
};
|
||||||
|
|
||||||
} else {
|
Refine.selectActionArea = function(id) {
|
||||||
$("#file-upload-form").attr("action",
|
$('.action-area-tab').removeClass('selected');
|
||||||
"/command/core/create-project-from-upload?" + [
|
$('.action-area-tab-body').css('visibility', 'hidden').css('z-index', '100');
|
||||||
"url=" + escape(dataURL),
|
|
||||||
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
|
|
||||||
"separator=" + $("#separator-input")[0].value,
|
|
||||||
"ignore=" + $("#ignore-input")[0].value,
|
|
||||||
"header-lines=" + $("#header-lines-input")[0].value,
|
|
||||||
"skip=" + $("#skip-input")[0].value,
|
|
||||||
"limit=" + $("#limit-input")[0].value,
|
|
||||||
"guess-value-type=" + $("#guess-value-type-input")[0].checked,
|
|
||||||
"ignore-quotes=" + $("#ignore-quotes-input")[0].checked
|
|
||||||
].join("&"));
|
|
||||||
|
|
||||||
return true;
|
for (var i = 0; i < Refine.actionAreas.length; i++) {
|
||||||
|
var actionArea = Refine.actionAreas[i];
|
||||||
|
if (id == actionArea.id) {
|
||||||
|
actionArea.tabElmt.addClass('selected');
|
||||||
|
actionArea.bodyElmt.css('visibility', 'visible').css('z-index', '110');;
|
||||||
}
|
}
|
||||||
|
|
||||||
evt.preventDefault();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
function formatDate(d) {
|
|
||||||
var d = new Date(d);
|
|
||||||
var last_year = Date.today().add({ years: -1 });
|
|
||||||
var last_month = Date.today().add({ months: -1 });
|
|
||||||
var last_week = Date.today().add({ days: -7 });
|
|
||||||
var today = Date.today();
|
|
||||||
var tomorrow = Date.today().add({ days: 1 });
|
|
||||||
|
|
||||||
if (d > today) {
|
|
||||||
return "today " + d.toString("h:mm tt");
|
|
||||||
} else if (d.between(last_week, today)) {
|
|
||||||
var diff = Math.floor(today.getDayOfYear() - d.getDayOfYear());
|
|
||||||
return (diff <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (diff + " days ago");
|
|
||||||
} else if (d.between(last_month, today)) {
|
|
||||||
var diff = Math.floor((today.getDayOfYear() - d.getDayOfYear()) / 7);
|
|
||||||
if (diff < 0) {diff += 52;}
|
|
||||||
return (diff == 1) ? "a week ago" : diff.toFixed(0) + " weeks ago" ;
|
|
||||||
} else if (d.between(last_year, today)) {
|
|
||||||
var diff = Math.floor(today.getMonth() - d.getMonth());
|
|
||||||
if (diff < 0) {diff += 12;}
|
|
||||||
return (diff == 1) ? "a month ago" : diff + " months ago";
|
|
||||||
} else {
|
|
||||||
var diff = Math.floor(today.getYear() - d.getYear());
|
|
||||||
return (diff == 1) ? "a year ago" : diff + " years ago";
|
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
function isThereNewRelease() {
|
$(function() {
|
||||||
|
var isThereNewRelease = function() {
|
||||||
var thisRevision = GoogleRefineVersion.revision;
|
var thisRevision = GoogleRefineVersion.revision;
|
||||||
|
|
||||||
var revision_pattern = /r([0-9]+)/;
|
var revision_pattern = /r([0-9]+)/;
|
||||||
@ -103,150 +66,9 @@ function isThereNewRelease() {
|
|||||||
var latestRev = parseInt(revision_pattern.exec(GoogleRefineReleases.releases[0].revision)[1],10);
|
var latestRev = parseInt(revision_pattern.exec(GoogleRefineReleases.releases[0].revision)[1],10);
|
||||||
|
|
||||||
return latestRev > thisRev;
|
return latestRev > thisRev;
|
||||||
}
|
|
||||||
|
|
||||||
function fetchProjects() {
|
|
||||||
$.getJSON(
|
|
||||||
"/command/core/get-all-project-metadata",
|
|
||||||
null,
|
|
||||||
function(data) {
|
|
||||||
renderProjects(data);
|
|
||||||
},
|
|
||||||
"json"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function renderProjects(data) {
|
|
||||||
var projects = [];
|
|
||||||
for (var n in data.projects) {
|
|
||||||
if (data.projects.hasOwnProperty(n)) {
|
|
||||||
var project = data.projects[n];
|
|
||||||
project.id = n;
|
|
||||||
project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
|
|
||||||
projects.push(project);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });
|
|
||||||
|
|
||||||
var container = $("#projects-container").empty();
|
|
||||||
if (!projects.length) {
|
|
||||||
$("#no-project-message").clone().show().appendTo(container);
|
|
||||||
} else {
|
|
||||||
var table = $(
|
|
||||||
'<table class="list-table"><tr>' +
|
|
||||||
'<th>Name</th>' +
|
|
||||||
'<th></th>' +
|
|
||||||
'<th></th>' +
|
|
||||||
'<th align="right">Last modified</th>' +
|
|
||||||
'</tr></table>'
|
|
||||||
).appendTo(container)[0];
|
|
||||||
|
|
||||||
var renderProject = function(project) {
|
|
||||||
var tr = table.insertRow(table.rows.length);
|
|
||||||
tr.className = "project";
|
|
||||||
|
|
||||||
var nameLink = $('<a></a>')
|
|
||||||
.addClass("list-table-itemname")
|
|
||||||
.text(project.name)
|
|
||||||
.attr("href", "/project?project=" + project.id)
|
|
||||||
.appendTo(tr.insertCell(tr.cells.length));
|
|
||||||
|
|
||||||
var renameLink = $('<a></a>')
|
|
||||||
.text("rename")
|
|
||||||
.addClass("secondary")
|
|
||||||
.attr("href", "javascript:{}")
|
|
||||||
.css("visibility", "hidden")
|
|
||||||
.click(function() {
|
|
||||||
var name = window.prompt("New project name:", project.name);
|
|
||||||
if (name == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
name = $.trim(name);
|
|
||||||
if (project.name == name || name.length == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
$.ajax({
|
|
||||||
type: "POST",
|
|
||||||
url: "/command/core/rename-project",
|
|
||||||
data: { "project" : project.id, "name" : name },
|
|
||||||
dataType: "json",
|
|
||||||
success: function (data) {
|
|
||||||
if (data && typeof data.code != 'undefined' && data.code == "ok") {
|
|
||||||
nameLink.text(name);
|
|
||||||
} else {
|
|
||||||
alert("Failed to rename project: " + data.message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}).appendTo(tr.insertCell(tr.cells.length));
|
|
||||||
|
|
||||||
var deleteLink = $('<a></a>')
|
|
||||||
.addClass("delete-project")
|
|
||||||
.attr("title","Delete this project")
|
|
||||||
.attr("href","")
|
|
||||||
.css("visibility", "hidden")
|
|
||||||
.html("<img src='/images/close.png' />")
|
|
||||||
.click(function() {
|
|
||||||
if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
|
|
||||||
$.ajax({
|
|
||||||
type: "POST",
|
|
||||||
url: "/command/core/delete-project",
|
|
||||||
data: { "project" : project.id },
|
|
||||||
dataType: "json",
|
|
||||||
success: function (data) {
|
|
||||||
if (data && typeof data.code != 'undefined' && data.code == "ok") {
|
|
||||||
fetchProjects();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}).appendTo(tr.insertCell(tr.cells.length));
|
|
||||||
|
|
||||||
|
|
||||||
$('<div></div>')
|
|
||||||
.html(formatDate(project.date))
|
|
||||||
.addClass("last-modified")
|
|
||||||
.attr("title", project.date.toString())
|
|
||||||
.appendTo(tr.insertCell(tr.cells.length));
|
|
||||||
|
|
||||||
$(tr).mouseenter(function() {
|
|
||||||
renameLink.css("visibility", "visible");
|
|
||||||
deleteLink.css("visibility", "visible");
|
|
||||||
}).mouseleave(function() {
|
|
||||||
renameLink.css("visibility", "hidden");
|
|
||||||
deleteLink.css("visibility", "hidden");
|
|
||||||
});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
for (var i = 0; i < projects.length; i++) {
|
var showVersion = function() {
|
||||||
renderProject(projects[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function showHide(toHide, toShow) {
|
|
||||||
$("#" + toHide).hide();
|
|
||||||
$("#" + toShow).show();
|
|
||||||
}
|
|
||||||
|
|
||||||
function openWorkspaceDir() {
|
|
||||||
$.ajax({
|
|
||||||
type: "POST",
|
|
||||||
url: "/command/core/open-workspace-dir",
|
|
||||||
dataType: "json",
|
|
||||||
success: function (data) {
|
|
||||||
if (data.code != "ok" && "message" in data) {
|
|
||||||
alert(data.message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
var GoogleRefineVersion;
|
|
||||||
function showVersion() {
|
|
||||||
$.getJSON(
|
$.getJSON(
|
||||||
"/command/core/get-version",
|
"/command/core/get-version",
|
||||||
null,
|
null,
|
||||||
@ -264,10 +86,10 @@ function showVersion() {
|
|||||||
if ("releases" in window) {
|
if ("releases" in window) {
|
||||||
if (isThereNewRelease()) {
|
if (isThereNewRelease()) {
|
||||||
var container = $('<div id="notification-container">')
|
var container = $('<div id="notification-container">')
|
||||||
.appendTo(document.body);
|
.appendTo(document.body)
|
||||||
var notification = $('<div id="notification">')
|
var notification = $('<div id="notification">')
|
||||||
.text('New version! ')
|
.text('New version! ')
|
||||||
.appendTo(container);
|
.appendTo(container)
|
||||||
$('<a>')
|
$('<a>')
|
||||||
.addClass('notification-action')
|
.addClass('notification-action')
|
||||||
.attr("href", releases.homepage)
|
.attr("href", releases.homepage)
|
||||||
@ -281,174 +103,72 @@ function showVersion() {
|
|||||||
window.setTimeout(poll, 1000);
|
window.setTimeout(poll, 1000);
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
|
||||||
|
|
||||||
function renderImportPanel() {
|
|
||||||
var headerContainer = $('#import-panel-tab-headers');
|
|
||||||
var bodyContainer = $('#import-panel-tab-bodies');
|
|
||||||
|
|
||||||
var selectImportSourceTab = function(importSource) {
|
|
||||||
$('.import-panel-tab-body').hide();
|
|
||||||
$('.import-panel-tab-header').removeClass('selected');
|
|
||||||
|
|
||||||
importSource._divBody.show();
|
|
||||||
importSource._divHeader.addClass('selected');
|
|
||||||
importSource._ui.focus();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
var createImportSourceTab = function(importSource) {
|
var resize = function() {
|
||||||
importSource._divBody = $('<div>')
|
var leftPanelWidth = 150;
|
||||||
.addClass('import-panel-tab-body')
|
// px
|
||||||
.appendTo(bodyContainer)
|
var width = $(window).width();
|
||||||
.hide();
|
var height = $(window).height();
|
||||||
|
var headerHeight = $('#header').outerHeight();
|
||||||
|
var panelHeight = height - headerHeight;
|
||||||
|
|
||||||
importSource._divHeader = $('<div>')
|
$('.main-layout-panel')
|
||||||
.addClass('import-panel-tab-header')
|
.css("top", headerHeight + "px")
|
||||||
.text(importSource.label)
|
.css("bottom", "0px")
|
||||||
.appendTo(headerContainer)
|
.css("height", panelHeight + "px")
|
||||||
.click(function() { selectImportSourceTab(importSource); });
|
.css("visibility", "visible");
|
||||||
|
|
||||||
importSource._ui = new importSource.ui(importSource._divBody);
|
$('#left-panel')
|
||||||
|
.css("left", "0px")
|
||||||
|
.css("width", leftPanelWidth + "px");
|
||||||
|
var leftPanelBodyHPaddings = 10;
|
||||||
|
// px
|
||||||
|
var leftPanelBodyVPaddings = 0;
|
||||||
|
// px
|
||||||
|
$('#left-panel-body')
|
||||||
|
.css("margin-left", leftPanelBodyHPaddings + "px")
|
||||||
|
.css("margin-top", leftPanelBodyVPaddings + "px")
|
||||||
|
.css("width", ($('#left-panel').width() - leftPanelBodyHPaddings) + "px")
|
||||||
|
.css("height", ($('#left-panel').height() - leftPanelBodyVPaddings) + "px");
|
||||||
|
|
||||||
|
$('#right-panel')
|
||||||
|
.css("left", leftPanelWidth + "px")
|
||||||
|
.css("width", (width - leftPanelWidth) + "px");
|
||||||
|
|
||||||
|
var rightPanelBodyHPaddings = 5;
|
||||||
|
// px
|
||||||
|
var rightPanelBodyVPaddings = 5;
|
||||||
|
// px
|
||||||
|
$('#right-panel-body')
|
||||||
|
.css("margin-left", rightPanelBodyHPaddings + "px")
|
||||||
|
.css("margin-top", rightPanelBodyVPaddings + "px")
|
||||||
|
.css("width", ($('#right-panel').width() - rightPanelBodyHPaddings) + "px")
|
||||||
|
.css("height", ($('#right-panel').height() - rightPanelBodyVPaddings) + "px");
|
||||||
|
};
|
||||||
|
$(window).bind("resize", resize);
|
||||||
|
window.setTimeout(resize, 50); // for Chrome, give the window some time to layout first
|
||||||
|
|
||||||
|
var renderActionArea = function(actionArea) {
|
||||||
|
actionArea.bodyElmt = $('<div>')
|
||||||
|
.addClass('action-area-tab-body')
|
||||||
|
.appendTo('#right-panel-body');
|
||||||
|
|
||||||
|
actionArea.tabElmt = $('<li>')
|
||||||
|
.addClass('action-area-tab')
|
||||||
|
.text(actionArea.label)
|
||||||
|
.appendTo($('#action-area-tabs'))
|
||||||
|
.click(function() {
|
||||||
|
Refine.selectActionArea(actionArea.id);
|
||||||
|
});
|
||||||
|
|
||||||
|
actionArea.ui = new actionArea.uiClass(actionArea.bodyElmt);
|
||||||
};
|
};
|
||||||
|
|
||||||
for (var i= 0; i < ImportSources.length; i++) {
|
for (var i = 0; i < Refine.actionAreas.length; i++) {
|
||||||
createImportSourceTab(ImportSources[i]);
|
renderActionArea(Refine.actionAreas[i]);
|
||||||
}
|
}
|
||||||
selectImportSourceTab(ImportSources[0]);
|
Refine.selectActionArea('create-project');
|
||||||
}
|
|
||||||
|
|
||||||
function startImportJob(importSource, form, progressMessage) {
|
|
||||||
$.post(
|
|
||||||
"/command/core/create-import-job",
|
|
||||||
null,
|
|
||||||
function(data) {
|
|
||||||
var jobID = data.jobID;
|
|
||||||
|
|
||||||
form.attr("method", "post")
|
|
||||||
.attr("enctype", "multipart/form-data")
|
|
||||||
.attr("accept-charset", "UTF-8")
|
|
||||||
.attr("target", "import-iframe")
|
|
||||||
.attr("action", "/command/core/retrieve-import-content?" + $.param({
|
|
||||||
"jobID" : jobID,
|
|
||||||
"source" : importSource
|
|
||||||
}));
|
|
||||||
|
|
||||||
form[0].submit();
|
|
||||||
|
|
||||||
var start = new Date();
|
|
||||||
var timerID = window.setInterval(function() { pollImportJob(start, jobID, timerID); }, 1000);
|
|
||||||
initializeImportProgressPanel(progressMessage, jobID, timerID);
|
|
||||||
},
|
|
||||||
"json"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function initializeImportProgressPanel(progressMessage, jobID, timerID) {
|
|
||||||
$('#import-progress-message').text(progressMessage);
|
|
||||||
$('#import-progress-bar-body').css("width", "0%");
|
|
||||||
$('#import-progress-message-left').text('Starting');
|
|
||||||
$('#import-progress-message-center').empty();
|
|
||||||
$('#import-progress-message-right').empty();
|
|
||||||
|
|
||||||
$('#import-panel').hide();
|
|
||||||
$('#import-progress-panel').show();
|
|
||||||
|
|
||||||
$('#import-progress-cancel-button').unbind().click(function() {
|
|
||||||
$('#import-panel').show();
|
|
||||||
$('#import-progress-panel').hide();
|
|
||||||
|
|
||||||
// stop the iframe
|
|
||||||
$('#import-iframe')[0].contentWindow.stop();
|
|
||||||
|
|
||||||
// stop the timed polling
|
|
||||||
window.clearInterval(timerID);
|
|
||||||
|
|
||||||
// explicitly cancel the import job
|
|
||||||
$.post("/command/core/cancel-import-job?" + $.param({ "jobID" : jobID }));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function bytesToString(b) {
|
|
||||||
if (b >= 1024 * 1024) {
|
|
||||||
return Math.round(b / (1024 * 1024)) + " MB";
|
|
||||||
} else if (b >= 1024) {
|
|
||||||
return Math.round(b / 1024) + " KB";
|
|
||||||
} else {
|
|
||||||
return b + " bytes";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function pollImportJob(start, jobID, timerID) {
|
|
||||||
$.post(
|
|
||||||
"/command/core/get-import-job-status?" + $.param({ "jobID" : jobID }),
|
|
||||||
null,
|
|
||||||
function(data) {
|
|
||||||
if (data.code == "error") {
|
|
||||||
showImportJobError(data.message);
|
|
||||||
window.clearInterval(timerID);
|
|
||||||
} else if (data.state == "error") {
|
|
||||||
showImportJobError(data.message, data.stack);
|
|
||||||
window.clearInterval(timerID);
|
|
||||||
} else if (data.state == "retrieving") {
|
|
||||||
if (data.progress < 0) {
|
|
||||||
$('#import-progress-message-left').text(bytesToString(data.bytesSaved) + " saved");
|
|
||||||
} else {
|
|
||||||
$('#import-progress-bar-body').css("width", data.progress + "%");
|
|
||||||
$('#import-progress-message-left').text(data.progress + "% saved");
|
|
||||||
}
|
|
||||||
} else if (data.state == "ready") {
|
|
||||||
window.clearInterval(timerID);
|
|
||||||
|
|
||||||
// Just so if the user clicks Back the progress panel won't be showing if the DOM is cached.
|
|
||||||
$('#import-progress-panel').hide();
|
|
||||||
$('#import-panel').show();
|
|
||||||
|
|
||||||
window.location = "/import?" + $.param({ "jobID" : jobID });
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"json"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
function showImportJobError(message, stack) {
|
|
||||||
$('#import-error-message').text(message);
|
|
||||||
$('#import-error-stack').text(stack || 'No technical details.');
|
|
||||||
|
|
||||||
$('#import-progress-panel').hide();
|
|
||||||
$('#import-error-panel').show();
|
|
||||||
|
|
||||||
$('#import-error-ok-button').unbind().click(function() {
|
|
||||||
$('#import-error-panel').hide();
|
|
||||||
$('#import-panel').show();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function onLoad() {
|
|
||||||
renderImportPanel();
|
|
||||||
|
|
||||||
fetchProjects();
|
|
||||||
|
|
||||||
$("#project-file-input").change(function() {
|
|
||||||
if ($("#project-name-input")[0].value.length == 0) {
|
|
||||||
var fileName = this.files[0].fileName;
|
|
||||||
if (fileName) {
|
|
||||||
$("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
|
|
||||||
}
|
|
||||||
$("#project-name-input").focus().select();
|
|
||||||
}
|
|
||||||
}).keypress(function(evt) {
|
|
||||||
if (evt.keyCode == 13) {
|
|
||||||
onClickUploadFileButton();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
$("#upload-file-button").click(onClickUploadFileButton);
|
|
||||||
$("#more-options-link").click(function() {
|
|
||||||
$("#more-options-controls").hide();
|
|
||||||
$("#more-options").show();
|
|
||||||
});
|
|
||||||
|
|
||||||
showVersion();
|
showVersion();
|
||||||
}
|
});
|
||||||
|
|
||||||
$(onLoad);
|
|
||||||
|
@ -0,0 +1,17 @@
|
|||||||
|
<div id="create-project-ui-source-selection" class="relative-frame"><table id="create-project-ui-source-selection-layout">
|
||||||
|
<tr>
|
||||||
|
<td colspan="2" id="create-project-ui-source-selection-message">
|
||||||
|
<h3>Create a project by importing data. What kinds of data files can I import?</h3>
|
||||||
|
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
|
||||||
|
Google Spreadsheets are all supported. Support for other formats can
|
||||||
|
be added with Refine extensions.
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td id="create-project-ui-source-selection-tabs">
|
||||||
|
<div>Get data from</div>
|
||||||
|
</td>
|
||||||
|
<td id="create-project-ui-source-selection-tab-bodies"></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
145
main/webapp/modules/core/scripts/index/create-project-ui.js
Normal file
145
main/webapp/modules/core/scripts/index/create-project-ui.js
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * UI for the "Create Project" action area, rendered inside elmt.
 * Fetches the importing configuration from the server, stores it in
 * Refine.importingConfig, and only then builds the UI.
 */
Refine.CreateProjectUI = function(elmt) {
  var ui = this;

  this._elmt = elmt;
  this._sourceSelectionUIs = [];
  this._customPanels = [];
  this._controllers = [];

  var onConfigurationLoaded = function(response) {
    Refine.importingConfig = response.config;
    ui._initializeUI();
  };

  $.post(
    "/command/core/get-importing-configuration",
    null,
    onConfigurationLoaded,
    "json"
  );
};
|
||||||
|
|
||||||
|
// Registry of importing-controller constructors; _initializeUI instantiates
// each one, passing it the CreateProjectUI instance.
Refine.CreateProjectUI.controllers = [];
|
||||||
|
|
||||||
|
/*
 * Loads the source-selection markup into the container element, binds its
 * elements, and instantiates every registered importing controller.
 */
Refine.CreateProjectUI.prototype._initializeUI = function() {
  var markup = DOM.loadHTML("core", "scripts/index/create-project-ui-source-selection.html");

  this._sourceSelectionElmt = $(markup).appendTo(this._elmt);
  this._sourceSelectionElmts = DOM.bind(this._sourceSelectionElmt);

  var controllerClasses = Refine.CreateProjectUI.controllers;
  for (var k = 0; k < controllerClasses.length; k++) {
    var ControllerClass = controllerClasses[k];
    this._controllers.push(new ControllerClass(this));
  }
};
|
||||||
|
|
||||||
|
/*
 * Adds a data-source tab to the source-selection panel.
 *
 * sourceSelectionUI must provide id, label and ui (with attachUI and focus).
 * A hidden body div and a clickable header div are created and stored on it
 * as _divBody and _divHeader. The first source added is selected right away.
 */
Refine.CreateProjectUI.prototype.addSourceSelectionUI = function(sourceSelectionUI) {
  var ui = this;

  var tabs = $('#create-project-ui-source-selection-tabs');
  var tabBodies = $('#create-project-ui-source-selection-tab-bodies');

  var bodyDiv = $('<div>').addClass('create-project-ui-source-selection-tab-body');
  sourceSelectionUI._divBody = bodyDiv.appendTo(tabBodies).hide();

  var onTabClick = function() {
    ui.selectImportSource(sourceSelectionUI.id);
  };
  var headerDiv = $('<div>')
    .addClass('create-project-ui-source-selection-tab')
    .text(sourceSelectionUI.label);
  sourceSelectionUI._divHeader = headerDiv.appendTo(tabs).click(onTabClick);

  sourceSelectionUI.ui.attachUI(sourceSelectionUI._divBody);

  this._sourceSelectionUIs.push(sourceSelectionUI);

  var isFirstSource = (this._sourceSelectionUIs.length == 1);
  if (isFirstSource) {
    ui.selectImportSource(sourceSelectionUI.id);
  }
};
|
||||||
|
|
||||||
|
/*
 * Activates the source tab whose id matches: hides all tab bodies, clears the
 * 'selected' class from all tabs, then shows and focuses the matching one.
 * Does nothing when no registered source has that id.
 */
Refine.CreateProjectUI.prototype.selectImportSource = function(id) {
  var sources = this._sourceSelectionUIs;
  for (var k = 0; k < sources.length; k++) {
    var candidate = sources[k];
    if (candidate.id != id) {
      continue;
    }

    $('.create-project-ui-source-selection-tab-body').hide();
    $('.create-project-ui-source-selection-tab').removeClass('selected');

    candidate._divBody.show();
    candidate._divHeader.addClass('selected');

    candidate.ui.focus();

    // Only the first match is activated.
    return;
  }
};
|
||||||
|
|
||||||
|
/*
 * Creates a new panel inside the container element and returns its inner
 * 'relative-frame' div for the caller to fill. The outer panel div is what
 * gets tracked in _customPanels.
 */
Refine.CreateProjectUI.prototype.addCustomPanel = function() {
  var outer = $('<div>').addClass('create-project-ui-panel').appendTo(this._elmt);
  var inner = $('<div>').addClass('relative-frame').appendTo(outer);

  this._customPanels.push(outer);

  return inner;
};
|
||||||
|
|
||||||
|
/*
 * Makes the custom panel that owns div (an inner div returned by
 * addCustomPanel) the only visible panel, hiding the other custom panels and
 * the source-selection panel. Does nothing if div's parent is not tracked.
 */
Refine.CreateProjectUI.prototype.showCustomPanel = function(div) {
  var wrapper = div.parent();
  var panels = this._customPanels;

  for (var k = 0; k < panels.length; k++) {
    var candidate = panels[k];
    if (candidate[0] !== wrapper[0]) {
      continue;
    }

    $('.create-project-ui-panel').css('visibility', 'hidden');
    this._sourceSelectionElmt.css('visibility', 'hidden');
    candidate.css('visibility', 'visible');
    return;
  }
};
|
||||||
|
|
||||||
|
/*
 * Hides every custom panel and makes the source-selection panel visible.
 */
Refine.CreateProjectUI.prototype.showSourceSelectionPanel = function() {
  var customPanels = $('.create-project-ui-panel');
  customPanels.css('visibility', 'hidden');

  var sourceSelection = this._sourceSelectionElmt;
  sourceSelection.css('visibility', 'visible');
};
|
||||||
|
|
||||||
|
// Register the create-project UI as an action area.
Refine.actionAreas.push({
  "id": "create-project",
  "label": "Create Project",
  "uiClass": Refine.CreateProjectUI
});
|
@ -0,0 +1,395 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Default importing controller. Creates its progress, error, file-selection
 * and parsing panels inside the given CreateProjectUI, then instantiates and
 * registers the UI of every entry in Refine.DefaultImportingController.sources.
 */
Refine.DefaultImportingController = function(createProjectUI) {
  var panelDir = "scripts/index/default-importing-controller/";

  this._createProjectUI = createProjectUI;

  this._progressPanel = createProjectUI.addCustomPanel();
  this._progressPanel.html(DOM.loadHTML("core", panelDir + "progress-panel.html"));

  this._errorPanel = createProjectUI.addCustomPanel();
  this._errorPanel.html(DOM.loadHTML("core", panelDir + "error-panel.html"));

  // These two panels are filled in later, when they are first shown.
  this._fileSelectionPanel = createProjectUI.addCustomPanel();
  this._parsingPanel = createProjectUI.addCustomPanel();

  var sources = Refine.DefaultImportingController.sources;
  for (var k = 0; k < sources.length; k++) {
    var source = sources[k];
    source.ui = new source.uiClass(this);

    createProjectUI.addSourceSelectionUI(source);
  }
};
|
||||||
|
// Add this controller's constructor to the CreateProjectUI controller registry.
Refine.CreateProjectUI.controllers.push(Refine.DefaultImportingController);
|
||||||
|
|
||||||
|
// Source-selection descriptors; the constructor instantiates each entry's
// uiClass and hands the entry to CreateProjectUI.addSourceSelectionUI.
Refine.DefaultImportingController.sources = [];
|
||||||
|
// NOTE(review): presumably a registry of parser UIs keyed by format, filled in
// by other scripts; it is not read in this part of the file, so confirm.
Refine.DefaultImportingController.parserUIs = {};
|
||||||
|
|
||||||
|
/*
 * Abandons the current import: disposes the file-selection and parsing
 * panels, forgets all per-job state, and returns to the source-selection
 * panel.
 */
Refine.DefaultImportingController.prototype._startOver = function() {
  this._disposeFileSelectionPanel();
  // The original called _disposeFileSelectionPanel() twice, leaving the
  // parsing panel undisposed. The parsing-panel disposer is defined outside
  // this part of the file, so only call it when it is present.
  if (typeof this._disposeParsingPanel === "function") {
    this._disposeParsingPanel();
  }

  delete this._fileSelectionPanelElmts;
  delete this._parsingPanelElmts;

  delete this._jobID;
  delete this._job;
  delete this._extensions;

  delete this._format;
  delete this._parserOptions;
  delete this._projectName;

  this._createProjectUI.showSourceSelectionPanel();
};
|
||||||
|
|
||||||
|
/*
 * Creates an importing job on the server, submits the given form into the
 * hidden importing iframe to upload the raw data, and polls the job once a
 * second until its config reports hasData.
 *
 * form            - jQuery-wrapped form element to submit.
 * progressMessage - text shown on the progress panel while loading.
 * callback        - optional; called with (jobID, job) after the job is ready
 *                   and _onImportJobReady has run.
 */
Refine.DefaultImportingController.prototype.startImportJob = function(form, progressMessage, callback) {
  var controller = this;

  var onJobCreated = function(data) {
    var jobID = controller._jobID = data.jobID;

    var actionURL = "/command/core/importing-controller?" + $.param({
      "controller": "core/default-importing-controller",
      "jobID": jobID,
      "subCommand": "load-raw-data"
    });

    form.attr("method", "post")
      .attr("enctype", "multipart/form-data")
      .attr("accept-charset", "UTF-8")
      .attr("target", "default-importing-iframe")
      .attr("action", actionURL);
    form[0].submit();

    var startTime = new Date();

    var hasData = function(job) {
      return job.config.hasData;
    };
    var onJobReady = function(readyJobID, job) {
      controller._job = job;
      controller._onImportJobReady();
      if (callback) {
        callback(readyJobID, job);
      }
    };
    var pollOnce = function() {
      controller._pollImportJob(startTime, jobID, timerID, hasData, onJobReady);
    };
    var timerID = window.setInterval(pollOnce, 1000);

    var onCancel = function() {
      // stop the iframe
      $('#default-importing-iframe')[0].contentWindow.stop();

      // stop the timed polling
      window.clearInterval(timerID);

      // explicitly cancel the import job
      $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": jobID }));

      controller._createProjectUI.showSourceSelectionPanel();
    };
    controller._initializeImportProgressPanel(progressMessage, onCancel);
  };

  $.post(
    "/command/core/create-importing-job",
    null,
    onJobCreated,
    "json"
  );
};
|
||||||
|
|
||||||
|
/*
 * Shows the progress panel in its initial state (given message, empty bar,
 * 'Starting' label, no timing) and makes the cancel button invoke onCancel.
 */
Refine.DefaultImportingController.prototype._initializeImportProgressPanel = function(progressMessage, onCancel) {
  this._createProjectUI.showCustomPanel(this._progressPanel);

  $('#default-importing-progress-message').text(progressMessage);
  $('#default-importing-progress-bar-body').css("width", "0%");
  $('#default-importing-progress-message-left').text('Starting');

  var idsToClear = [
    '#default-importing-progress-message-center',
    '#default-importing-progress-message-right',
    '#default-importing-progress-timing'
  ];
  for (var k = 0; k < idsToClear.length; k++) {
    $(idsToClear[k]).empty();
  }

  // Replace whatever cancel handler a previous job installed.
  $('#default-importing-progress-cancel-button').unbind().click(onCancel);
};
|
||||||
|
|
||||||
|
/*
 * Performs one status poll for an importing job and updates the progress
 * panel accordingly.
 *
 * start     - Date at which the operation began; used to estimate time left.
 * jobID     - ID of the importing job to query.
 * timerID   - interval timer to clear on error or completion.
 * checkDone - function(job) returning truthy once the job is finished.
 * callback  - optional function(jobID, job) invoked on completion.
 */
Refine.DefaultImportingController.prototype._pollImportJob = function(start, jobID, timerID, checkDone, callback) {
  var controller = this;

  var onStatus = function(data) {
    if (!(data) || data.code == "error" || !("job" in data)) {
      controller._showImportJobError((data && data.message) || "Unknown error");
      window.clearInterval(timerID);
      return;
    }

    var job = data.job;
    if (checkDone(job)) {
      $('#default-importing-progress-message').text('Done.');

      window.clearInterval(timerID);
      if (callback) {
        callback(jobID, job);
      }
      return;
    }

    var progress = job.config.progress;
    var bar = $('#default-importing-progress-bar-body');
    var timing = $('#default-importing-progress-timing');

    if (progress.percent > 0) {
      // Extrapolate the total duration linearly from the elapsed time.
      var elapsed = (new Date().getTime() - start.getTime()) / 1000;
      var remaining = (100 / progress.percent) * elapsed - elapsed;

      bar.removeClass('indefinite').css("width", progress.percent + "%");

      var label;
      if (remaining > 60) {
        label = Math.ceil(remaining / 60) + " minutes remaining";
      } else if (remaining > 1) {
        label = Math.ceil(remaining) + " seconds remaining";
      } else {
        label = 'almost done ...';
      }
      timing.text(label);
    } else {
      // No percentage known yet: show an indeterminate bar.
      bar.addClass('indefinite');
      timing.empty();
    }
    $('#default-importing-progress-message').text(progress.message);
  };

  $.post(
    "/command/core/get-importing-job-status?" + $.param({ "jobID": jobID }),
    null,
    onStatus,
    "json"
  );
};
|
||||||
|
|
||||||
|
/*
 * Shows the error panel with the given message and stack trace (or a
 * placeholder when no stack is given). OK returns to source selection.
 */
Refine.DefaultImportingController.prototype._showImportJobError = function(message, stack) {
  var controller = this;
  var stackText = stack || 'No technical details.';

  $('#default-importing-error-message').text(message);
  $('#default-importing-error-stack').text(stackText);

  this._createProjectUI.showCustomPanel(this._errorPanel);

  var backToSources = function() {
    controller._createProjectUI.showSourceSelectionPanel();
  };
  $('#default-importing-error-ok-button').unbind().click(backToSources);
};
|
||||||
|
|
||||||
|
/*
 * Called once the job's raw data is loaded. Prepares derived data, then
 * shows the file-selection panel when more than one file was retrieved, or
 * goes straight to the parsing panel otherwise.
 */
Refine.DefaultImportingController.prototype._onImportJobReady = function() {
  this._prepareData();

  var fileCount = this._job.config.retrievalRecord.files.length;
  if (fileCount > 1) {
    this._showFileSelectionPanel();
    return;
  }
  this._showParsingPanel(false);
};
|
||||||
|
|
||||||
|
/*
 * Derives client-side state from the job configuration:
 *  - sets file.selected on every retrieved file, true only for the indices
 *    listed in config.fileSelection;
 *  - builds this._extensions, a list of { extension, count } records sorted
 *    by descending count.
 */
Refine.DefaultImportingController.prototype._prepareData = function() {
  var config = this._job.config;
  var files = config.retrievalRecord.files;
  var selectedIndices = config.fileSelection;

  var byExtension = {};
  var extensions = [];

  for (var f = 0; f < files.length; f++) {
    var file = files[f];
    file.selected = false;

    var name = file.fileName;
    var lastSlash = name.lastIndexOf('/');
    var lastDot = name.lastIndexOf('.');
    // Skip names without a dot in the last path segment, and dot-files.
    if (lastDot <= lastSlash + 1) {
      continue;
    }

    var ext = name.substring(lastDot);
    if (ext in byExtension) {
      byExtension[ext].count++;
    } else {
      var record = { extension: ext, count: 1 };
      byExtension[ext] = record;
      extensions.push(record);
    }
  }

  for (var s = 0; s < selectedIndices.length; s++) {
    files[selectedIndices[s]].selected = true;
  }

  extensions.sort(function(first, second) {
    return second.count - first.count;
  });
  this._extensions = extensions;
};
|
||||||
|
|
||||||
|
/*
 * Makes sure this._parserOptions holds options for the given format, asking
 * the server for them (behind a busy indicator) when they are missing, and
 * then calls onDone. onDone is not called if the server response carries no
 * options.
 */
Refine.DefaultImportingController.prototype._ensureFormatParserUIHasInitializationData = function(format, onDone) {
  if (format in this._parserOptions) {
    onDone();
    return;
  }

  var controller = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");

  var url = "/command/core/importing-controller?" + $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "initialize-parser-ui",
    "format": format
  });
  var onResponse = function(data) {
    dismissBusy();

    if (data.options) {
      controller._parserOptions[format] = data.options;
      onDone();
    }
  };

  $.post(url, null, onResponse, "json");
};
|
||||||
|
|
||||||
|
/*
 * Posts the current format (this._format) and the given parser options,
 * serialized as JSON, to the importing controller for the current job.
 * callback receives the server's JSON response.
 */
Refine.DefaultImportingController.prototype.updateFormatAndOptions = function(options, callback) {
  var url = "/command/core/importing-controller?" + $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "update-format-and-options"
  });
  var payload = {
    "format" : this._format,
    "options" : JSON.stringify(options)
  };

  $.post(url, payload, callback, "json");
};
|
||||||
|
|
||||||
|
/*
 * Fetches preview data for the current importing job: first the models,
 * then up to numRows rows (100 when numRows is not given). callback receives
 * one object carrying the models' own properties plus rowModel, in which
 * each cell's "r" index has been replaced by the recon from the row pool.
 */
Refine.DefaultImportingController.prototype.getPreviewData = function(callback, numRows) {
  var controller = this;
  var preview = {};

  var onRowsLoaded = function(rowModel) {
    // Un-pool objects
    var rows = rowModel.rows;
    for (var r = 0; r < rows.length; r++) {
      var cells = rows[r].cells;
      for (var c = 0; c < cells.length; c++) {
        var cell = cells[c];
        if ((cell) && ("r" in cell)) {
          cell.r = rowModel.pool.recons[cell.r];
        }
      }
    }

    preview.rowModel = rowModel;
    callback(preview);
  };

  var onModelsLoaded = function(models) {
    for (var key in models) {
      if (models.hasOwnProperty(key)) {
        preview[key] = models[key];
      }
    }

    var rowsURL = "/command/core/get-rows?" + $.param({
      "importingJobID" : controller._jobID,
      "start" : 0,
      "limit" : numRows || 100 // More than we parse for preview anyway
    });
    $.post(rowsURL, null, onRowsLoaded, "jsonp");
  };

  var modelsURL = "/command/core/get-models?" + $.param({ "importingJobID" : this._jobID });
  $.post(modelsURL, null, onModelsLoaded, "json");
};
|
||||||
|
|
||||||
|
/*
 * Creates the project from the current importing job.
 *
 * Does nothing unless a format parser UI exists and confirms it is ready.
 * Requires a non-empty project name; otherwise alerts the user and focuses
 * the name input. On success, posts the format and options (including the
 * project name) to the importing controller, then polls the job once a
 * second until its config carries a projectID and navigates to that project.
 * The progress panel's cancel button stops polling, cancels the job on the
 * server, and returns to source selection.
 */
Refine.DefaultImportingController.prototype._createProject = function() {
  if ((this._formatParserUI) && this._formatParserUI.confirmReadyToCreateProject()) {
    var projectName = $.trim(this._parsingPanelElmts.projectNameInput[0].value);
    if (projectName.length == 0) {
      window.alert("Please name the project.");
      // Focus the name input itself; the original called focus() on the
      // container of bound elements.
      this._parsingPanelElmts.projectNameInput.focus();
      return;
    }

    var self = this;
    var options = this._formatParserUI.getOptions();
    options.projectName = projectName;
    $.post(
      "/command/core/importing-controller?" + $.param({
        "controller": "core/default-importing-controller",
        "jobID": this._jobID,
        "subCommand": "create-project"
      }),
      {
        "format" : this._format,
        "options" : JSON.stringify(options)
      },
      function() {
        var start = new Date();
        var timerID = window.setInterval(
          function() {
            self._pollImportJob(
              start,
              self._jobID,
              timerID,
              function(job) {
                return "projectID" in job.config;
              },
              function(jobID, job) {
                document.location = "project?project=" + job.config.projectID;
              }
            );
          },
          1000
        );
        self._initializeImportProgressPanel("Creating project ...", function() {
          // stop the timed polling
          window.clearInterval(timerID);

          // explicitly cancel the import job; no local jobID exists in this
          // scope, so read it from the controller.
          $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": self._jobID }));

          self._createProjectUI.showSourceSelectionPanel();
        });
      },
      "json"
    );
  }
};
|
@ -0,0 +1,5 @@
|
|||||||
|
<div id="default-importing-error-panel"><div class="grid-layout layout-normal layout-full"><table>
|
||||||
|
<tr><td id="default-importing-error-message"></td></tr>
|
||||||
|
<tr><td id="default-importing-error-stack"></td></tr>
|
||||||
|
<tr><td><button class="button button-primary" id="default-importing-error-ok-button">OK</button></td></tr>
|
||||||
|
</table></div></div>
|
@ -0,0 +1,36 @@
|
|||||||
|
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
|
||||||
|
<td width="1%"><button bind="startOverButton" class="button">« Start Over</button></td>
|
||||||
|
<td width="98%">Select Files to Import</td>
|
||||||
|
<td width="1%"><button bind="nextButton" class="button button-primary">Configure Parsing Options »</button></td>
|
||||||
|
</tr></table></div></div>
|
||||||
|
|
||||||
|
<div bind="controlPanel" class="default-importing-file-selection-control-panel">
|
||||||
|
<div class="grid-layout layout-full layout-tighter"><table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="3">
|
||||||
|
There are several files available to import.
|
||||||
|
Please select the desired ones.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td width="98%" bind="summary"></td>
|
||||||
|
<td width="1%"><button bind="selectAllButton" class="button">Select All</button></td>
|
||||||
|
<td width="1%"><button bind="unselectAllButton" class="button">Unselect All</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
||||||
|
|
||||||
|
<h2>Select by Extension</h2>
|
||||||
|
<div bind="extensionContainer" class="grid-layout layout-full layout-tightest"></div>
|
||||||
|
|
||||||
|
<h2>Select by Regex on File Names</h2>
|
||||||
|
<div class="grid-layout layout-full layout-tighter"><table>
|
||||||
|
<tr><td colspan="3"><input bind="regexInput" style="width: 100%;"/></td></tr>
|
||||||
|
<tr>
|
||||||
|
<td bind="regexSummary"></td>
|
||||||
|
<td width="1%"><button bind="selectRegexButton" class="button">Select</button></td>
|
||||||
|
<td width="1%"><button bind="unselectRegexButton" class="button">Unselect</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div bind="filePanel" class="default-importing-file-selection-file-panel"></div>
|
@ -0,0 +1,314 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Builds, renders and shows the file-selection panel. The Next button
 * commits the current file selection.
 */
Refine.DefaultImportingController.prototype._showFileSelectionPanel = function() {
  var controller = this;

  this._prepareFileSelectionPanel();

  var onNext = function() {
    controller._commitFileSelection();
  };
  this._fileSelectionPanelElmts.nextButton.click(onNext);

  this._renderFileSelectionPanel();
  this._createProjectUI.showCustomPanel(this._fileSelectionPanel);
};
|
||||||
|
|
||||||
|
/*
 * Tears down the file-selection panel: detaches its window resize handler,
 * if one was installed, and removes the panel's handlers and content.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  var resizer = this._fileSelectionPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  var panel = this._fileSelectionPanel;
  panel.unbind().empty();
};
|
||||||
|
|
||||||
|
/*
 * (Re)builds the file-selection panel: loads its markup, binds its elements
 * into this._fileSelectionPanelElmts, wires the Start Over button, and
 * installs a window resize handler that lays out the control panel (fixed
 * 350px wide, on the left) and the file panel (remaining width) below the
 * wizard header.
 */
Refine.DefaultImportingController.prototype._prepareFileSelectionPanel = function() {
  var self = this;

  // A resizer from an earlier call would otherwise stay bound to the window,
  // so repeated calls would pile up handlers.
  if (this._fileSelectionPanelResizer) {
    $(window).unbind("resize", this._fileSelectionPanelResizer);
  }

  this._fileSelectionPanel.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/default-importing-controller/file-selection-panel.html"));

  this._fileSelectionPanelElmts = DOM.bind(this._fileSelectionPanel);
  this._fileSelectionPanelElmts.startOverButton.click(function() {
    self._startOver();
  });

  this._fileSelectionPanelResizer = function() {
    var elmts = self._fileSelectionPanelElmts;
    var width = self._fileSelectionPanel.width();
    var height = self._fileSelectionPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelWidth = 350;

    // Paddings are subtracted so that each panel's outer box fits its slot.
    elmts.controlPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (controlPanelWidth - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (height - headerHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");

    elmts.filePanel
      .css("left", controlPanelWidth + "px")
      .css("top", headerHeight + "px")
      .css("width", (width - controlPanelWidth - DOM.getHPaddings(elmts.filePanel)) + "px")
      .css("height", (height - headerHeight - DOM.getVPaddings(elmts.filePanel)) + "px");
  };

  $(window).resize(this._fileSelectionPanelResizer);
  // Lay out once immediately rather than waiting for the first resize event.
  this._fileSelectionPanelResizer();
};
|
||||||
|
|
||||||
|
/*
 * Renders both halves of the file-selection panel: the file table first,
 * then the control panel.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanel = function() {
  var controller = this;

  controller._renderFileSelectionPanelFileTable();
  controller._renderFileSelectionPanelControlPanel();
};
|
||||||
|
|
||||||
|
/*
 * Rebuilds the table of retrieved files: one row per file with a selection
 * checkbox, name, mime-type, format and size. Toggling a checkbox updates
 * the file's selected flag and refreshes the selection summary.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelFileTable = function() {
  var controller = this;
  var filePanel = this._fileSelectionPanelElmts.filePanel;

  filePanel.empty();

  var table = $('<table><tr><th></th><th>Name</th><th>Mime-type</th><th>Format</th><th>Size</th></tr></table>')
    .appendTo(filePanel)[0];

  var files = this._job.config.retrievalRecord.files;

  // $.each gives every row its own index binding for the click handler.
  $.each(files, function(index, fileRecord) {
    var row = table.insertRow(table.rows.length);
    var isEvenRow = (index % 2 == 0);
    $(row).addClass(isEvenRow ? 'even' : 'odd');

    var selectCell = $('<td>').appendTo(row);
    var checkbox = $('<input>')
      .attr("type", "checkbox")
      .attr("index", index)
      .appendTo(selectCell)
      .click(function() {
        files[index].selected = this.checked;
        controller._updateFileSelectionSummary();
      });
    if (fileRecord.selected) {
      checkbox.attr("checked", "checked");
    }

    var mimeType = fileRecord.declaredMimeType || fileRecord.mimeType || "unknown";
    var format = fileRecord.format || "unknown";

    $('<td>').text(fileRecord.fileName).addClass("default-importing-file-selection-filename").appendTo(row);
    $('<td>').text(mimeType).appendTo(row);
    $('<td>').text(format).appendTo(row);
    $('<td>').text(fileRecord.size + " bytes").appendTo(row);
  });
};
|
||||||
|
|
||||||
|
/*
 * Builds the control side of the file selection panel: the select-all /
 * unselect-all buttons, one row per file extension with its own
 * Select / Unselect buttons, and the regular-expression based selection
 * controls. Every action updates the "selected" flag on the file records,
 * mirrors it onto the matching checkboxes in the file panel, and refreshes
 * the selection summary.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelControlPanel = function() {
  var self = this;
  var files = this._job.config.retrievalRecord.files;
  var elmts = this._fileSelectionPanelElmts;

  // The file panel is looked up each time it is needed, at event time.
  var filePanel = function() {
    return self._fileSelectionPanelElmts.filePanel;
  };
  var findNameCells = function() {
    return filePanel().find(".default-importing-file-selection-filename");
  };

  // Mirrors a selection state onto a set of checkbox inputs.
  var markInputs = function(inputs, selected) {
    if (selected) {
      inputs.attr("checked", "checked");
    } else {
      inputs.removeAttr("checked");
    }
  };

  // Forces every file into the given state.
  var setAll = function(selected) {
    for (var k = 0; k < files.length; k++) {
      files[k].selected = selected;
    }
    markInputs(filePanel().find("input"), selected);
    self._updateFileSelectionSummary();
  };

  // Flips only those files that are not yet in the wanted state and whose
  // name satisfies the given test.
  var setMatching = function(selected, nameMatches) {
    for (var k = 0; k < files.length; k++) {
      var candidate = files[k];
      var needsChange = selected ? !candidate.selected : candidate.selected;
      if (needsChange && nameMatches(candidate.fileName)) {
        candidate.selected = selected;
        markInputs(filePanel().find("input[index='" + k + "']"), selected);
      }
    }
    self._updateFileSelectionSummary();
  };

  // Applies the pattern currently typed in the regex box; an invalid
  // pattern is silently ignored.
  var setMatchingRegex = function(selected) {
    findNameCells().removeClass("highlighted");
    try {
      var pattern = new RegExp(self._fileSelectionPanelElmts.regexInput[0].value);
      setMatching(selected, function(name) {
        return pattern.test(name);
      });
    } catch (e) {
      // Ignore
    }
  };

  elmts.extensionContainer.empty();
  elmts.selectAllButton.unbind().click(function(evt) {
    setAll(true);
  });
  elmts.unselectAllButton.unbind().click(function(evt) {
    setAll(false);
  });

  var table = $('<table></table>').appendTo(elmts.extensionContainer)[0];

  // One table row per extension: name, file count, Select and Unselect.
  var addExtensionRow = function(extension) {
    var row = table.insertRow(table.rows.length);
    var countLabel = extension.count + (extension.count > 1 ? " files" : " file");
    $('<td>').text(extension.extension).appendTo(row);
    $('<td>').text(countLabel).appendTo(row);

    var hasExtension = function(name) {
      return name.endsWith(extension.extension);
    };
    var addButton = function(label, selected) {
      var button = $('<button>').text(label).addClass("button");
      button.appendTo($('<td>').appendTo(row));
      button.click(function() {
        setMatching(selected, hasExtension);
      });
    };
    addButton("Select", true);
    addButton("Unselect", false);
  };
  for (var x = 0; x < this._extensions.length; x++) {
    addExtensionRow(this._extensions[x]);
  }

  this._updateFileSelectionSummary();

  // Live preview: highlight the file names matching the typed pattern and
  // report how many matched.
  elmts.regexInput.unbind().keyup(function() {
    var matched = 0;
    var nameCells = findNameCells().removeClass("highlighted");
    try {
      var pattern = new RegExp(this.value);
      nameCells.each(function() {
        var cell = $(this);
        if (pattern.test(cell.text())) {
          cell.addClass("highlighted");
          matched++;
        }
      });
    } catch (e) {
      // Ignore
    }
    self._fileSelectionPanelElmts.regexSummary.text(matched + (matched == 1 ? " match" : " matches"));
  });
  elmts.selectRegexButton.unbind().click(function() {
    setMatchingRegex(true);
  });
  elmts.unselectRegexButton.unbind().click(function() {
    setMatchingRegex(false);
  });
};
|
||||||
|
|
||||||
|
/*
 * Recomputes the list of selected file indices from the "selected" flags
 * on the retrieval record, stores it on the job configuration, and
 * updates the "N of M files selected" summary text.
 */
Refine.DefaultImportingController.prototype._updateFileSelectionSummary = function() {
  var files = this._job.config.retrievalRecord.files;
  var selectedIndices = [];

  var position = 0;
  while (position < files.length) {
    if (files[position].selected) {
      selectedIndices.push(position);
    }
    position++;
  }

  this._job.config.fileSelection = selectedIndices;

  var label = selectedIndices.length + " of " + files.length + " files selected";
  this._fileSelectionPanelElmts.summary.text(label);
};
|
||||||
|
|
||||||
|
/*
 * Sends the current file selection to the server and, on success, moves on
 * to the parsing panel. Does nothing except alert the user when no file is
 * selected.
 */
Refine.DefaultImportingController.prototype._commitFileSelection = function() {
  if (this._job.config.fileSelection.length == 0) {
    alert("Please select at least one file.");
    return;
  }

  var self = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");
  $.post(
    "/command/core/importing-controller?" + $.param({
      "controller": "core/default-importing-controller",
      "jobID": this._jobID,
      "subCommand": "update-file-selection"
    }),
    {
      "fileSelection" : JSON.stringify(this._job.config.fileSelection)
    },
    function(data) {
      // Always take the busy indicator down first, so that it does not
      // stay up when the request comes back with an error.
      dismissBusy();

      // No polling timer is started by this function, so there is nothing
      // to clear on the error paths; just report and stop.
      if (!(data)) {
        self._showImportJobError("Unknown error");
        return;
      } else if (data.code == "error" || !("job" in data)) {
        self._showImportJobError(data.message || "Unknown error");
        return;
      }

      // Different files might be selected. We start over again.
      // Must go through "self": inside this callback "this" is not the
      // controller.
      delete self._parserOptions;

      self._job = data.job;
      self._showParsingPanel(true);
    },
    "json"
  );
};
|
@ -0,0 +1,21 @@
|
|||||||
|
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
|
||||||
|
<td width="1%"><button bind="startOverButton" class="button">« Start Over</button></td>
|
||||||
|
<td width="1%"><button bind="previousButton" class="button">« Re-select Files</button></td>
|
||||||
|
<td width="98%">Configure Parsing Options</td>
|
||||||
|
<td style="text-align: right;">Project name</td>
|
||||||
|
<td width="1%"><input class="inline" type="text" size="30" bind="projectNameInput" /></td>
|
||||||
|
<td width="1%"><button bind="nextButton" class="button button-primary">Create Project »</button></td>
|
||||||
|
</tr></table></div></div>
|
||||||
|
|
||||||
|
<div bind="dataPanel" class="default-importing-parsing-data-panel"></div>
|
||||||
|
<div bind="progressPanel" class="default-importing-progress-data-panel">
|
||||||
|
<img src="images/large-spinner.gif" /> Updating preview ...
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div bind="controlPanel" class="default-importing-parsing-control-panel"><table><tr>
|
||||||
|
<td class="default-importing-parsing-control-panel-formats">
|
||||||
|
<div class="default-importing-parsing-control-panel-formats-message">Parse data as</div>
|
||||||
|
<div bind="formatsContainer"></div>
|
||||||
|
</td>
|
||||||
|
<td bind="optionsContainer" class="default-importing-parsing-control-panel-options-panel"></td>
|
||||||
|
</tr></table></div>
|
@ -0,0 +1,186 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Shows the "configure parsing options" step of the import wizard.
 *
 * hasFileSelection: when truthy, the "previous" button leads back to the
 * file selection panel; otherwise that button is hidden.
 */
Refine.DefaultImportingController.prototype._showParsingPanel = function(hasFileSelection) {
  var self = this;
  var config = this._job.config;

  // Default to the top-ranked format and an empty option set.
  this._format = this._format || config.rankedFormats[0];
  this._parserOptions = this._parserOptions || {};

  this._prepareParsingPanel();

  var elmts = this._parsingPanelElmts;
  elmts.nextButton.click(function() {
    self._createProject();
  });
  if (!hasFileSelection) {
    elmts.previousButton.hide();
  } else {
    elmts.previousButton.click(function() {
      self._createProjectUI.showCustomPanel(self._fileSelectionPanel);
    });
  }

  // Derive a project name from the first selected file when none is set:
  // non-word characters become spaces and runs of whitespace are collapsed.
  if (!(this._projectName) && config.fileSelection.length > 0) {
    var firstSelected = config.retrievalRecord.files[config.fileSelection[0]];
    var spaced = firstSelected.fileName.replace(/\W/g, ' ').replace(/\s+/g, ' ');
    this._projectName = $.trim(spaced);
  }
  if (this._projectName) {
    elmts.projectNameInput[0].value = this._projectName;
  }

  this._createProjectUI.showCustomPanel(this._parsingPanel);
};
|
||||||
|
|
||||||
|
/*
 * Tears down the parser UI, detaches the window resize handler installed
 * for the parsing panel, empties the parsing panel and forgets its bound
 * elements.
 *
 * NOTE(review): despite its name this function only touches parsing-panel
 * state; the name looks like a copy-paste slip for a parsing-panel
 * disposer. Confirm against the callers before renaming.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  this._disposeParserUI();

  var resizer = this._parsingPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  var panel = this._parsingPanel;
  panel.unbind();
  panel.empty();

  delete this._parsingPanelElmts;
};
|
||||||
|
|
||||||
|
/*
 * (Re)builds the parsing panel: loads its HTML, binds its elements, wires
 * the "start over" button, installs the layout handler that runs on window
 * resize, creates one tab per ranked format and selects the current format.
 */
Refine.DefaultImportingController.prototype._prepareParsingPanel = function() {
  var self = this;

  this._parsingPanel.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/default-importing-controller/parsing-panel.html"));

  this._parsingPanelElmts = DOM.bind(this._parsingPanel);
  this._parsingPanelElmts.startOverButton.click(function() {
    self._startOver();
  });
  this._parsingPanelElmts.progressPanel.hide();

  // Drop the handler left by a previous call, otherwise every call would
  // leave one more resize listener bound to the window.
  if (this._parsingPanelResizer) {
    $(window).unbind("resize", this._parsingPanelResizer);
  }

  // Lays out header / data (or progress) / control areas; the control
  // panel has a fixed height at the bottom and the data and progress
  // panels share the space between the header and the control panel.
  this._parsingPanelResizer = function() {
    var elmts = self._parsingPanelElmts;
    var width = self._parsingPanel.width();
    var height = self._parsingPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelHeight = 300;

    elmts.dataPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
    elmts.progressPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");

    elmts.controlPanel
      .css("left", "0px")
      .css("top", (height - controlPanelHeight) + "px")
      .css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
  };

  $(window).resize(this._parsingPanelResizer);
  this._parsingPanelResizer();

  var formats = this._job.config.rankedFormats;
  // Adds a clickable tab for one format; the current format is marked
  // as selected.
  var createFormatTab = function(format) {
    var tab = $('<div>')
      .text(Refine.importingConfig.formats[format].label)
      .attr("format", format)
      .addClass("default-importing-parsing-control-panel-format")
      .appendTo(self._parsingPanelElmts.formatsContainer)
      .click(function() {
        self._selectFormat(format);
      });

    if (format == self._format) {
      tab.addClass("selected");
    }
  };
  for (var i = 0; i < formats.length; i++) {
    createFormatTab(formats[i]);
  }
  this._selectFormat(this._format);
};
|
||||||
|
|
||||||
|
/*
 * Disposes the current format parser UI, if any, and clears the panels it
 * was rendering into (options, progress and data), if they are bound.
 */
Refine.DefaultImportingController.prototype._disposeParserUI = function() {
  var parserUI = this._formatParserUI;
  if (parserUI) {
    parserUI.dispose();
    delete this._formatParserUI;
  }

  var elmts = this._parsingPanelElmts;
  if (!elmts) {
    return;
  }
  elmts.optionsContainer.unbind().empty();
  elmts.progressPanel.unbind();
  elmts.dataPanel.unbind().empty();
};
|
||||||
|
|
||||||
|
/*
 * Switches the parsing panel to the given format: once the initialization
 * data for that format is available, the previous parser UI is disposed,
 * the matching tab is highlighted and a new parser UI is instantiated.
 * Nothing happens when the format is already showing or when no UI class
 * is registered for it.
 */
Refine.DefaultImportingController.prototype._selectFormat = function(newFormat) {
  if (this._formatParserUI && newFormat == this._format) {
    // The new format is the same as the existing one.
    return;
  }

  var uiClassName = Refine.importingConfig.formats[newFormat].uiClass;
  var uiClass = Refine.DefaultImportingController.parserUIs[uiClassName];
  if (!uiClass) {
    return;
  }

  var self = this;
  this._ensureFormatParserUIHasInitializationData(newFormat, function() {
    self._disposeParserUI();

    // Move the "selected" marker onto the tab of the new format.
    var tabs = self._parsingPanelElmts.formatsContainer
      .find(".default-importing-parsing-control-panel-format");
    tabs.removeClass("selected");
    tabs.each(function() {
      if (this.getAttribute("format") == newFormat) {
        $(this).addClass("selected");
      }
    });

    self._format = newFormat;

    var elmts = self._parsingPanelElmts;
    self._formatParserUI = new uiClass(
      self,
      self._jobID,
      self._job,
      self._format,
      self._parserOptions[newFormat],
      elmts.dataPanel,
      elmts.progressPanel,
      elmts.optionsContainer
    );
  });
};
|
@ -0,0 +1,13 @@
|
|||||||
|
<div id="default-importing-progress-panel">
|
||||||
|
<div class="grid-layout layout-normal layout-full"><table>
|
||||||
|
<tr><td colspan="3" id="default-importing-progress-message"></td></tr>
|
||||||
|
<tr><td colspan="3">
|
||||||
|
<div id="default-importing-progress-bar-frame"><div id="default-importing-progress-bar-body"></div></div>
|
||||||
|
</td></tr>
|
||||||
|
<tr><td colspan="3">
|
||||||
|
<button class="button" id="default-importing-progress-cancel-button">Cancel</button>
|
||||||
|
<span id="default-importing-progress-timing"></span>
|
||||||
|
</td></tr>
|
||||||
|
</table></div>
|
||||||
|
<iframe id="default-importing-iframe" name="default-importing-iframe"></iframe>
|
||||||
|
</div>
|
@ -0,0 +1,6 @@
|
|||||||
|
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||||
|
<tr><td>Paste data from clipboard here:</td></tr>
|
||||||
|
<tr><td><textarea bind="textInput" name="clipboard" id="default-importing-clipboard-textarea"></textarea>
|
||||||
|
</td></tr>
|
||||||
|
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
||||||
|
</table></div></form>
|
@ -0,0 +1,5 @@
|
|||||||
|
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||||
|
<tr><td>Locate one or more files on your computer to upload:</td></tr>
|
||||||
|
<tr><td><input type="file" multiple bind="fileInput" name="upload" /></td></tr>
|
||||||
|
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
||||||
|
</table></div></form>
|
@ -0,0 +1,8 @@
|
|||||||
|
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||||
|
<tr><td colspan="2">Enter one or more web addresses (URLs) pointing to data to download:</td></tr>
|
||||||
|
<tr bind="urlRow"><td colspan="2"><input bind="urlInput" name="download" class="default-importing-web-url" /></td></tr>
|
||||||
|
<tr bind="buttons">
|
||||||
|
<td width="1"><button bind="addButton" class="button" type="button">Add Another URL</button></td>
|
||||||
|
<td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></form>
|
@ -0,0 +1,118 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Importing source that uploads files chosen from the local computer.
 * The given controller is the one asked to start the import job.
 */
function ThisComputerImportingSourceUI(controller) {
  this._controller = controller;
}
Refine.DefaultImportingController.sources.push({
  "label": "This Computer",
  "id": "upload",
  "uiClass": ThisComputerImportingSourceUI
});

/*
 * Renders the upload form into bodyDiv and wires its "next" button, which
 * starts the import job unless no file has been chosen.
 */
ThisComputerImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;
  var formHtml = DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-computer-form.html");

  bodyDiv.html(formHtml);
  this._elmts = DOM.bind(bodyDiv);

  this._elmts.nextButton.click(function(evt) {
    var chosenFiles = self._elmts.fileInput[0].files;
    if (chosenFiles.length !== 0) {
      self._controller.startImportJob(self._elmts.form, "Uploading data ...");
    } else {
      window.alert("You must specify a data file to import.");
    }
  });
};

/* Nothing to focus for this source. */
ThisComputerImportingSourceUI.prototype.focus = function() {
};
|
||||||
|
|
||||||
|
/*
 * Importing source that downloads data from one or more web addresses.
 * The given controller is the one asked to start the import job.
 */
function UrlImportingSourceUI(controller) {
  this._controller = controller;
}
Refine.DefaultImportingController.sources.push({
  "label": "Web Addresses (URLs)",
  "id": "download",
  "uiClass": UrlImportingSourceUI
});

/*
 * Renders the URL form into bodyDiv, wires the "next" button (which
 * refuses to start when the URL field is blank) and the "add another URL"
 * button (which inserts a copy of the URL row above the buttons row).
 */
UrlImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;

  bodyDiv.html(DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-web-form.html"));

  this._elmts = DOM.bind(bodyDiv);
  this._elmts.nextButton.click(function(evt) {
    // Trim the text itself and then measure it. Trimming the length (a
    // number) yields a string that never strictly equals 0, which would
    // let blank input through.
    if ($.trim(self._elmts.urlInput[0].value).length === 0) {
      window.alert("You must specify a web address (URL) to import.");
    } else {
      self._controller.startImportJob(self._elmts.form, "Downloading data ...");
    }
  });
  this._elmts.addButton.click(function(evt) {
    self._elmts.buttons.before(self._elmts.urlRow.clone());
  });
};

/* Puts the keyboard focus in the URL field. */
UrlImportingSourceUI.prototype.focus = function() {
  this._elmts.urlInput.focus();
};
|
||||||
|
|
||||||
|
/*
 * Importing source that takes data pasted into a text area.
 * The given controller is the one asked to start the import job.
 */
function ClipboardImportingSourceUI(controller) {
  this._controller = controller;
}
Refine.DefaultImportingController.sources.push({
  "label": "Clipboard",
  "id": "clipboard",
  "uiClass": ClipboardImportingSourceUI
});

/*
 * Renders the clipboard form into bodyDiv and wires its "next" button,
 * which starts the import job unless the pasted text is blank.
 */
ClipboardImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;
  var formHtml = DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-clipboard-form.html");

  bodyDiv.html(formHtml);
  this._elmts = DOM.bind(bodyDiv);

  this._elmts.nextButton.click(function(evt) {
    var pasted = $.trim(self._elmts.textInput[0].value);
    if (pasted.length !== 0) {
      self._controller.startImportJob(self._elmts.form, "Uploading pasted data ...");
    } else {
      window.alert("You must paste some data to import.");
    }
  });
};

/* Puts the keyboard focus in the text area. */
ClipboardImportingSourceUI.prototype.focus = function() {
  this._elmts.textInput.focus();
};
|
@ -1,7 +0,0 @@
|
|||||||
<form bind="form"><div class="grid-layout layout-normal"><table>
|
|
||||||
<tr><td>File to import:</td></tr>
|
|
||||||
<tr><td><input type="file" bind="fileInput" name="project-file" />
|
|
||||||
<input type="hidden" name="project-name" bind="nameInput" />
|
|
||||||
</td></tr>
|
|
||||||
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
|
||||||
</table></div></form>
|
|
@ -0,0 +1,17 @@
|
|||||||
|
<form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8">
|
||||||
|
<div class="grid-layout layout-normal"><table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">Locate an existing Google Refine project file (.tar or .tar.gz):</td></tr>
|
||||||
|
<tr>
|
||||||
|
<td>Project file:</td>
|
||||||
|
<td><input type="file" id="project-tar-file-input" name="project-file" /></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Re-name project (optional):</td>
|
||||||
|
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
|
||||||
|
<tr>
|
||||||
|
<td></td>
|
||||||
|
<td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
||||||
|
</form>
|
45
main/webapp/modules/core/scripts/index/import-project-ui.js
Normal file
45
main/webapp/modules/core/scripts/index/import-project-ui.js
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Action area for importing an existing project file: fills the given
 * element with the import form and keeps references to the element and to
 * its bound children.
 */
Refine.ImportProjectUI = function(elmt) {
  var formHtml = DOM.loadHTML("core", "scripts/index/import-project-ui.html");
  elmt.html(formHtml);

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);
};

// Register this UI as the "Import Project" action area.
Refine.actionAreas.push({
  id: "import-project",
  label: "Import Project",
  uiClass: Refine.ImportProjectUI
});
|
@ -0,0 +1,4 @@
|
|||||||
|
<div class="relative-frame">
|
||||||
|
<div bind="projectsContainer" id="projects-container"></div>
|
||||||
|
<div bind="workspaceControls" id="projects-workspace-controls"><a id="projects-workspace-open" href="javascript:{}" class="secondary">Browse workspace directory</a></div>
|
||||||
|
</div>
|
274
main/webapp/modules/core/scripts/index/open-project-ui.js
Normal file
274
main/webapp/modules/core/scripts/index/open-project-ui.js
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Action area listing the existing projects. Fills the given element with
 * the project list markup, keeps the list sized to the element on window
 * resize, wires the file-input and upload-button handlers and the "browse
 * workspace directory" link, then fetches the project list.
 */
Refine.OpenProjectUI = function(elmt) {
  var self = this;

  elmt.html(DOM.loadHTML("core", "scripts/index/open-project-ui.html"));

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);

  // The project list takes whatever height the workspace controls leave;
  // the controls are pinned to the bottom across the full width.
  var resize = function() {
    var height = elmt.height();
    var width = elmt.width();
    var controlsHeight = self._elmts.workspaceControls.outerHeight();
    self._elmts.projectsContainer
      .css("height", (height - controlsHeight - DOM.getVPaddings(self._elmts.projectsContainer)) + "px");
    self._elmts.workspaceControls
      .css("bottom", "0px")
      .css("width", (width - DOM.getHPaddings(self._elmts.workspaceControls)) + "px");
  };
  $(window).resize(resize);
  window.setTimeout(resize, 100);

  $("#project-file-input").change(function() {
    if ($("#project-name-input")[0].value.length == 0) {
      // The file list is empty when the chooser is cancelled, so guard
      // the lookup. "name" is the standard File property; "fileName" is
      // kept as a fallback for browsers that only expose the old one.
      var file = (this.files && this.files.length > 0) ? this.files[0] : null;
      var fileName = file ? (file.name || file.fileName) : null;
      if (fileName) {
        $("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
      }
      $("#project-name-input").focus().select();
    }
  }).keypress(function(evt) {
    // Enter key
    if (evt.keyCode == 13) {
      return self._onClickUploadFileButton(evt);
    }
  });

  $("#upload-file-button").click(function(evt) {
    return self._onClickUploadFileButton(evt);
  });

  $('#projects-workspace-open').click(function() {
    $.ajax({
      type: "POST",
      url: "/command/core/open-workspace-dir",
      dataType: "json",
      success: function (data) {
        if (data.code != "ok" && "message" in data) {
          alert(data.message);
        }
      }
    });
  });

  this._fetchProjects();
};
|
||||||
|
|
||||||
|
/*
 * Requests the metadata of all projects from the server and hands the
 * response to _renderProjects.
 */
Refine.OpenProjectUI.prototype._fetchProjects = function() {
  var ui = this;
  var onMetadata = function(data) {
    ui._renderProjects(data);
  };

  $.getJSON("/command/core/get-all-project-metadata", null, onMetadata, "json");
};
|
||||||
|
|
||||||
|
/*
 * Renders the list of existing projects into #projects-container.
 *
 * data.projects maps project IDs to metadata objects; the "name" and
 * "modified" fields of each entry are read here. Projects are listed most
 * recently modified first. Each row carries a rename link and a delete link
 * that are only visible while the mouse is over the row.
 *
 * Date.parseExact, Date.today, add, between and toString(format) are not
 * standard JavaScript; they come from the date library loaded by the page
 * (presumably Datejs -- confirm).
 */
Refine.OpenProjectUI.prototype._renderProjects = function(data) {
  var self = this;

  var projects = [];
  for (var n in data.projects) {
    if (data.projects.hasOwnProperty(n)) {
      var project = data.projects[n];
      project.id = n;
      project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
      projects.push(project);
    }
  }
  // Most recently modified project first.
  projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });

  var container = $("#projects-container").empty();
  if (!projects.length) {
    $("#no-project-message").clone().show().appendTo(container);
    return;
  }

  Refine.selectActionArea('open-project');

  var table = $(
    '<table class="list-table"><tr>' +
      '<th>Name</th>' +
      '<th></th>' +
      '<th></th>' +
      '<th align="right">Last modified</th>' +
    '</tr></table>'
  ).appendTo(container)[0];

  var MS_PER_DAY = 24 * 60 * 60 * 1000;

  /*
   * Formats a modification date as a rough age ("today 3:15 PM",
   * "3 days ago", "2 weeks ago", "a month ago", ...).
   *
   * Ages are computed from whole calendar days and whole calendar months
   * rather than by subtracting day-of-year or month numbers, which gave
   * negative or wrong results whenever the two dates fell in different
   * calendar years (e.g. a project modified on Dec 30 viewed on Jan 2).
   */
  var formatDate = function(d) {
    var date = new Date(d);
    var today = Date.today(); // assumed to be local midnight of the current day
    var tomorrow = Date.today().add({ days: 1 });
    var last_week = Date.today().add({ days: -7 });
    var last_month = Date.today().add({ months: -1 });
    var last_year = Date.today().add({ years: -1 });

    // Rounding absorbs the 23/25-hour days around daylight saving changes.
    var dateMidnight = new Date(date.getFullYear(), date.getMonth(), date.getDate());
    var daysAgo = Math.round((today.getTime() - dateMidnight.getTime()) / MS_PER_DAY);
    var monthsAgo =
      (today.getFullYear() - date.getFullYear()) * 12 + (today.getMonth() - date.getMonth());

    if (date.between(today, tomorrow)) {
      return "today " + date.toString("h:mm tt");
    } else if (date.between(last_week, today)) {
      return (daysAgo <= 1) ? ("yesterday " + date.toString("h:mm tt")) : (daysAgo + " days ago");
    } else if (date.between(last_month, today)) {
      var weeksAgo = Math.floor(daysAgo / 7);
      return (weeksAgo == 1) ? "a week ago" : weeksAgo + " weeks ago";
    } else if (date.between(last_year, today)) {
      return (monthsAgo == 1) ? "a month ago" : monthsAgo + " months ago";
    } else {
      // Clamped so a date in the future (clock skew) never shows a negative age.
      var yearsAgo = Math.max(0, Math.floor(monthsAgo / 12));
      return (yearsAgo == 1) ? "a year ago" : yearsAgo + " years ago";
    }
  };

  /*
   * Appends one table row for the given project: name link, rename link,
   * delete link and last-modified label.
   */
  var renderProject = function(project) {
    var tr = table.insertRow(table.rows.length);
    tr.className = "project";

    var nameLink = $('<a></a>')
      .addClass("list-table-itemname")
      .text(project.name)
      .attr("href", "/project?project=" + project.id)
      .appendTo(tr.insertCell(tr.cells.length));

    var renameLink = $('<a></a>')
      .text("rename")
      .addClass("secondary")
      .attr("href", "javascript:{}")
      .css("visibility", "hidden")
      .click(function() {
        var name = window.prompt("New project name:", project.name);
        if (name == null) {
          return; // prompt was cancelled
        }

        name = $.trim(name);
        if (project.name == name || name.length == 0) {
          return; // nothing to change
        }

        $.ajax({
          type: "POST",
          url: "/command/core/rename-project",
          data: { "project" : project.id, "name" : name },
          dataType: "json",
          success: function (data) {
            if (data && typeof data.code != 'undefined' && data.code == "ok") {
              nameLink.text(name);
            } else {
              // data may be null/empty here, so do not dereference it blindly.
              alert("Failed to rename project: " + ((data && data.message) || "unknown error"));
            }
          }
        });
      }).appendTo(tr.insertCell(tr.cells.length));

    var deleteLink = $('<a></a>')
      .addClass("delete-project")
      .attr("title","Delete this project")
      .attr("href","")
      .css("visibility", "hidden")
      .html("<img src='/images/close.png' />")
      .click(function() {
        if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
          $.ajax({
            type: "POST",
            url: "/command/core/delete-project",
            data: { "project" : project.id },
            dataType: "json",
            success: function (data) {
              if (data && typeof data.code != 'undefined' && data.code == "ok") {
                // Reload the whole list so the deleted project disappears.
                self._fetchProjects();
              }
            }
          });
        }
        // The link has an empty href; returning false stops the navigation.
        return false;
      }).appendTo(tr.insertCell(tr.cells.length));

    $('<div></div>')
      .html(formatDate(project.date))
      .addClass("last-modified")
      .attr("title", project.date.toString())
      .appendTo(tr.insertCell(tr.cells.length));

    $(tr).mouseenter(function() {
      renameLink.css("visibility", "visible");
      deleteLink.css("visibility", "visible");
    }).mouseleave(function() {
      renameLink.css("visibility", "hidden");
      deleteLink.css("visibility", "hidden");
    });
  };

  for (var i = 0; i < projects.length; i++) {
    renderProject(projects[i]);
  }
};
|
||||||
|
|
||||||
|
/*
 * Validates the "create project" form and, when it is valid, points the
 * upload form's action at the create-project command with the parsing
 * options encoded in the query string.
 *
 * Returns true to let the form submission proceed; otherwise alerts the
 * user, cancels the event and returns false.
 */
Refine.OpenProjectUI.prototype._onClickUploadFileButton = function(evt) {
  var projectName = $("#project-name-input")[0].value;
  var dataURL = $.trim($("#project-url-input")[0].value);

  // Browsers without the File API do not expose .files on file inputs;
  // fall back to the input's value there instead of throwing.
  var fileInput = $("#project-file-input")[0];
  var hasFile = fileInput.files ? fileInput.files.length > 0 : fileInput.value.length > 0;

  if (! $.trim(projectName).length) {
    window.alert("You must specify a project name.");

  } else if (!hasFile && ! dataURL.length) {
    window.alert("You must specify a data file to upload or a URL to retrieve.");

  } else {
    // Every value is percent-encoded: free-text fields such as the separator
    // may contain "&", "#", "+" or whitespace, which would otherwise corrupt
    // the query string. encodeURIComponent replaces the deprecated escape().
    var param = function(name, value) {
      return name + "=" + encodeURIComponent(value);
    };

    $("#file-upload-form").attr("action",
      "/command/core/create-project-from-upload?" + [
        param("url", dataURL),
        param("split-into-columns", $("#split-into-columns-input")[0].checked),
        param("separator", $("#separator-input")[0].value),
        param("ignore", $("#ignore-input")[0].value),
        param("header-lines", $("#header-lines-input")[0].value),
        param("skip", $("#skip-input")[0].value),
        param("limit", $("#limit-input")[0].value),
        param("guess-value-type", $("#guess-value-type-input")[0].checked),
        param("ignore-quotes", $("#ignore-quotes-input")[0].checked)
      ].join("&"));

    return true;
  }

  evt.preventDefault();
  return false;
};
|
||||||
|
|
||||||
|
// Register the "Open Project" action area, implemented by Refine.OpenProjectUI.
Refine.actionAreas.push({
  "id": "open-project",
  "label": "Open Project",
  "uiClass": Refine.OpenProjectUI
});
|
@ -0,0 +1,36 @@
|
|||||||
|
<div class="grid-layout layout-loose layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2"></td>
|
||||||
|
<td><div class="grid-layout layout-tighter layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align: right;"> </td>
|
||||||
|
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table bind="sheetRecordContainer">
|
||||||
|
<tr><td colspan="3">Worksheets to Import</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
|
||||||
|
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
|
||||||
|
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
|
||||||
|
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||||
|
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank rows</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank cells as nulls</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||||
|
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
@ -0,0 +1,198 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Parser option UI for Excel files.
 *
 * Stores the importing controller, job, format and parser configuration
 * together with the three container elements it works with, then builds the
 * option panel and requests a first preview.
 */
Refine.ExcelParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Containers this UI renders into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Importing job state.
  this._config = config;
  this._format = format;
  this._job = job;
  this._jobID = jobID;
  this._controller = controller;

  // Handle of the pending delayed preview update, if any.
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};
Refine.DefaultImportingController.parserUIs.ExcelParserUI = Refine.ExcelParserUI;
|
||||||
|
|
||||||
|
/*
 * Cancels the pending delayed preview update, if there is one.
 */
Refine.ExcelParserUI.prototype.dispose = function() {
  if (this._timerID == null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||||
|
|
||||||
|
/*
 * Reports whether a project can be created from the current options.
 * This parser UI has nothing to validate, so the answer is always yes.
 */
Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
  return true;
};
|
||||||
|
|
||||||
|
/*
 * Collects the parser options currently selected in the option panel.
 *
 * Returns an object with: xmlBased (copied from the config), sheets (the
 * "index" attribute of every checked input under the sheet table),
 * ignoreLines, headerLines, skipDataLines, limit, storeBlankRows,
 * storeBlankCellsAsNulls and includeFileSources.
 *
 * A numeric option whose checkbox is unchecked, or whose text is not a
 * number, gets its "disabled" value: -1 for ignoreLines and limit, 0 for
 * headerLines and skipDataLines.
 */
Refine.ExcelParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    xmlBased: this._config.xmlBased,
    sheets: []
  };

  // Always parse as decimal: without a radix, "0x10" is read as 16 and
  // older engines read "010" as octal 8.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  // Value of a checkbox-guarded numeric field, or def when it is disabled
  // or unparsable.
  var readOptionalInt = function(checkbox, input, def) {
    return checkbox[0].checked ? parseIntDefault(input[0].value, def) : def;
  };

  elmts.sheetRecordContainer.find('input').each(function() {
    if (this.checked) {
      options.sheets.push(parseInt(this.getAttribute('index'), 10));
    }
  });

  options.ignoreLines = readOptionalInt(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  options.headerLines = readOptionalInt(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  options.skipDataLines = readOptionalInt(elmts.skipCheckbox, elmts.skipInput, 0);
  options.limit = readOptionalInt(elmts.limitCheckbox, elmts.limitInput, -1);

  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||||
|
|
||||||
|
/*
 * Builds the option panel: loads the HTML template into the option
 * container, binds its elements, adds one checkbox row per entry of
 * config.sheetRecords, pre-fills the controls from the config, and schedules
 * a preview update whenever an input or select changes.
 */
Refine.ExcelParserUI.prototype._initialize = function() {
  var self = this;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/excel-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  this._optionContainerElmts.previewButton.click(function() { self._updatePreview(); });

  // One row per worksheet: checkbox (tagged with the sheet index), name, row count.
  var sheetTable = this._optionContainerElmts.sheetRecordContainer[0];
  $.each(this._config.sheetRecords, function(i, v) {
    var tr = sheetTable.insertRow(sheetTable.rows.length);
    var td0 = $(tr.insertCell(0)).attr('width', '1%');
    var checkbox = $('<input>')
      .attr('type', 'checkbox')
      .attr('index', i)
      .appendTo(td0);
    if (this.selected) {
      checkbox.attr('checked', 'true');
    }

    $(tr.insertCell(1)).text(this.name);
    $(tr.insertCell(2)).text(this.rows + ' rows');
  });

  if (this._config.ignoreLines > 0) {
    this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
    this._optionContainerElmts.ignoreInput[0].value = this._config.ignoreLines.toString();
  }
  if (this._config.headerLines > 0) {
    this._optionContainerElmts.headerLinesCheckbox.attr("checked", "checked");
    this._optionContainerElmts.headerLinesInput[0].value = this._config.headerLines.toString();
  }
  if (this._config.limit > 0) {
    this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
    this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
  }
  if (this._config.skipDataLines > 0) {
    this._optionContainerElmts.skipCheckbox.attr("checked", "checked");
    // Was "skipInput.value[0].value", which threw a TypeError because the
    // bound jQuery object has no "value" property.
    this._optionContainerElmts.skipInput[0].value = this._config.skipDataLines.toString();
  }
  if (this._config.storeBlankRows) {
    this._optionContainerElmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }
  if (this._config.storeBlankCellsAsNulls) {
    this._optionContainerElmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (this._config.includeFileSources) {
    this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  // Any change to an option refreshes the preview after a short delay.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
|
||||||
|
|
||||||
|
/*
 * (Re)starts the half-second timer after which the preview is refreshed.
 * A timer that is still pending is cancelled first, so a burst of changes
 * leads to a single refresh.
 */
Refine.ExcelParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  if (ui._timerID != null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var refresh = function() {
    ui._timerID = null;
    ui._updatePreview();
  };
  ui._timerID = window.setTimeout(refresh, 500); // 0.5 second
};
|
||||||
|
|
||||||
|
/*
 * Shows the progress container, sends the current options to the
 * controller and, when the controller answers with status "ok", fetches the
 * preview data, hides the progress container and renders a preview table
 * into the emptied data container.
 */
Refine.ExcelParserUI.prototype._updatePreview = function() {
  var ui = this;

  var showPreview = function(projectData) {
    ui._progressContainer.hide();

    new Refine.PreviewTable(projectData, ui._dataContainer.unbind().empty());
  };

  var onOptionsUpdated = function(result) {
    if (result.status != "ok") {
      return;
    }
    ui._controller.getPreviewData(showPreview);
  };

  ui._progressContainer.show();
  ui._controller.updateFormatAndOptions(ui.getOptions(), onOptionsUpdated);
};
|
@ -0,0 +1,51 @@
|
|||||||
|
<div class="grid-layout layout-loose layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tighter"><table>
|
||||||
|
<tr>
|
||||||
|
<td width="1%">Character encoding</td>
|
||||||
|
<td><select bind="encodingSelect"></select></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align: right;"> </td>
|
||||||
|
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td colspan="3"><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td>Column widths:</td><td><input style="width: 40em;" bind="columnWidthsInput" /></td><td>comma separated numbers</td></tr>
|
||||||
|
<tr><td>Column names:</td><td><input style="width: 40em;" bind="columnNamesInput" /></td><td>optional, comma separated</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td colspan="2">Rows are separated by</td></tr>
|
||||||
|
<tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
|
||||||
|
<tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
|
||||||
|
<input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
|
||||||
|
<tr><td colspan="2">Escape special characters with \</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
|
||||||
|
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
|
||||||
|
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
|
||||||
|
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||||
|
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" /></td>
|
||||||
|
<td>Parse cell text into<br/>numbers, dates, ...</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank rows</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank cells as nulls</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||||
|
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
@ -0,0 +1,471 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Parser option UI for fixed-width text files.
 *
 * Stores the importing controller, job, format and parser configuration
 * together with the three container elements it works with, then builds the
 * option panel and requests a first preview.
 */
Refine.FixedWidthParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Containers this UI renders into.
  this._optionContainer = optionContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._dataContainer = dataContainerElmt;

  // Importing job state.
  this._config = config;
  this._format = format;
  this._job = job;
  this._jobID = jobID;
  this._controller = controller;

  // Handle of the pending delayed preview update, if any.
  this._timerID = null;

  this._initialize();
  this.updatePreview();
};
Refine.DefaultImportingController.parserUIs.FixedWidthParserUI = Refine.FixedWidthParserUI;
|
||||||
|
|
||||||
|
/*
 * Converts a separator string into its displayable, escaped form:
 * backslash becomes "\\", newline becomes "\n" and tab becomes "\t"
 * (each written out as two visible characters).
 *
 * Global regular expressions are used because String.replace with a string
 * pattern only replaces the first occurrence, which left separators such as
 * "\n\n" half-encoded. Backslashes are escaped first so that the backslashes
 * introduced for newline and tab are not escaped a second time.
 */
Refine.FixedWidthParserUI.encodeSeparator = function(s) {
  return s.replace(/\\/g, "\\\\")
    .replace(/\n/g, "\\n")
    .replace(/\t/g, "\\t");
};
|
||||||
|
|
||||||
|
/*
 * Converts an escaped separator as typed by the user back into the real
 * characters: "\n" becomes newline, "\t" becomes tab and "\\" becomes a
 * single backslash. Any other text is left unchanged.
 *
 * The string is decoded in a single left-to-right pass over all escape
 * sequences. The previous chain of string replaces only handled the first
 * occurrence of each sequence and decoded an escaped backslash followed by
 * "n" or "t" as a newline or tab.
 */
Refine.FixedWidthParserUI.decodeSeparator = function(s) {
  var decoded = { "n": "\n", "t": "\t", "\\": "\\" };
  return s.replace(/\\([nt\\])/g, function(match, escapedChar) {
    return decoded[escapedChar];
  });
};
|
||||||
|
|
||||||
|
/*
 * Cancels the pending delayed preview update, if there is one.
 */
Refine.FixedWidthParserUI.prototype.dispose = function() {
  if (this._timerID == null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||||
|
|
||||||
|
/*
 * Reports whether a project can be created from the current options.
 * This parser UI has nothing to validate, so the answer is always yes.
 */
Refine.FixedWidthParserUI.prototype.confirmReadyToCreateProject = function() {
  return true;
};
|
||||||
|
|
||||||
|
/*
 * Collects the parser options currently selected in the option panel.
 *
 * Returns an object with: columnWidths (a copy of the config's widths),
 * columnNames (only when at least a first name was entered), lineSeparator,
 * ignoreLines, headerLines, skipDataLines, limit, guessCellValueTypes,
 * storeBlankRows, storeBlankCellsAsNulls and includeFileSources.
 *
 * A numeric option whose checkbox is unchecked, or whose text is not a
 * number, gets its "disabled" value: -1 for ignoreLines and limit, 0 for
 * headerLines and skipDataLines.
 */
Refine.FixedWidthParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    columnWidths: [].concat(this._config.columnWidths)
  };

  var columnNames = $.trim(elmts.columnNamesInput[0].value).replace(/,\s+/g, ',').split(',');
  if (columnNames.length > 0 && columnNames[0].length > 0) {
    options.columnNames = columnNames;
  }

  // Fall back to new-line when no radio button is checked instead of
  // throwing on an empty selection.
  var checkedSeparator = this._optionContainer.find("input[name='row-separator']:checked");
  var separatorKind = (checkedSeparator.length > 0) ? checkedSeparator[0].value : 'new-line';
  switch (separatorKind) {
    case 'new-line':
      options.lineSeparator = "\n";
      break;
    default:
      options.lineSeparator = Refine.FixedWidthParserUI.decodeSeparator(
          elmts.rowSeparatorInput[0].value);
  }

  // Always parse as decimal: without a radix, "0x10" is read as 16 and
  // older engines read "010" as octal 8.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  // Value of a checkbox-guarded numeric field, or def when it is disabled
  // or unparsable.
  var readOptionalInt = function(checkbox, input, def) {
    return checkbox[0].checked ? parseIntDefault(input[0].value, def) : def;
  };

  options.ignoreLines = readOptionalInt(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  options.headerLines = readOptionalInt(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  options.skipDataLines = readOptionalInt(elmts.skipCheckbox, elmts.skipInput, 0);
  options.limit = readOptionalInt(elmts.limitCheckbox, elmts.limitInput, -1);

  options.guessCellValueTypes = elmts.guessCellValueTypesCheckbox[0].checked;

  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||||
|
|
||||||
|
/*
 * Builds the option panel: loads the HTML template into the option
 * container, binds its elements, pre-fills the controls from the config, and
 * schedules a preview update whenever an option changes. Edits to the column
 * widths field are parsed and written back to config.columnWidths.
 */
Refine.FixedWidthParserUI.prototype._initialize = function() {
  var self = this;

  this._optionContainer.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/parser-interfaces/fixed-width-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);
  // Was "self._updatePreview()": this class names the method updatePreview,
  // so clicking the button threw instead of refreshing the preview.
  this._optionContainerElmts.previewButton.click(function() { self.updatePreview(); });

  this._optionContainerElmts.columnWidthsInput[0].value = this._config.columnWidths.join(',');
  if ('columnNames' in this._config) {
    this._optionContainerElmts.columnNamesInput[0].value = this._config.columnNames.join(',');
  }

  var rowSeparatorValue = (this._config.lineSeparator == "\n") ? 'new-line' : 'custom';
  this._optionContainer.find(
      "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
  this._optionContainerElmts.rowSeparatorInput[0].value =
    Refine.FixedWidthParserUI.encodeSeparator(this._config.lineSeparator);

  if (this._config.ignoreLines > 0) {
    this._optionContainerElmts.ignoreCheckbox.attr("checked", "checked");
    this._optionContainerElmts.ignoreInput[0].value = this._config.ignoreLines.toString();
  }
  if (this._config.headerLines > 0) {
    this._optionContainerElmts.headerLinesCheckbox.attr("checked", "checked");
    this._optionContainerElmts.headerLinesInput[0].value = this._config.headerLines.toString();
  }
  if (this._config.limit > 0) {
    this._optionContainerElmts.limitCheckbox.attr("checked", "checked");
    this._optionContainerElmts.limitInput[0].value = this._config.limit.toString();
  }
  if (this._config.skipDataLines > 0) {
    this._optionContainerElmts.skipCheckbox.attr("checked", "checked");
    // Was "skipInput.value[0].value", which threw a TypeError because the
    // bound jQuery object has no "value" property.
    this._optionContainerElmts.skipInput[0].value = this._config.skipDataLines.toString();
  }
  if (this._config.storeBlankRows) {
    this._optionContainerElmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }

  if (this._config.guessCellValueTypes) {
    this._optionContainerElmts.guessCellValueTypesCheckbox.attr("checked", "checked");
  }

  if (this._config.storeBlankCellsAsNulls) {
    this._optionContainerElmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (this._config.includeFileSources) {
    this._optionContainerElmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  // Any change to an option refreshes the preview after a short delay.
  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);

  this._optionContainerElmts.columnWidthsInput.bind("change", function() {
    var newColumnWidths = [];
    var a = $.trim(this.value).replace(/,\s+/g, ',').split(',');
    for (var i = 0; i < a.length; i++) {
      var n = parseInt(a[i], 10);
      if (isNaN(n)) {
        // Keep the previous widths while the field holds an unparsable entry.
        return;
      }
      newColumnWidths.push(n);
    }
    self._config.columnWidths = newColumnWidths;
    onChange();
  });
  this._optionContainerElmts.columnNamesInput.bind("change", onChange);
};
|
||||||
|
|
||||||
|
/*
 * (Re)starts the half-second timer after which the preview is refreshed.
 * A timer that is still pending is cancelled first, so a burst of changes
 * leads to a single refresh.
 */
Refine.FixedWidthParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;

  if (ui._timerID != null) {
    window.clearTimeout(ui._timerID);
    ui._timerID = null;
  }

  var refresh = function() {
    ui._timerID = null;
    ui.updatePreview();
  };
  ui._timerID = window.setTimeout(refresh, 500); // 0.5 second
};
|
||||||
|
|
||||||
|
/*
 * Shows the progress container, sends the current options to the controller
 * and, when the controller answers with status "ok", fetches the preview
 * data, renders a fixed-width preview table into the data container and
 * hides the progress container again.
 */
Refine.FixedWidthParserUI.prototype.updatePreview = function() {
  var ui = this;

  ui._progressContainer.show();

  // The preview needs the exact cell text so the column split positions are
  // easy to see; value-type guessing is therefore switched off for it.
  var previewOptions = ui.getOptions();
  previewOptions.guessCellValueTypes = false;

  var renderPreview = function(projectData) {
    new Refine.FixedWidthPreviewTable(ui, ui._config, projectData, ui._dataContainer);
    ui._progressContainer.hide();
  };

  ui._controller.updateFormatAndOptions(previewOptions, function(result) {
    if (result.status == "ok") {
      ui._controller.getPreviewData(renderPreview, 20);
    }
  });
};
|
||||||
|
|
||||||
|
/*
 * Preview table for fixed-width data. Keeps references to the owning parser
 * UI, its configuration, the preview project data and the target element,
 * then renders immediately.
 */
Refine.FixedWidthPreviewTable = function(parserUI, config, projectData, elmt) {
  this._elmt = elmt;
  this._projectData = projectData;
  this._config = config;
  this._parserUI = parserUI;

  this._render();
};
|
||||||
|
|
||||||
|
/**
 * Rebuilds the whole preview inside this._elmt: a data table showing the
 * parsed rows, overlaid with one draggable separator per configured column
 * boundary plus a "ghost" separator that follows the mouse and inserts a new
 * boundary when pressed.
 *
 * Whenever the user moves, adds or closes a separator, the new widths are
 * written to this._config.columnWidths and to the parser UI's
 * columnWidthsInput field, and the parser UI is asked to update the preview.
 *
 * The scroll position of this._elmt is captured before the rebuild and
 * restored afterwards.
 */
Refine.FixedWidthPreviewTable.prototype._render = function() {
  var scrollTop = this._elmt[0].scrollTop;
  var scrollLeft = this._elmt[0].scrollLeft;

  this._elmt.unbind().empty();

  var self = this;
  var container = $('<div>')
    .addClass('fixed-width-preview-container')
    .appendTo(this._elmt);
  var table = $('<table>')
    .addClass("data-table")
    .addClass("fixed-width-preview-data-table")
    .appendTo(container)[0];

  var columns = this._projectData.columnModel.columns;
  // Work on a copy so the configured widths are not aliased by this render pass.
  var columnWidths = [].concat(this._config.columnWidths);

  // Appends a cell to the given row, striped by its position in the row.
  var addCell = function(tr) {
    var index = tr.cells.length;
    var td = tr.insertCell(index);
    td.className = (index % 2 == 0) ? 'even' : 'odd';
    return td;
  };

  /*------------------------------------------------------------
   *  Column Headers
   *------------------------------------------------------------
   */

  var trHead = table.insertRow(table.rows.length);
  // Row-number column header; a non-breaking space keeps the otherwise empty cell from collapsing.
  $(addCell(trHead)).addClass("column-header").html('&nbsp;'); // index

  var createColumnHeader = function(column, index) {
    var name = column.name;
    if (index < columnWidths.length) {
      // Never show more header characters than the column is wide.
      name = name.slice(0, columnWidths[index]);
    }
    $(addCell(trHead))
      .addClass("column-header")
      .text(name)
      .attr('title', column.name); // the full name stays available as a tooltip
  };
  for (var c = 0; c < columns.length; c++) {
    createColumnHeader(columns[c], c);
  }

  /*------------------------------------------------------------
   *  Data Cells
   *------------------------------------------------------------
   */

  var rows = this._projectData.rowModel.rows;
  var renderRow = function(tr, row) {
    var tdIndex = addCell(tr);
    $('<div></div>').html((row.i + 1) + ".").appendTo(tdIndex);

    var cells = row.cells;
    for (var i = 0; i < columns.length; i++) {
      var column = columns[i];
      var td = addCell(tr);
      var divContent = $('<div/>').addClass("data-table-cell-content").appendTo(td);

      var cell = (column.cellIndex < cells.length) ? cells[column.cellIndex] : null;
      if (!cell || ("v" in cell && cell.v === null)) {
        // Missing or null value: placeholder so the cell keeps its height.
        $('<span>').html("&nbsp;").appendTo(divContent);
      } else if ("e" in cell) {
        $('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
      } else if (!("r" in cell) || !cell.r) {
        if (typeof cell.v !== "string") {
          if (typeof cell.v == "number") {
            divContent.addClass("data-table-cell-content-numeric");
          }
          $('<span>')
            .addClass("data-table-value-nonstring")
            .text(cell.v)
            .appendTo(divContent);
        } else if (URL.looksLikeUrl(cell.v)) {
          $('<a>')
            .text(cell.v)
            .attr("href", cell.v)
            .attr("target", "_blank")
            .appendTo(divContent);
        } else {
          $('<span>').text(cell.v).appendTo(divContent);
        }
      }
      // Cells carrying a truthy "r" are intentionally left empty here.
    }
  };

  for (var r = 0; r < rows.length; r++) {
    renderRow(table.insertRow(table.rows.length), rows[r]);
  }

  /*------------------------------------------------------------
   *  Column Separators
   *------------------------------------------------------------
   */

  // Left edge of the first data column (header cell 0 is the row-number column).
  var pixelOffset = $(trHead.cells[1]).position().left;

  // Measure the rendered width of one character using an off-screen probe.
  var testString = '01234567890123456789012345678901234567890123456789';
  var testDiv = $('<div>')
    .css('position', 'absolute')
    .css('top', '-100px')
    .text(testString)
    .appendTo(container);
  var pixelsPerChar = testDiv.width() / testString.length;
  testDiv.remove();

  // Character index of each separator; entry i belongs to the separator created with index i.
  var columnCharIndexes = [];

  var positionColumnSeparator = function(outer, charIndex) {
    // NOTE(review): DOM.getHPaddings is defined elsewhere; presumably the element's total
    // horizontal padding, halved here to centre the separator on the boundary -- confirm.
    outer.css('left',
      Math.round(pixelOffset + charIndex * pixelsPerChar - DOM.getHPaddings(outer) / 2) + 'px');
  };

  // Translates a mouse event into a character index relative to the first data column.
  // Clamped at 0: over the row-number column the raw value is negative, and a negative
  // entry would be skipped by updatePreview while still inflating the following width.
  var computeCharIndex = function(evt) {
    var offset = evt.pageX - container.offset().left;
    return Math.max(0, Math.round((offset - pixelOffset) / pixelsPerChar));
  };

  // Converts the separator positions into column widths, stores them and refreshes the preview.
  var updatePreview = function() {
    columnCharIndexes.sort(function(a, b) { return a - b; });

    var newColumnWidths = [];
    for (var i = 0; i < columnCharIndexes.length; i++) {
      var charIndex = columnCharIndexes[i];
      var columnWidth = (i == 0) ? charIndex : (charIndex - columnCharIndexes[i - 1]);
      if (columnWidth > 0) {
        // Zero-width entries (coinciding separators, or one at index 0) are dropped.
        newColumnWidths.push(columnWidth);
      }
    }

    self._config.columnWidths = newColumnWidths;
    self._parserUI._optionContainerElmts.columnWidthsInput[0].value = newColumnWidths.join(',');
    self._parserUI.updatePreview();
  };

  // Ghost separator that follows the mouse; pressing it inserts a new boundary.
  var newSeparator = $('<div>')
    .addClass('fixed-width-preview-column-separator-outer')
    .append($('<div>').addClass('fixed-width-preview-column-separator-inner'))
    .appendTo(container);

  var createColumnSeparator = function(charIndex, index) {
    columnCharIndexes[index] = charIndex;

    var outer = $('<div>')
      .addClass('fixed-width-preview-column-separator-outer')
      .appendTo(container);
    var inner = $('<div>')
      .addClass('fixed-width-preview-column-separator-inner')
      .appendTo(outer);
    var close = $('<div>').appendTo(inner);

    positionColumnSeparator(outer, charIndex);

    outer.mouseover(function() {
      // Do not show the ghost on top of an existing separator.
      newSeparator.hide();
    })
    .mouseout(function() {
      newSeparator.show();
    })
    .mousedown(function() {
      // Start a drag: track the mouse on the container until the button is released.
      var mouseMove = function(evt) {
        var newCharIndex = computeCharIndex(evt);
        positionColumnSeparator(outer, newCharIndex);

        evt.preventDefault();
        evt.stopPropagation();
        return false;
      };
      var mouseUp = function(evt) {
        container.unbind('mousemove', mouseMove);
        container.unbind('mouseup', mouseUp);

        var newCharIndex = computeCharIndex(evt);
        positionColumnSeparator(outer, newCharIndex);

        columnCharIndexes[index] = newCharIndex;
        updatePreview();

        evt.preventDefault();
        evt.stopPropagation();
        return false;
      };
      container.bind('mousemove', mouseMove);
      container.bind('mouseup', mouseUp);
    });

    close.click(function() {
      // Collapse this separator onto its predecessor (or onto 0); the resulting
      // zero-width column is then dropped by updatePreview.
      columnCharIndexes[index] = index > 0 ? columnCharIndexes[index - 1] : 0;
      updatePreview();
    });
  };

  // One separator at the right-hand edge of every configured column.
  var charOffset = 0;
  for (var w = 0; w < columnWidths.length; w++) {
    var columnWidth = columnWidths[w];
    createColumnSeparator(charOffset + columnWidth, w);
    charOffset += columnWidth;
  }

  container
    .mouseout(function(evt) {
      newSeparator.hide();
    })
    .mousemove(function(evt) {
      positionColumnSeparator(newSeparator.show(), computeCharIndex(evt));
    });
  newSeparator.mousedown(function(evt) {
    var newCharIndex = computeCharIndex(evt);
    columnCharIndexes.push(newCharIndex);
    updatePreview();

    evt.preventDefault();
    evt.stopPropagation();
    return false;
  });

  this._elmt[0].scrollTop = scrollTop;
  this._elmt[0].scrollLeft = scrollLeft;
};
|
@ -0,0 +1,4 @@
|
|||||||
|
<div class="json-parser-ui-select-message">
|
||||||
|
Click on the first JSON { } node corresponding to the first record to load.
|
||||||
|
</div>
|
||||||
|
<div class="json-parser-ui-select-dom" bind="domContainer"></div>
|
@ -0,0 +1,19 @@
|
|||||||
|
<div class="grid-layout layout-loose layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align: right;"> </td>
|
||||||
|
<td width="1%"><button class="button" bind="pickRecordElementsButton">Pick Record Nodes</button></td>
|
||||||
|
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||||
|
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> record(s) of data</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||||
|
<td colspan="2">Store file source (file names, URLs) in each row</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
@ -0,0 +1,241 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Copyright 2011, Google Inc.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of Google Inc. nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Parser UI for JSON sources. Stores its collaborators, builds the option
 * panel and then opens the record-node picker.
 *
 * @param controller            importing controller used to push options and fetch previews
 * @param jobID                 stored as-is on the instance
 * @param job                   stored as-is on the instance
 * @param format                stored as-is on the instance
 * @param config                parser configuration (recordPath, limit, includeFileSources, dom)
 * @param dataContainerElmt     jQuery element that receives the node picker / preview table
 * @param progressContainerElmt jQuery element shown while a preview is being computed
 * @param optionContainerElmt   jQuery element that receives the option controls
 */
Refine.JsonParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {
  var ui = this;

  ui._controller = controller;
  ui._jobID = jobID;
  ui._job = job;
  ui._format = format;
  ui._config = config;

  ui._dataContainer = dataContainerElmt;
  ui._progressContainer = progressContainerElmt;
  ui._optionContainer = optionContainerElmt;

  // No debounced preview refresh is pending yet.
  ui._timerID = null;

  ui._initialize();
  ui._showPickRecordNodesUI();
};

// Register this UI with the importing controller under the key "JsonParserUI".
Refine.DefaultImportingController.parserUIs["JsonParserUI"] = Refine.JsonParserUI;
|
||||||
|
|
||||||
|
/**
 * Cancels the pending debounced preview refresh, if there is one.
 */
Refine.JsonParserUI.prototype.dispose = function() {
  var pendingTimer = this._timerID;
  if (pendingTimer == null) {
    return;
  }

  window.clearTimeout(pendingTimer);
  this._timerID = null;
};
|
||||||
|
|
||||||
|
/**
 * Checks whether a record path has been chosen, alerting the user if not.
 *
 * @return {boolean} true when this._config.recordPath is set and non-empty;
 *     false (after showing an alert) otherwise.
 */
Refine.JsonParserUI.prototype.confirmReadyToCreateProject = function() {
  var recordPath = this._config.recordPath;
  if (recordPath && recordPath.length > 0) {
    return true;
  }

  window.alert('Please specify a record path first.');
  // Explicit boolean instead of falling off the end and returning undefined.
  return false;
};
|
||||||
|
|
||||||
|
/**
 * Collects the parser options from the configuration and the option controls.
 *
 * @return an object with:
 *   recordPath         - this._config.recordPath
 *   limit              - the parsed limit input when the limit checkbox is ticked
 *                        (-1 is passed to parseIntDefault as the default), else -1
 *   includeFileSources - state of the "store file source" checkbox
 */
Refine.JsonParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = { recordPath: this._config.recordPath };

  options.limit = elmts.limitCheckbox[0].checked
    ? parseIntDefault(elmts.limitInput[0].value, -1)
    : -1;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||||
|
|
||||||
|
/**
 * Builds the option panel: loads its HTML, pre-fills the controls from the
 * current configuration and wires up the buttons and change handlers.
 */
Refine.JsonParserUI.prototype._initialize = function() {
  var parserUI = this;
  var config = this._config;

  var optionContainer = this._optionContainer.unbind().empty();
  optionContainer.html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-ui.html"));

  var elmts = DOM.bind(optionContainer);
  this._optionContainerElmts = elmts;

  elmts.previewButton.click(function() {
    parserUI._updatePreview();
  });

  // Reflect the existing configuration in the controls.
  if (config.limit > 0) {
    elmts.limitCheckbox.attr("checked", "checked");
    elmts.limitInput[0].value = config.limit.toString();
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  elmts.pickRecordElementsButton.click(function() {
    parserUI._showPickRecordNodesUI();
  });

  // Any edited option triggers a debounced preview refresh.
  optionContainer.find("input, select").bind("change", function() {
    parserUI._scheduleUpdatePreview();
  });
};
|
||||||
|
|
||||||
|
/**
 * Replaces the data container's content with a clickable rendering of
 * this._config.dom so the user can pick the node that represents one record.
 *
 * Objects and literal values are hoverable and clickable; clicking one passes
 * its path to _setRecordPath. Arrays only act as containers and contribute the
 * path segment '__anonymous__' for their elements.
 */
Refine.JsonParserUI.prototype._showPickRecordNodesUI = function() {
  var self = this;

  this._dataContainer.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-select-ui.html"));

  var elmts = DOM.bind(this._dataContainer);

  // True only when the innermost '.node' under the mouse is elmt itself, so that
  // enclosing nodes do not react to events that belong to one of their descendants.
  var hittest = function(evt, elmt) {
    var a = $(evt.target).closest('.node');
    return a.length > 0 && a[0] == elmt[0];
  };

  // Makes elmt highlight on hover and select `path` as the record path on click.
  var registerEvents = function(elmt, path) {
    elmt.bind('mouseover', function(evt) {
      if (hittest(evt, elmt)) {
        elmts.domContainer.find('.highlight').removeClass('highlight');
        elmt.addClass('highlight');
      }
    })
    .bind('mouseout', function(evt) {
      elmt.removeClass('highlight');
    })
    .click(function(evt) {
      if (hittest(evt, elmt)) {
        self._setRecordPath(path);
      }
    });
  };

  // Renders "[ ... ]" with one indented child node per array element.
  var renderArray = function(a, container, parentPath) {
    $('<span>').addClass('punctuation').text('[').appendTo(container);

    // All elements of an array share the same path segment.
    var parentPath2 = [].concat(parentPath);
    parentPath2.push('__anonymous__');

    var elementNode = null;
    for (var i = 0; i < a.length; i++) {
      if (elementNode != null) {
        // The separating comma is appended to the end of the previous element's node.
        $('<span>').addClass('punctuation').text(',').appendTo(elementNode);
      }
      elementNode = $('<div>').addClass('node').addClass('indented').appendTo(container);

      renderNode(a[i], elementNode, parentPath2);
    }

    $('<span>').addClass('punctuation').text(']').appendTo(container);
  };

  // Renders "{ key: value, ... }" and makes the object's own container selectable.
  var renderObject = function(o, container, parentPath) {
    $('<span>').addClass('punctuation').text('{').appendTo(container);

    var elementNode = null;
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        if (elementNode != null) {
          $('<span>').addClass('punctuation').text(',').appendTo(elementNode);
        }
        elementNode = $('<div>').addClass('node').addClass('indented').appendTo(container);

        $('<span>').text(key).addClass('field-name').appendTo(elementNode);
        $('<span>').text(': ').addClass('punctuation').appendTo(elementNode);

        var parentPath2 = [].concat(parentPath);
        parentPath2.push(key);

        renderNode(o[key], elementNode, parentPath2);
      }
    }
    $('<span>').addClass('punctuation').text('}').appendTo(container);

    registerEvents(container, parentPath);
  };

  // Dispatches on the kind of JSON value: null/undefined, object, array or literal.
  var renderNode = function(node, container, parentPath) {
    if (node == null) {
      $('<span>').addClass('literal').text('null').appendTo(container);
    } else {
      if ($.isPlainObject(node)) {
        renderObject(node, container, parentPath);
      } else if ($.isArray(node)) {
        renderArray(node, container, parentPath);
      } else {
        $('<span>').addClass('literal').text(node.toString()).appendTo(container);
        registerEvents(container, parentPath);
      }
    }
  };

  renderNode(this._config.dom, elmts.domContainer, [ '__anonymous__' ]);
};
|
||||||
|
|
||||||
|
/**
 * Debounces preview refreshes: any refresh that is still pending is cancelled
 * and a new one is scheduled to run half a second from now.
 */
Refine.JsonParserUI.prototype._scheduleUpdatePreview = function() {
  var parserUI = this;
  var delayMs = 500; // 0.5 second

  if (parserUI._timerID != null) {
    window.clearTimeout(parserUI._timerID);
    parserUI._timerID = null;
  }

  var refresh = function() {
    // The timer has fired, so there is nothing left to cancel.
    parserUI._timerID = null;
    parserUI._updatePreview();
  };
  parserUI._timerID = window.setTimeout(refresh, delayMs);
};
|
||||||
|
|
||||||
|
/**
 * Stores the chosen record path in the configuration and refreshes the
 * preview immediately (no debounce).
 *
 * @param path the record path to store; the node picker passes an array of path segments
 */
Refine.JsonParserUI.prototype._setRecordPath = function(path) {
  var config = this._config;
  config.recordPath = path;

  this._updatePreview();
};
|
||||||
|
|
||||||
|
/**
 * Sends the current parser options to the importing controller and, once they
 * have been accepted, replaces the data container's content with a preview
 * table built from freshly fetched preview data.
 *
 * The progress indicator is shown for the duration of the round trip. It is
 * hidden again when the preview data arrives, and also when the controller
 * reports a status other than "ok" (in which case nothing is rendered).
 */
Refine.JsonParserUI.prototype._updatePreview = function() {
  var self = this;

  this._progressContainer.show();

  var options = this.getOptions();
  // The preview is requested without cell value type guessing.
  // NOTE(review): this mirrors the fixed-width parser UI; confirm the JSON importer
  // actually reads guessCellValueTypes.
  options.guessCellValueTypes = false;

  this._controller.updateFormatAndOptions(options, function(result) {
    if (result.status == "ok") {
      // NOTE(review): 100 is presumably the number of preview rows requested -- confirm
      // against the controller's getPreviewData signature.
      self._controller.getPreviewData(function(projectData) {
        self._progressContainer.hide();

        // Constructed for its side effect of rendering into the emptied data container.
        new Refine.PreviewTable(projectData, self._dataContainer.unbind().empty());
      }, 100);
    } else {
      // No preview will be rendered; do not leave the progress indicator up forever.
      self._progressContainer.hide();
    }
  });
};
|
@ -0,0 +1,45 @@
|
|||||||
|
<div class="grid-layout layout-loose layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tighter"><table>
|
||||||
|
<tr>
|
||||||
|
<td width="1%">Character encoding</td>
|
||||||
|
<td><select bind="encodingSelect"></select></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align: right;"> </td>
|
||||||
|
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||||
|
</tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td colspan="2">Parse every <input bind="linesPerRowInput" type="text" class="lightweight" size="2" value="0" />
|
||||||
|
lines into one row
|
||||||
|
</td></tr>
|
||||||
|
|
||||||
|
<tr><td colspan="2">Lines are separated by</td></tr>
|
||||||
|
<tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
|
||||||
|
<tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
|
||||||
|
<input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
|
||||||
|
<tr><td colspan="2">Escape special characters with \</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank rows</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
|
||||||
|
<td colspan="2">Store blank cells as nulls</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||||
|
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
<td colspan="2"><div class="grid-layout layout-tightest"><table>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
|
||||||
|
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
|
||||||
|
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||||
|
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||||
|
</table></div></td>
|
||||||
|
</tr>
|
||||||
|
</table></div>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user