Merged new importer UI work from branch over.
git-svn-id: http://google-refine.googlecode.com/svn/trunk@2170 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
0fa99d21ca
commit
78edff6f7f
@ -33,8 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
var html = "text/html";
|
||||
var encoding = "UTF-8";
|
||||
var version="0.2"
|
||||
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
|
||||
var version = "0.2";
|
||||
|
||||
/*
|
||||
* Function invoked to initialize the extension.
|
||||
@ -43,21 +42,24 @@ function init() {
|
||||
// Packages.java.lang.System.err.println("Initializing gData extension");
|
||||
// Packages.java.lang.System.err.println(module.getMountPoint());
|
||||
|
||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
||||
module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
|
||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
||||
module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
|
||||
Packages.com.google.refine.RefineServlet.registerCommand(
|
||||
module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
||||
var RS = Packages.com.google.refine.RefineServlet;
|
||||
RS.registerCommand(module, "authorize", Packages.com.google.refine.extension.gdata.AuthorizeCommand());
|
||||
RS.registerCommand(module, "authorize2", Packages.com.google.refine.extension.gdata.AuthorizeCommand2());
|
||||
RS.registerCommand(module, "deauthorize", Packages.com.google.refine.extension.gdata.DeAuthorizeCommand());
|
||||
|
||||
// Register importer and exporter
|
||||
Packages.com.google.refine.importers.ImporterRegistry.registerImporter(
|
||||
"gdata-importer", new Packages.com.google.refine.extension.gdata.GDataImporter());
|
||||
var IM = Packages.com.google.refine.importing.ImportingManager;
|
||||
IM.registerFormat("service/gdata", "GData services"); // generic format, no parser to handle it
|
||||
IM.registerFormat("service/gdata/spreadsheet", "Google spreadsheets", false, "GoogleSpreadsheetParserUI",
|
||||
new Packages.com.google.refine.extension.gdata.GDataImporter());
|
||||
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.GDataUrlRewriter())
|
||||
IM.registerUrlRewriter(new Packages.com.google.refine.extension.gdata.FusionTablesUrlRewriter())
|
||||
|
||||
// Packages.com.google.refine.exporters.ExporterRegistry.registerExporter(
|
||||
// "gdata-exporter", new Packages.com.google.refine.extension.gdata.GDataExporter());
|
||||
|
||||
// Script files to inject into /project page
|
||||
var ClientSideResourceManager = Packages.com.google.refine.ClientSideResourceManager;
|
||||
ClientSideResourceManager.addPaths(
|
||||
"project/scripts",
|
||||
module,
|
||||
|
@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Copyright (c) 2010, Thomas F. Morris
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* - Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the name of Google nor the names of its contributors may be used to
|
||||
* endorse or promote products derived from this software without specific
|
||||
* prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package com.google.refine.extension.gdata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
|
||||
import com.google.gdata.client.GoogleService;
|
||||
import com.google.gdata.client.Service.GDataRequest;
|
||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||
import com.google.gdata.util.ContentType;
|
||||
import com.google.gdata.util.ServiceException;
|
||||
import com.google.refine.importing.UrlRewriter;
|
||||
|
||||
/**
|
||||
* @author Tom Morris <tfmorris@gmail.com>
|
||||
* @copyright 2010 Thomas F. Morris
|
||||
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
public class FusionTablesUrlRewriter implements UrlRewriter {
|
||||
|
||||
@Override
|
||||
public Result rewrite(String urlString) {
|
||||
try {
|
||||
URL url = new URL(urlString);
|
||||
if (isFusionTableURL(url)) {
|
||||
Result result = new Result();
|
||||
try {
|
||||
result.rewrittenUrl = generateQueryUrl(url, 0, -1).toExternalForm();
|
||||
result.format = "text/line-based/*sv";
|
||||
result.download = true;
|
||||
return result;
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// TODO: what do we do here?
|
||||
}
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
// Ignore
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public boolean isFusionTableURL(URL url) {
|
||||
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
||||
String query = url.getQuery();
|
||||
if (query == null) {
|
||||
query = "";
|
||||
}
|
||||
return url.getHost().endsWith(".google.com")
|
||||
&& url.getPath().startsWith("/fusiontables/DataSource")
|
||||
&& query.contains("dsrcid=");
|
||||
}
|
||||
|
||||
static public URL generateQueryUrl(URL url, int start, int limit)
|
||||
throws MalformedURLException, UnsupportedEncodingException {
|
||||
|
||||
String tableId = getFusionTableKey(url);
|
||||
|
||||
final String SERVICE_URL =
|
||||
"http://www.google.com/fusiontables/api/query";
|
||||
final String selectQuery = "select * from " + tableId
|
||||
+ " offset " + (start) + (limit > 0 ? (" limit " + limit) : "");
|
||||
|
||||
return new URL(SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
|
||||
}
|
||||
|
||||
static public InputStream openInputStream(URL queryUrl) throws IOException, ServiceException {
|
||||
GoogleService service = new GoogleService("fusiontables", GDataExtension.SERVICE_APP_NAME);
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
GDataRequest queryRequest = service.getRequestFactory().getRequest(
|
||||
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
|
||||
queryRequest.execute();
|
||||
|
||||
return queryRequest.getResponseStream();
|
||||
}
|
||||
|
||||
static private String getFusionTableKey(URL url) {
|
||||
String query = url.getQuery();
|
||||
if (query != null) {
|
||||
String[] parts = query.split("&");
|
||||
for (String part : parts) {
|
||||
if (part.startsWith("dsrcid=")) {
|
||||
int offset = ("dsrcid=").length();
|
||||
String tableId = part.substring(offset);
|
||||
// TODO: Any special id format considerations to worry about?
|
||||
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
||||
// return tableId;
|
||||
// }
|
||||
return tableId;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2010, Thomas F. Morris
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* - Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the name of Google nor the names of its contributors may be used to
|
||||
* endorse or promote products derived from this software without specific
|
||||
* prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package com.google.refine.extension.gdata;
|
||||
|
||||
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
||||
|
||||
/**
|
||||
* @author Tom Morris <tfmorris@gmail.com>
|
||||
* @copyright 2010 Thomas F. Morris
|
||||
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
abstract public class GDataExtension {
|
||||
static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";
|
||||
|
||||
static private FeedURLFactory factory;
|
||||
static public FeedURLFactory getFeedUrlFactory() {
|
||||
if (factory == null) {
|
||||
// Careful - this is shared by everyone.
|
||||
factory = FeedURLFactory.getDefault();
|
||||
}
|
||||
return factory;
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010,2011. Thomas F. Morris
|
||||
* Copyright (c) 2010, Thomas F. Morris
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -29,281 +29,125 @@
|
||||
package com.google.refine.extension.gdata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.google.gdata.client.GoogleService;
|
||||
import com.google.gdata.client.Service.GDataRequest;
|
||||
import com.google.gdata.client.Service.GDataRequest.RequestType;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.gdata.client.spreadsheet.CellQuery;
|
||||
import com.google.gdata.client.spreadsheet.FeedURLFactory;
|
||||
import com.google.gdata.client.spreadsheet.SpreadsheetService;
|
||||
import com.google.gdata.data.spreadsheet.Cell;
|
||||
import com.google.gdata.data.spreadsheet.CellEntry;
|
||||
import com.google.gdata.data.spreadsheet.CellFeed;
|
||||
import com.google.gdata.data.spreadsheet.ListEntry;
|
||||
import com.google.gdata.data.spreadsheet.ListFeed;
|
||||
import com.google.gdata.data.spreadsheet.SpreadsheetEntry;
|
||||
import com.google.gdata.data.spreadsheet.SpreadsheetFeed;
|
||||
import com.google.gdata.data.spreadsheet.WorksheetEntry;
|
||||
import com.google.gdata.data.spreadsheet.WorksheetFeed;
|
||||
import com.google.gdata.util.ContentType;
|
||||
import com.google.gdata.util.InvalidEntryException;
|
||||
import com.google.gdata.util.ServiceException;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.importers.UrlImporter;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.importers.TabularImportingParserBase;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
/**
|
||||
* Google Refine importer for Google Spreadsheets.
|
||||
* Google Refine parser for Google Spreadsheets.
|
||||
*
|
||||
* @author Tom Morris <tfmorris@gmail.com>
|
||||
* @copyright 2010 Thomas F. Morris
|
||||
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
public class GDataImporter implements UrlImporter {
|
||||
|
||||
static final String SERVICE_APP_NAME = "Google-Refine-GData-Extension";
|
||||
|
||||
private FeedURLFactory factory;
|
||||
|
||||
public class GDataImporter extends TabularImportingParserBase {
|
||||
public GDataImporter() {
|
||||
// Careful - this constructor is called at server init time
|
||||
// and is shared by everyone.
|
||||
factory = FeedURLFactory.getDefault();
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(URL url, Project project, ProjectMetadata metadata,
|
||||
Properties options) throws Exception {
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
JSONObject fileRecord,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) throws IOException {
|
||||
String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||
String urlString = JSONUtilities.getString(fileRecord, "url", null);
|
||||
URL url = new URL(urlString);
|
||||
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
||||
|
||||
// Note: Unlike TSV/CSV importer, we count all rows towards skip, not
|
||||
// just "data" rows
|
||||
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
|
||||
int dataStart = ignoreLines + headerLines + skip;
|
||||
boolean guessValueType = ImporterUtilities.getBooleanOption(
|
||||
"guess-value-type", options, true);
|
||||
|
||||
// TODO: Put this in a namespace?
|
||||
metadata.setCustomMetadata("source-url", url.toExternalForm());
|
||||
|
||||
// Start fresh for each read so that we're not caching authorization or
|
||||
// anything
|
||||
if (isSpreadsheetURL(url)) {
|
||||
importSpreadsheet(url, project, ignoreLines, headerLines, limit,
|
||||
dataStart, guessValueType);
|
||||
} else if (isFusionTableURL(url)) {
|
||||
importFusionTable(url, project, ignoreLines, headerLines, limit,
|
||||
dataStart, guessValueType);
|
||||
} else {
|
||||
// should never happen (famous last words)
|
||||
throw new IllegalArgumentException(
|
||||
"Got invalid format URL in GDataImporter.read()");
|
||||
}
|
||||
}
|
||||
|
||||
private void importSpreadsheet(URL url, Project project, int ignoreLines,
|
||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
||||
throws MalformedURLException, IOException, ServiceException,
|
||||
Exception {
|
||||
SpreadsheetService service = new SpreadsheetService(SERVICE_APP_NAME);
|
||||
SpreadsheetService service = new SpreadsheetService(GDataExtension.SERVICE_APP_NAME);
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
String spreadsheetKey = getSpreadsheetKey(url);
|
||||
|
||||
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
|
||||
for (int sheetIndex : sheets) {
|
||||
WorksheetEntry worksheet;
|
||||
try {
|
||||
worksheet = getWorksheetEntries(service, spreadsheetKey).get(0);
|
||||
} catch (InvalidEntryException e) {
|
||||
throw new RuntimeException("Failed to open spreadsheet "
|
||||
+ e.getResponseBody(), e);
|
||||
}
|
||||
|
||||
// Create columns
|
||||
List<String> columnHeaders = getColumnHeaders(service, worksheet,
|
||||
ignoreLines, headerLines);
|
||||
|
||||
int columnCount = worksheet.getColCount();
|
||||
project.columnModel.setMaxCellIndex(columnCount);
|
||||
boolean validColumn[] = new boolean[columnCount];
|
||||
int index = 0;
|
||||
for (String name : columnHeaders) {
|
||||
Column column = new Column(index, name + " " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[index++] = true;
|
||||
}
|
||||
for (int i = index; index < columnCount; index++) {
|
||||
Column column = new Column(index, "Column " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[i] = true;
|
||||
}
|
||||
|
||||
// Create data rows & cells
|
||||
int previousRow = dataStart - 1;
|
||||
int previousCol = -1;
|
||||
List<CellEntry> cellEntries = getCells(service, worksheet, dataStart);
|
||||
Row row = null;
|
||||
for (CellEntry cellEntry : cellEntries) {
|
||||
com.google.gdata.data.spreadsheet.Cell cell = cellEntry.getCell();
|
||||
if (cell == null) {
|
||||
worksheet = getWorksheetEntries(service, spreadsheetKey).get(sheetIndex);
|
||||
} catch (ServiceException e) {
|
||||
exceptions.add(e);
|
||||
continue;
|
||||
}
|
||||
int r = cell.getRow() - 1; // convert from 1-based to 0-based
|
||||
int c = cell.getCol() - 1;
|
||||
|
||||
if (limit > 0 && r > limit) {
|
||||
break;
|
||||
readTable(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
new BatchRowReader(service, worksheet, 20),
|
||||
fileSource + "#" + worksheet.getTitle().getPlainText(),
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle gaps in rows
|
||||
if (r > previousRow) {
|
||||
// Finish and add current row
|
||||
if (row != null) {
|
||||
project.rows.add(row);
|
||||
// project.columnModel.setMaxCellIndex(row.cells.size()); //
|
||||
// TODO: ???
|
||||
static private class BatchRowReader implements TableDataReader {
|
||||
final int batchSize;
|
||||
final SpreadsheetService service;
|
||||
final WorksheetEntry worksheet;
|
||||
final int totalRowCount;
|
||||
|
||||
int nextRow = 0; // 0-based
|
||||
int batchRowStart = -1; // 0-based
|
||||
List<List<Object>> rowsOfCells = null;
|
||||
|
||||
public BatchRowReader(SpreadsheetService service, WorksheetEntry worksheet, int batchSize) {
|
||||
this.service = service;
|
||||
this.worksheet = worksheet;
|
||||
this.batchSize = batchSize;
|
||||
this.totalRowCount = worksheet.getRowCount();
|
||||
}
|
||||
|
||||
// Add empty rows for skipped rows
|
||||
while (previousRow < r - 1) {
|
||||
project.rows.add(new Row(columnCount));
|
||||
previousRow++;
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
if (rowsOfCells == null || nextRow > batchRowStart + rowsOfCells.size()) {
|
||||
batchRowStart = batchRowStart + (rowsOfCells == null ? 0 : rowsOfCells.size());
|
||||
if (batchRowStart < totalRowCount) {
|
||||
try {
|
||||
rowsOfCells = getRowsOfCells(service, worksheet, batchRowStart + 1, batchSize);
|
||||
} catch (ServiceException e) {
|
||||
rowsOfCells = null;
|
||||
throw new IOException(e);
|
||||
}
|
||||
row = new Row(columnCount);
|
||||
previousRow = r;
|
||||
previousCol = 0;
|
||||
}
|
||||
|
||||
// Add blank cells for any that were skipped before the current one
|
||||
for (int col = previousCol + 1; col < c; col++) {
|
||||
row.cells.add(new Cell("", null));
|
||||
}
|
||||
previousCol = c;
|
||||
|
||||
String s = cell.getValue();
|
||||
if (s != null) {
|
||||
s = s.trim();
|
||||
}
|
||||
if (ExpressionUtils.isNonBlankData(s)) {
|
||||
Serializable value = guessValueType ? ImporterUtilities
|
||||
.parseCellValue(s) : s;
|
||||
row.cells.add(new Cell(value, null));
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
}
|
||||
}
|
||||
// Add last row
|
||||
if (row != null) {
|
||||
project.rows.add(row);
|
||||
rowsOfCells = null;
|
||||
}
|
||||
}
|
||||
|
||||
private void importFusionTable(URL url, Project project, int ignoreLines,
|
||||
int headerLines, int limit, int dataStart, boolean guessValueType)
|
||||
throws MalformedURLException, IOException, ServiceException,
|
||||
Exception {
|
||||
GoogleService service = new GoogleService("fusiontables", SERVICE_APP_NAME);
|
||||
// String token = TokenCookie.getToken(request);
|
||||
// if (token != null) {
|
||||
// service.setAuthSubToken(token);
|
||||
// }
|
||||
String tableId = getFusionTableKey(url);
|
||||
|
||||
final String SERVICE_URL =
|
||||
"http://www.google.com/fusiontables/api/query";
|
||||
final String selectQuery = "select * from " + tableId
|
||||
+ " offset " + (dataStart) + (limit>0 ? (" limit " + limit):"");
|
||||
|
||||
URL queryUrl = new URL(
|
||||
SERVICE_URL + "?sql=" + URLEncoder.encode(selectQuery, "UTF-8"));
|
||||
GDataRequest queryRequest = service.getRequestFactory().getRequest(
|
||||
RequestType.QUERY, queryUrl, ContentType.TEXT_PLAIN);
|
||||
queryRequest.execute();
|
||||
|
||||
Scanner scanner = new Scanner(queryRequest.getResponseStream(),"UTF-8");
|
||||
|
||||
// TODO: Just use the first row of data as column headers for now
|
||||
List<String> columnHeaders = getTableRow(scanner);
|
||||
|
||||
// Create columns
|
||||
int columnCount = columnHeaders.size();
|
||||
project.columnModel.setMaxCellIndex(columnCount);
|
||||
boolean validColumn[] = new boolean[columnCount];
|
||||
int index = 0;
|
||||
for (String name : columnHeaders) {
|
||||
Column column = new Column(index, name + " " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[index++] = true;
|
||||
}
|
||||
for (int i = index; index < columnCount; index++) {
|
||||
Column column = new Column(index, "Column " + index);
|
||||
project.columnModel.columns.add(column);
|
||||
validColumn[i] = true;
|
||||
}
|
||||
|
||||
// Create data rows & cells
|
||||
List<String> values = columnHeaders;
|
||||
while (values != null) {
|
||||
Row row = new Row(columnCount);
|
||||
for (String valString : values) {
|
||||
valString = valString.trim();
|
||||
if (ExpressionUtils.isNonBlankData(valString)) {
|
||||
Serializable value = guessValueType ? ImporterUtilities
|
||||
.parseCellValue(valString) : valString;
|
||||
row.cells.add(new Cell(value, null));
|
||||
if (rowsOfCells != null && nextRow - batchRowStart < rowsOfCells.size()) {
|
||||
return rowsOfCells.get(nextRow++ - batchRowStart);
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
}
|
||||
}
|
||||
project.rows.add(row);
|
||||
values = getTableRow(scanner);
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getTableRow(Scanner scanner) {
|
||||
/**
|
||||
* CSV values are terminated by comma or end-of-line and consist either of
|
||||
* plain text without commas or quotes, or a quoted expression, where inner
|
||||
* quotes are escaped by doubling.
|
||||
*/
|
||||
final Pattern CSV_VALUE_PATTERN =
|
||||
Pattern.compile("([^,\\r\\n\"]*|\"(([^\"]*\"\")*[^\"]*)\")(,|\\r?\\n)");
|
||||
|
||||
if (!scanner.hasNextLine()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<String> result = new ArrayList<String>();
|
||||
while (scanner.hasNextLine()) {
|
||||
scanner.findWithinHorizon(CSV_VALUE_PATTERN, 0);
|
||||
MatchResult match = scanner.match();
|
||||
String quotedString = match.group(2);
|
||||
String decoded = quotedString == null ? match.group(1)
|
||||
: quotedString.replaceAll("\"\"", "\"");
|
||||
result.add(decoded);
|
||||
if (!match.group(4).equals(",")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the spreadsheets that an authenticated user has access to. Not
|
||||
@ -313,130 +157,67 @@ public class GDataImporter implements UrlImporter {
|
||||
* @throws Exception
|
||||
* if error in retrieving the spreadsheet information
|
||||
*/
|
||||
public List<SpreadsheetEntry> getSpreadsheetEntries(
|
||||
SpreadsheetService service) throws Exception {
|
||||
static public List<SpreadsheetEntry> getSpreadsheetEntries(
|
||||
SpreadsheetService service
|
||||
) throws Exception {
|
||||
SpreadsheetFeed feed = service.getFeed(
|
||||
factory.getSpreadsheetsFeedUrl(), SpreadsheetFeed.class);
|
||||
GDataExtension.getFeedUrlFactory().getSpreadsheetsFeedUrl(),
|
||||
SpreadsheetFeed.class);
|
||||
return feed.getEntries();
|
||||
}
|
||||
|
||||
public List<WorksheetEntry> getWorksheetEntries(SpreadsheetService service,
|
||||
String spreadsheetKey) throws MalformedURLException, IOException,
|
||||
ServiceException {
|
||||
WorksheetFeed feed = service
|
||||
.getFeed(factory.getWorksheetFeedUrl(spreadsheetKey, "public",
|
||||
"values"), WorksheetFeed.class);
|
||||
static public List<WorksheetEntry> getWorksheetEntries(
|
||||
SpreadsheetService service, String spreadsheetKey
|
||||
) throws MalformedURLException, IOException, ServiceException {
|
||||
WorksheetFeed feed = service.getFeed(
|
||||
GDataExtension.getFeedUrlFactory().getWorksheetFeedUrl(spreadsheetKey, "public", "values"),
|
||||
WorksheetFeed.class);
|
||||
return feed.getEntries();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the columns headers from the cell feed of the worksheet entry.
|
||||
*
|
||||
* @param worksheet
|
||||
* worksheet entry containing the cell feed in question
|
||||
* @return a list of column headers
|
||||
* @throws Exception
|
||||
* if error in retrieving the spreadsheet information
|
||||
*/
|
||||
public List<String> getColumnHeaders(SpreadsheetService service,
|
||||
WorksheetEntry worksheet, int startRow, int rows) throws Exception {
|
||||
List<String> headers = new ArrayList<String>();
|
||||
|
||||
// Get the appropriate URL for a cell feed
|
||||
static public List<List<Object>> getRowsOfCells(
|
||||
SpreadsheetService service,
|
||||
WorksheetEntry worksheet,
|
||||
int startRow, // 1-based
|
||||
int rowCount
|
||||
) throws IOException, ServiceException {
|
||||
URL cellFeedUrl = worksheet.getCellFeedUrl();
|
||||
|
||||
// Create a query for the cells in the header row(s) (1-based)
|
||||
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
||||
if (startRow > 0) {
|
||||
cellQuery.setMinimumRow(startRow + 1);
|
||||
}
|
||||
cellQuery.setMaximumRow(startRow + rows);
|
||||
|
||||
// Get the cell feed matching the query
|
||||
CellFeed topRowCellFeed = service.query(cellQuery, CellFeed.class);
|
||||
|
||||
// Get the cell entries from the feed
|
||||
List<CellEntry> cellEntries = topRowCellFeed.getEntries();
|
||||
for (CellEntry entry : cellEntries) {
|
||||
|
||||
// Get the cell element from the entry
|
||||
com.google.gdata.data.spreadsheet.Cell cell = entry.getCell();
|
||||
int r = cell.getRow() - 1;
|
||||
if (cell != null) {
|
||||
if (r == startRow) {
|
||||
headers.add(cell.getValue().trim());
|
||||
} else if (r < startRow + rows) {
|
||||
headers.set(r, headers.get(r) + " "
|
||||
+ cell.getValue().trim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
public List<CellEntry> getCells(SpreadsheetService service,
|
||||
WorksheetEntry worksheet, int startRow) throws IOException,
|
||||
ServiceException {
|
||||
|
||||
URL cellFeedUrl = worksheet.getCellFeedUrl();
|
||||
|
||||
// Create a query skipping the desired number of rows
|
||||
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
||||
cellQuery.setMinimumRow(startRow + 1); // 1-based
|
||||
int rows = worksheet.getRowCount();
|
||||
cellQuery.setMaximumRow(rows);
|
||||
// cellQuery.setMinimumCol(1);
|
||||
int minRow = Math.max(1, startRow);
|
||||
int maxRow = Math.min(worksheet.getRowCount(), startRow + rowCount - 1);
|
||||
int rows = maxRow - minRow + 1;
|
||||
int cols = worksheet.getColCount();
|
||||
|
||||
CellQuery cellQuery = new CellQuery(cellFeedUrl);
|
||||
cellQuery.setMinimumRow(minRow);
|
||||
cellQuery.setMaximumRow(maxRow);
|
||||
cellQuery.setMaximumCol(cols);
|
||||
cellQuery.setMaxResults(rows * cols);
|
||||
cellQuery.setReturnEmpty(false);
|
||||
|
||||
CellFeed cellFeed = service.query(cellQuery, CellFeed.class);
|
||||
return cellFeed.getEntries();
|
||||
List<CellEntry> cellEntries = cellFeed.getEntries();
|
||||
|
||||
List<List<Object>> rowsOfCells = new ArrayList<List<Object>>(rows);
|
||||
for (CellEntry cellEntry : cellEntries) {
|
||||
Cell cell = cellEntry.getCell();
|
||||
int row = cell.getRow();
|
||||
int col = cell.getCol();
|
||||
|
||||
while (row > rowsOfCells.size()) {
|
||||
rowsOfCells.add(new ArrayList<Object>(cols));
|
||||
}
|
||||
List<Object> rowOfCells = rowsOfCells.get(row - 1); // 1-based
|
||||
|
||||
while (col > rowOfCells.size()) {
|
||||
rowOfCells.add(null);
|
||||
}
|
||||
rowOfCells.set(col - 1, cell.getValue());
|
||||
}
|
||||
return rowsOfCells;
|
||||
}
|
||||
|
||||
List<ListEntry> getListEntries(SpreadsheetService service,
|
||||
WorksheetEntry worksheet) throws IOException, ServiceException {
|
||||
URL listFeedUrl = worksheet.getListFeedUrl();
|
||||
ListFeed feed = service.getFeed(listFeedUrl, ListFeed.class);
|
||||
return feed.getEntries();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String filename) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(URL url) {
|
||||
return isSpreadsheetURL(url) || isFusionTableURL(url);
|
||||
}
|
||||
|
||||
private boolean isSpreadsheetURL(URL url) {
|
||||
String host = url.getHost();
|
||||
String query = url.getQuery();
|
||||
if (query == null) {
|
||||
query = "";
|
||||
}
|
||||
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||
return host.endsWith(".google.com")
|
||||
&& host.contains("spreadsheet")
|
||||
&& getSpreadsheetKey(url) != null;
|
||||
}
|
||||
|
||||
private boolean isFusionTableURL(URL url) {
|
||||
// http://www.google.com/fusiontables/DataSource?dsrcid=1219
|
||||
String query = url.getQuery();
|
||||
if (query == null) {
|
||||
query = "";
|
||||
}
|
||||
return url.getHost().endsWith(".google.com")
|
||||
&& url.getPath().startsWith("/fusiontables/DataSource")
|
||||
&& getFusionTableKey(url) != null;
|
||||
}
|
||||
|
||||
// Modified version of FeedURLFactor.getSpreadsheetKeyFromUrl()
|
||||
// Modified version of FeedURLFactory.getSpreadsheetKeyFromUrl()
|
||||
private String getSpreadsheetKey(URL url) {
|
||||
String query = url.getQuery();
|
||||
if (query != null) {
|
||||
@ -472,23 +253,4 @@ public class GDataImporter implements UrlImporter {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getFusionTableKey(URL url) {
|
||||
String query = url.getQuery();
|
||||
if (query != null) {
|
||||
String[] parts = query.split("&");
|
||||
for (String part : parts) {
|
||||
if (part.startsWith("dsrcid=")) {
|
||||
int offset = ("dsrcid=").length();
|
||||
String tableId = part.substring(offset);
|
||||
// TODO: Any special id format considerations to worry about?
|
||||
// if (tableId.startsWith("p") || !tableId.contains(".")) {
|
||||
// return tableId;
|
||||
// }
|
||||
return tableId;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2010, Thomas F. Morris
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
* - Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* Neither the name of Google nor the names of its contributors may be used to
|
||||
* endorse or promote products derived from this software without specific
|
||||
* prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package com.google.refine.extension.gdata;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
import com.google.refine.importing.UrlRewriter;
|
||||
|
||||
/**
|
||||
* @author Tom Morris <tfmorris@gmail.com>
|
||||
* @copyright 2010 Thomas F. Morris
|
||||
* @license New BSD http://www.opensource.org/licenses/bsd-license.php
|
||||
*/
|
||||
public class GDataUrlRewriter implements UrlRewriter {
|
||||
|
||||
@Override
|
||||
public Result rewrite(String urlString) {
|
||||
try {
|
||||
URL url = new URL(urlString);
|
||||
if (isSpreadsheetURL(url)) {
|
||||
Result result = new Result();
|
||||
result.rewrittenUrl = urlString;
|
||||
result.format = "service/gdata/spreadsheet";
|
||||
result.download = false;
|
||||
return result;
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
// Ignore
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public boolean isSpreadsheetURL(URL url) {
|
||||
String host = url.getHost();
|
||||
String query = url.getQuery();
|
||||
if (query == null) {
|
||||
query = "";
|
||||
}
|
||||
// http://spreadsheets.google.com/ccc?key=tI36b9Fxk1lFBS83iR_3XQA&hl=en
|
||||
return host.endsWith(".google.com") && host.contains("spreadsheet") && query.contains("key=");
|
||||
}
|
||||
}
|
19
main/src/com/google/refine/HttpResponder.java
Normal file
19
main/src/com/google/refine/HttpResponder.java
Normal file
@ -0,0 +1,19 @@
|
||||
package com.google.refine;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import com.google.refine.RefineServlet;
|
||||
|
||||
public interface HttpResponder {
|
||||
public void init(RefineServlet servlet);
|
||||
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException;
|
||||
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException;
|
||||
}
|
@ -50,7 +50,7 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.commands.importing.ImportManager;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.io.FileProjectManager;
|
||||
|
||||
import edu.mit.simile.butterfly.Butterfly;
|
||||
@ -125,7 +125,7 @@ public class RefineServlet extends Butterfly {
|
||||
|
||||
s_dataDir = new File(data);
|
||||
FileProjectManager.initialize(s_dataDir);
|
||||
ImportManager.initialize(this);
|
||||
ImportingManager.initialize(this);
|
||||
|
||||
if (_timer == null) {
|
||||
_timer = new Timer("autosave");
|
||||
|
180
main/src/com/google/refine/commands/HttpUtilities.java
Normal file
180
main/src/com/google/refine/commands/HttpUtilities.java
Normal file
@ -0,0 +1,180 @@
|
||||
package com.google.refine.commands;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.velocity.VelocityContext;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
abstract public class HttpUtilities {
|
||||
final static protected Logger logger = LoggerFactory.getLogger("command");
|
||||
|
||||
static public void respond(HttpServletResponse response, String content)
|
||||
throws IOException, ServletException {
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setStatus(HttpServletResponse.SC_OK);
|
||||
Writer w = response.getWriter();
|
||||
if (w != null) {
|
||||
w.write(content);
|
||||
w.flush();
|
||||
w.close();
|
||||
} else {
|
||||
throw new ServletException("response returned a null writer");
|
||||
}
|
||||
}
|
||||
|
||||
static public void respond(HttpServletResponse response, String status, String message)
|
||||
throws IOException {
|
||||
|
||||
Writer w = response.getWriter();
|
||||
try {
|
||||
JSONWriter writer = new JSONWriter(w);
|
||||
writer.object();
|
||||
writer.key("status"); writer.value(status);
|
||||
writer.key("message"); writer.value(message);
|
||||
writer.endObject();
|
||||
w.flush();
|
||||
w.close();
|
||||
} catch (JSONException e) {
|
||||
// This can never occue
|
||||
}
|
||||
}
|
||||
|
||||
static public void respondJSON(HttpServletResponse response, Jsonizable o)
|
||||
throws IOException, JSONException {
|
||||
|
||||
respondJSON(response, o, new Properties());
|
||||
}
|
||||
|
||||
static public void respondJSON(
|
||||
HttpServletResponse response, Jsonizable o, Properties options)
|
||||
throws IOException, JSONException {
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
|
||||
Writer w = response.getWriter();
|
||||
JSONWriter writer = new JSONWriter(w);
|
||||
|
||||
o.write(writer, options);
|
||||
w.flush();
|
||||
w.close();
|
||||
}
|
||||
|
||||
static public void respondException(HttpServletResponse response, Exception e)
|
||||
throws IOException, ServletException {
|
||||
|
||||
logger.warn("Exception caught", e);
|
||||
|
||||
if (response == null) {
|
||||
throw new ServletException("Response object can't be null");
|
||||
}
|
||||
|
||||
try {
|
||||
JSONObject o = new JSONObject();
|
||||
o.put("code", "error");
|
||||
o.put("message", e.getMessage());
|
||||
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
e.printStackTrace(pw);
|
||||
pw.flush();
|
||||
sw.flush();
|
||||
|
||||
o.put("stack", sw.toString());
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
||||
respond(response, o.toString());
|
||||
} catch (JSONException e1) {
|
||||
e.printStackTrace(response.getWriter());
|
||||
}
|
||||
}
|
||||
|
||||
static public void redirect(HttpServletResponse response, String url) throws IOException {
|
||||
response.sendRedirect(url);
|
||||
}
|
||||
|
||||
static public int getIntegerParameter(HttpServletRequest request, String name, int def) {
|
||||
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
|
||||
try {
|
||||
return Integer.parseInt(request.getParameter(name));
|
||||
} catch (Exception e) {
|
||||
logger.warn("Error getting integer parameter", e);
|
||||
}
|
||||
return def;
|
||||
}
|
||||
|
||||
static public JSONObject getJsonParameter(HttpServletRequest request, String name) {
|
||||
if (request == null) throw new IllegalArgumentException("parameter 'request' should not be null");
|
||||
String value = request.getParameter(name);
|
||||
if (value != null) {
|
||||
try {
|
||||
return ParsingUtilities.evaluateJsonStringToObject(value);
|
||||
} catch (JSONException e) {
|
||||
logger.warn("Error getting json parameter", e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public void respondWithErrorPage(
|
||||
RefineServlet servlet,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response,
|
||||
String message,
|
||||
Throwable e
|
||||
) {
|
||||
respondWithErrorPage(servlet, request, response, message,
|
||||
HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e);
|
||||
}
|
||||
|
||||
static public void respondWithErrorPage(
|
||||
RefineServlet servlet,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response,
|
||||
String message,
|
||||
int status,
|
||||
Throwable e
|
||||
) {
|
||||
VelocityContext context = new VelocityContext();
|
||||
|
||||
context.put("message", message);
|
||||
|
||||
if (e != null) {
|
||||
StringWriter writer = new StringWriter();
|
||||
|
||||
e.printStackTrace(new PrintWriter(writer));
|
||||
|
||||
context.put("stack", writer.toString());
|
||||
} else {
|
||||
context.put("stack", "");
|
||||
}
|
||||
|
||||
try {
|
||||
response.setStatus(status);
|
||||
|
||||
servlet.getModule("core").sendTextFromTemplate(
|
||||
request, response, context, "error.vt", "UTF-8", "text/html", true);
|
||||
|
||||
} catch (Exception e1) {
|
||||
e1.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.commands.importing;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.commands.HttpUtilities;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
|
||||
public class CancelImportingJobCommand extends Command {
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(request.getParameter("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
} else {
|
||||
job.canceled = true;
|
||||
HttpUtilities.respond(response, "ok", "Job canceled");
|
||||
}
|
||||
}
|
||||
}
|
@ -43,8 +43,9 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
|
||||
public class CreateImportJobCommand extends Command {
|
||||
public class CreateImportingJobCommand extends Command {
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("create-import-job_command");
|
||||
|
||||
@ -52,7 +53,7 @@ public class CreateImportJobCommand extends Command {
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long id = ImportManager.singleton().createJob().id;
|
||||
long id = ImportingManager.createJob().id;
|
||||
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
response.setHeader("Content-Type", "application/json");
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -31,24 +31,38 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
package com.google.refine.commands.importing;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.model.Project;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
public interface StreamImporter extends Importer {
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
/**
|
||||
* @param inputStream stream to be imported
|
||||
* @param project project to import stream into
|
||||
* @param metadata metadata of new project
|
||||
* @param options
|
||||
* @throws ImportException
|
||||
*/
|
||||
public void read(InputStream inputStream, Project project,
|
||||
ProjectMetadata metadata, Properties options) throws ImportException;
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
|
||||
public class GetImportingConfigurationCommand extends Command {
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
Writer w = response.getWriter();
|
||||
JSONWriter writer = new JSONWriter(w);
|
||||
try {
|
||||
writer.object();
|
||||
writer.key("config"); ImportingManager.writeConfiguration(writer, new Properties());
|
||||
writer.endObject();
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
} finally {
|
||||
w.flush();
|
||||
w.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -34,9 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
package com.google.refine.commands.importing;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
@ -44,22 +43,18 @@ import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.commands.importing.ImportJob.State;
|
||||
|
||||
public class GetImportJobStatusCommand extends Command {
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("get-import-job-status_command");
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
|
||||
public class GetImportingJobStatusCommand extends Command {
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(request.getParameter("jobID"));
|
||||
ImportJob job = ImportManager.singleton().getJob(jobID);
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
|
||||
Writer w = response.getWriter();
|
||||
JSONWriter writer = new JSONWriter(w);
|
||||
@ -70,32 +65,11 @@ public class GetImportJobStatusCommand extends Command {
|
||||
writer.key("message"); writer.value("No such import job");
|
||||
} else {
|
||||
writer.key("code"); writer.value("ok");
|
||||
writer.key("state");
|
||||
if (job.state == State.NEW) {
|
||||
writer.value("new");
|
||||
} else if (job.state == State.RETRIEVING_DATA) {
|
||||
writer.value("retrieving");
|
||||
writer.key("progress"); writer.value(job.retrievingProgress);
|
||||
writer.key("bytesSaved"); writer.value(job.bytesSaved);
|
||||
} else if (job.state == State.READY) {
|
||||
writer.value("ready");
|
||||
} else if (job.state == State.ERROR) {
|
||||
writer.value("error");
|
||||
writer.key("message"); writer.value(job.errorMessage);
|
||||
if (job.exception != null) {
|
||||
StringWriter sw = new StringWriter();
|
||||
PrintWriter pw = new PrintWriter(sw);
|
||||
job.exception.printStackTrace(pw);
|
||||
pw.flush();
|
||||
sw.flush();
|
||||
|
||||
writer.key("stack"); writer.value(sw.toString());
|
||||
}
|
||||
}
|
||||
writer.key("job"); job.write(writer, new Properties());
|
||||
}
|
||||
writer.endObject();
|
||||
} catch (JSONException e) {
|
||||
throw new IOException(e);
|
||||
throw new ServletException(e);
|
||||
} finally {
|
||||
w.flush();
|
||||
w.close();
|
@ -1,49 +0,0 @@
|
||||
package com.google.refine.commands.importing;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import com.google.refine.model.meta.ImportSource;
|
||||
|
||||
public class ImportJob {
|
||||
static public enum State {
|
||||
NEW,
|
||||
RETRIEVING_DATA,
|
||||
READY,
|
||||
ERROR
|
||||
}
|
||||
|
||||
final public long id;
|
||||
final public File dir;
|
||||
|
||||
public long lastTouched;
|
||||
public State state = State.NEW;
|
||||
|
||||
// Data for retrieving phase
|
||||
public int retrievingProgress = 0; // from 0 to 100
|
||||
public long bytesSaved = 0; // in case percentage is unknown
|
||||
public String errorMessage;
|
||||
public Throwable exception;
|
||||
|
||||
public ImportSource importSource;
|
||||
|
||||
public ImportJob(long id, File dir) {
|
||||
this.id = id;
|
||||
this.dir = dir;
|
||||
|
||||
dir.mkdirs();
|
||||
}
|
||||
|
||||
public void touch() {
|
||||
lastTouched = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void dispose() {
|
||||
try {
|
||||
FileUtils.deleteDirectory(dir);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
}
|
@ -1,101 +0,0 @@
|
||||
package com.google.refine.commands.importing;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.model.meta.ImportSource;
|
||||
|
||||
public class ImportManager {
|
||||
static final private Map<String, Class<? extends ImportSource>> nameToImportSourceClass =
|
||||
new HashMap<String, Class<? extends ImportSource>>();
|
||||
|
||||
static final private Map<String, String> importSourceClassNameToName =
|
||||
new HashMap<String, String>();
|
||||
|
||||
/**
|
||||
* Register a single import source class.
|
||||
*
|
||||
* @param name importer verb for importer
|
||||
* @param importerObject object implementing the importer
|
||||
*
|
||||
* @return true if importer was loaded and registered successfully
|
||||
*/
|
||||
static public boolean registerImportSourceClass(String name, Class<? extends ImportSource> klass) {
|
||||
if (nameToImportSourceClass.containsKey(name)) {
|
||||
return false;
|
||||
}
|
||||
nameToImportSourceClass.put(name, klass);
|
||||
importSourceClassNameToName.put(klass.getName(), name);
|
||||
return true;
|
||||
}
|
||||
|
||||
static public Class<? extends ImportSource> getImportSourceClass(String name) {
|
||||
return nameToImportSourceClass.get(name);
|
||||
}
|
||||
|
||||
static public String getImportSourceClassName(Class<? extends ImportSource> klass) {
|
||||
return importSourceClassNameToName.get(klass.getName());
|
||||
}
|
||||
|
||||
final private RefineServlet servlet;
|
||||
final private Map<Long, ImportJob> jobs = new HashMap<Long, ImportJob>();
|
||||
private File importDir;
|
||||
|
||||
static private ImportManager singleton;
|
||||
|
||||
static public void initialize(RefineServlet servlet) {
|
||||
singleton = new ImportManager(servlet);
|
||||
}
|
||||
|
||||
static public ImportManager singleton() {
|
||||
return singleton;
|
||||
}
|
||||
|
||||
private ImportManager(RefineServlet servlet) {
|
||||
this.servlet = servlet;
|
||||
}
|
||||
|
||||
private File getImportDir() {
|
||||
if (importDir == null) {
|
||||
File tempDir = servlet.getTempDir();
|
||||
importDir = tempDir == null ? new File(".import-temp") : new File(tempDir, "import");
|
||||
|
||||
if (importDir.exists()) {
|
||||
try {
|
||||
// start fresh
|
||||
FileUtils.deleteDirectory(importDir);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
importDir.mkdirs();
|
||||
}
|
||||
return importDir;
|
||||
}
|
||||
|
||||
public ImportJob createJob() {
|
||||
long id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
|
||||
File jobDir = new File(getImportDir(), Long.toString(id));
|
||||
|
||||
ImportJob job = new ImportJob(id, jobDir);
|
||||
jobs.put(id, job);
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
public ImportJob getJob(long id) {
|
||||
return jobs.get(id);
|
||||
}
|
||||
|
||||
public void disposeJob(long id) {
|
||||
ImportJob job = getJob(id);
|
||||
if (job != null) {
|
||||
job.dispose();
|
||||
jobs.remove(id);
|
||||
}
|
||||
}
|
||||
}
|
@ -44,18 +44,40 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.commands.importing.ImportJob.State;
|
||||
import com.google.refine.model.meta.ImportSource;
|
||||
import com.google.refine.commands.HttpUtilities;
|
||||
import com.google.refine.importing.ImportingController;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class RetrieveImportContentCommand extends Command {
|
||||
public class ImportingControllerCommand extends Command {
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("retrieve-import-content_command");
|
||||
final static Logger logger = LoggerFactory.getLogger("importing-controller_command");
|
||||
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
ImportingController controller = getController(request);
|
||||
if (controller != null) {
|
||||
controller.doPost(request, response);
|
||||
} else {
|
||||
HttpUtilities.respond(response, "error", "No such import controller");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
ImportingController controller = getController(request);
|
||||
if (controller != null) {
|
||||
controller.doPost(request, response);
|
||||
} else {
|
||||
HttpUtilities.respond(response, "error", "No such import controller");
|
||||
}
|
||||
}
|
||||
|
||||
private ImportingController getController(HttpServletRequest request) {
|
||||
/*
|
||||
* The uploaded file is in the POST body as a "file part". If
|
||||
* we call request.getParameter() then the POST body will get
|
||||
@ -64,39 +86,10 @@ public class RetrieveImportContentCommand extends Command {
|
||||
* Don't call request.getParameter() before calling internalImport().
|
||||
*/
|
||||
Properties options = ParsingUtilities.parseUrlParameters(request);
|
||||
|
||||
long jobID = Long.parseLong(options.getProperty("jobID"));
|
||||
ImportJob job = ImportManager.singleton().getJob(jobID);
|
||||
if (job == null) {
|
||||
respondWithErrorPage(request, response, "No such import job", null);
|
||||
return;
|
||||
} else if (job.state != State.NEW) {
|
||||
respondWithErrorPage(request, response, "Import job already started", null);
|
||||
return;
|
||||
}
|
||||
|
||||
Class<? extends ImportSource> importSourceClass =
|
||||
ImportManager.getImportSourceClass(options.getProperty("source"));
|
||||
if (importSourceClass == null) {
|
||||
respondWithErrorPage(request, response, "No such import source class", null);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
ImportSource importSource = importSourceClass.newInstance();
|
||||
job.importSource = importSource;
|
||||
job.state = State.RETRIEVING_DATA;
|
||||
|
||||
importSource.retrieveContent(request, options, job);
|
||||
|
||||
job.retrievingProgress = 100;
|
||||
job.state = State.READY;
|
||||
} catch (Throwable e) {e.printStackTrace();
|
||||
job.state = State.ERROR;
|
||||
job.errorMessage = e.getLocalizedMessage();
|
||||
job.exception = e;
|
||||
|
||||
respondWithErrorPage(request, response, "Failed to kick start import job", e);
|
||||
String name = options.getProperty("controller");
|
||||
if (name != null) {
|
||||
return ImportingManager.controllers.get(name);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010,2011. Google Inc.
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -33,60 +33,22 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.commands.project;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FilterInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Properties;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.fileupload.FileItemIterator;
|
||||
import org.apache.commons.fileupload.FileItemStream;
|
||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||
import org.apache.commons.fileupload.util.Streams;
|
||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||
import org.apache.tools.tar.TarEntry;
|
||||
import org.apache.tools.tar.TarInputStream;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.ProjectManager;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.importers.Importer;
|
||||
import com.google.refine.importers.ImporterRegistry;
|
||||
import com.google.refine.importers.ReaderImporter;
|
||||
import com.google.refine.importers.StreamImporter;
|
||||
import com.google.refine.importers.TsvCsvImporter;
|
||||
import com.google.refine.importers.UrlImporter;
|
||||
import com.google.refine.commands.HttpUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.IOUtils;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
import com.ibm.icu.text.CharsetMatch;
|
||||
|
||||
public class CreateProjectCommand extends Command {
|
||||
|
||||
@ -99,13 +61,6 @@ public class CreateProjectCommand extends Command {
|
||||
ProjectManager.singleton.setBusy(true);
|
||||
try {
|
||||
|
||||
/*
|
||||
* Set UTF-8 as request encoding, then ServletFileUpload will use it as default encoding
|
||||
*/
|
||||
if (request.getCharacterEncoding() == null) {
|
||||
request.setCharacterEncoding("UTF-8");
|
||||
}
|
||||
|
||||
/*
|
||||
* The uploaded file is in the POST body as a "file part". If
|
||||
* we call request.getParameter() then the POST body will get
|
||||
@ -118,7 +73,7 @@ public class CreateProjectCommand extends Command {
|
||||
Project project = new Project();
|
||||
ProjectMetadata pm = new ProjectMetadata();
|
||||
|
||||
internalImport(request, project, pm, options);
|
||||
//internalImport(request, project, pm, options);
|
||||
|
||||
/*
|
||||
* The import process above populates options with parameters
|
||||
@ -133,382 +88,11 @@ public class CreateProjectCommand extends Command {
|
||||
|
||||
project.update();
|
||||
|
||||
redirect(response, "/project?project=" + project.id);
|
||||
HttpUtilities.redirect(response, "/project?project=" + project.id);
|
||||
} catch (Exception e) {
|
||||
respondWithErrorPage(request, response, "Failed to import file", e);
|
||||
} finally {
|
||||
ProjectManager.singleton.setBusy(false);
|
||||
}
|
||||
}
|
||||
|
||||
protected void internalImport(
|
||||
HttpServletRequest request,
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
Properties options
|
||||
) throws Exception {
|
||||
|
||||
ServletFileUpload upload = new ServletFileUpload();
|
||||
String url = options.getProperty("url");
|
||||
boolean imported = false;
|
||||
|
||||
FileItemIterator iter = upload.getItemIterator(request);
|
||||
while (iter.hasNext()) {
|
||||
FileItemStream item = iter.next();
|
||||
String name = item.getFieldName().toLowerCase();
|
||||
InputStream stream = item.openStream();
|
||||
if (item.isFormField()) {
|
||||
if (name.equals("raw-text")) {
|
||||
Reader reader = new InputStreamReader(stream,request.getCharacterEncoding());
|
||||
try {
|
||||
internalInvokeImporter(project, new TsvCsvImporter(), metadata, options, reader);
|
||||
imported = true;
|
||||
} finally {
|
||||
reader.close();
|
||||
}
|
||||
} else if (name.equals("project-url")) {
|
||||
url = Streams.asString(stream, request.getCharacterEncoding());
|
||||
} else {
|
||||
options.put(name, Streams.asString(stream, request.getCharacterEncoding()));
|
||||
}
|
||||
} else {
|
||||
String fileName = item.getName().toLowerCase();
|
||||
if (fileName.length() > 0) {
|
||||
try {
|
||||
internalImportFile(project, metadata, options, fileName, stream);
|
||||
imported = true;
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!imported && url != null && url.length() > 0) {
|
||||
internalImportURL(request, project, metadata, options, url);
|
||||
}
|
||||
}
|
||||
|
||||
static class SafeInputStream extends FilterInputStream {
|
||||
public SafeInputStream(InputStream stream) {
|
||||
super(stream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// some libraries attempt to close the input stream while they can't
|
||||
// read anymore from it... unfortunately this behavior prevents
|
||||
// the zip input stream from functioning correctly so we just have
|
||||
// to ignore those close() calls and just close it ourselves
|
||||
// forcefully later
|
||||
}
|
||||
|
||||
public void reallyClose() throws IOException {
|
||||
super.close();
|
||||
}
|
||||
}
|
||||
|
||||
protected void internalImportFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
Properties options,
|
||||
String fileName,
|
||||
InputStream inputStream
|
||||
) throws Exception {
|
||||
|
||||
logger.info("Importing '{}'", fileName);
|
||||
|
||||
if (fileName.endsWith(".zip") || fileName.endsWith(".tar") || fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz") || fileName.endsWith(".tar.bz2")) {
|
||||
|
||||
// first, save the file on disk, since we need two passes and we might
|
||||
// not have enough memory to keep it all in there
|
||||
File file = save(inputStream);
|
||||
|
||||
// in the first pass, gather statistics about what files are in there
|
||||
// unfortunately, we have to rely on files extensions, which is horrible but
|
||||
// better than nothing
|
||||
HashMap<String,Integer> ext_map = new HashMap<String,Integer>();
|
||||
|
||||
FileInputStream fis = new FileInputStream(file);
|
||||
InputStream is = getStream(fileName, fis);
|
||||
|
||||
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
||||
// archive-like input streams so while both TarInputStream and ZipInputStream
|
||||
// behave precisely the same, there is no polymorphic behavior so we have
|
||||
// to treat each instance explicitly... one of those times you wish you had
|
||||
// closures
|
||||
try {
|
||||
if (is instanceof TarInputStream) {
|
||||
TarInputStream tis = (TarInputStream) is;
|
||||
TarEntry te;
|
||||
while ((te = tis.getNextEntry()) != null) {
|
||||
if (!te.isDirectory()) {
|
||||
mapExtension(te.getName(),ext_map);
|
||||
}
|
||||
}
|
||||
} else if (is instanceof ZipInputStream) {
|
||||
ZipInputStream zis = (ZipInputStream) is;
|
||||
ZipEntry ze;
|
||||
while ((ze = zis.getNextEntry()) != null) {
|
||||
if (!ze.isDirectory()) {
|
||||
mapExtension(ze.getName(),ext_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
fis.close();
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
|
||||
// sort extensions by how often they appear
|
||||
List<Entry<String,Integer>> values = new ArrayList<Entry<String,Integer>>(ext_map.entrySet());
|
||||
Collections.sort(values, new ValuesComparator());
|
||||
|
||||
if (values.size() == 0) {
|
||||
throw new RuntimeException("The archive contains no files.");
|
||||
}
|
||||
|
||||
// this will contain the set of extensions we'll load from the archive
|
||||
HashSet<String> exts = new HashSet<String>();
|
||||
|
||||
// find the extension that is most frequent or those who share the highest frequency value
|
||||
if (values.size() == 1) {
|
||||
exts.add(values.get(0).getKey());
|
||||
} else {
|
||||
Entry<String,Integer> most_frequent = values.get(0);
|
||||
Entry<String,Integer> second_most_frequent = values.get(1);
|
||||
if (most_frequent.getValue() > second_most_frequent.getValue()) { // we have a winner
|
||||
exts.add(most_frequent.getKey());
|
||||
} else { // multiple extensions have the same frequency
|
||||
int winning_frequency = most_frequent.getValue();
|
||||
for (Entry<String,Integer> e : values) {
|
||||
if (e.getValue() == winning_frequency) {
|
||||
exts.add(e.getKey());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("Most frequent extensions: {}", exts.toString());
|
||||
|
||||
// second pass, load the data for real
|
||||
is = getStream(fileName, new FileInputStream(file));
|
||||
SafeInputStream sis = new SafeInputStream(is);
|
||||
try {
|
||||
if (is instanceof TarInputStream) {
|
||||
TarInputStream tis = (TarInputStream) is;
|
||||
TarEntry te;
|
||||
while ((te = tis.getNextEntry()) != null) {
|
||||
if (!te.isDirectory()) {
|
||||
String name = te.getName();
|
||||
String ext = getExtension(name)[1];
|
||||
if (exts.contains(ext)) {
|
||||
internalImportFile(project, metadata, options, name, sis);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (is instanceof ZipInputStream) {
|
||||
ZipInputStream zis = (ZipInputStream) is;
|
||||
ZipEntry ze;
|
||||
while ((ze = zis.getNextEntry()) != null) {
|
||||
if (!ze.isDirectory()) {
|
||||
String name = ze.getName();
|
||||
String ext = getExtension(name)[1];
|
||||
if (exts.contains(ext)) {
|
||||
internalImportFile(project, metadata, options, name, sis);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
sis.reallyClose();
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
|
||||
} else if (fileName.endsWith(".gz")) {
|
||||
internalImportFile(project, metadata, options, getExtension(fileName)[0], new GZIPInputStream(inputStream));
|
||||
} else if (fileName.endsWith(".bz2")) {
|
||||
internalImportFile(project, metadata, options, getExtension(fileName)[0], new CBZip2InputStream(inputStream));
|
||||
} else {
|
||||
load(project, metadata, options, fileName, inputStream);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ValuesComparator implements Comparator<Entry<String,Integer>>, Serializable {
|
||||
private static final long serialVersionUID = 8845863616149837657L;
|
||||
|
||||
public int compare(Entry<String,Integer> o1, Entry<String,Integer> o2) {
|
||||
return o2.getValue() - o1.getValue();
|
||||
}
|
||||
}
|
||||
|
||||
private void load(Project project, ProjectMetadata metadata, Properties options, String fileName, InputStream inputStream) throws Exception {
|
||||
Importer importer = ImporterRegistry.guessImporter(null, fileName);
|
||||
internalInvokeImporter(project, importer, metadata, options, inputStream, null);
|
||||
}
|
||||
|
||||
private File save(InputStream is) throws IOException {
|
||||
File temp = this.servlet.getTempFile(Long.toString(System.currentTimeMillis()));
|
||||
temp.deleteOnExit();
|
||||
IOUtils.copy(is,temp);
|
||||
is.close();
|
||||
return temp;
|
||||
}
|
||||
|
||||
private void mapExtension(String name, Map<String,Integer> ext_map) {
|
||||
String ext = getExtension(name)[1];
|
||||
if (ext_map.containsKey(ext)) {
|
||||
ext_map.put(ext, ext_map.get(ext) + 1);
|
||||
} else {
|
||||
ext_map.put(ext, 1);
|
||||
}
|
||||
}
|
||||
|
||||
private InputStream getStream(String fileName, InputStream is) throws IOException {
|
||||
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
||||
return new TarInputStream(new GZIPInputStream(is));
|
||||
} else if (fileName.endsWith(".tar.bz2")) {
|
||||
return new TarInputStream(new CBZip2InputStream(is));
|
||||
} else if (fileName.endsWith(".tar")) {
|
||||
return new TarInputStream(is);
|
||||
} else {
|
||||
return new ZipInputStream(is);
|
||||
}
|
||||
}
|
||||
|
||||
private String[] getExtension(String filename) {
|
||||
String[] result = new String[2];
|
||||
int ext_index = filename.lastIndexOf('.');
|
||||
result[0] = (ext_index == -1) ? filename : filename.substring(0,ext_index);
|
||||
result[1] = (ext_index == -1) ? "" : filename.substring(ext_index + 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void internalImportURL(
|
||||
HttpServletRequest request,
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
Properties options,
|
||||
String urlString) throws Exception {
|
||||
|
||||
// Little dance to get URL properly encoded (e.g. for funky Fusion Tables queries)
|
||||
URL url = new URL(urlString);
|
||||
url = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null).toURL();
|
||||
|
||||
URLConnection connection = null;
|
||||
|
||||
// Try for a URL importer first
|
||||
Importer importer = ImporterRegistry.guessUrlImporter(url);
|
||||
if (importer instanceof UrlImporter) {
|
||||
((UrlImporter) importer).read(url, project, metadata, options);
|
||||
} else {
|
||||
// If we couldn't find one, try opening URL and treating as a stream
|
||||
try {
|
||||
connection = url.openConnection();
|
||||
connection.setConnectTimeout(5000);
|
||||
connection.connect();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot connect to " + urlString, e);
|
||||
}
|
||||
|
||||
InputStream inputStream = null;
|
||||
try {
|
||||
inputStream = connection.getInputStream();
|
||||
} catch (Exception e) {
|
||||
throw new Exception("Cannot retrieve content from " + url, e);
|
||||
}
|
||||
|
||||
try {
|
||||
String contentType = connection.getContentType();
|
||||
int semicolon = contentType.indexOf(';');
|
||||
if (semicolon >= 0) {
|
||||
contentType = contentType.substring(0, semicolon);
|
||||
}
|
||||
|
||||
importer = ImporterRegistry.guessImporter(contentType, url.getPath());
|
||||
|
||||
internalInvokeImporter(project, importer, metadata, options, inputStream, connection.getContentEncoding());
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void internalInvokeImporter(
|
||||
Project project,
|
||||
Importer importer,
|
||||
ProjectMetadata metadata,
|
||||
Properties options,
|
||||
InputStream rawInputStream,
|
||||
String encoding
|
||||
) throws Exception {
|
||||
if (importer instanceof ReaderImporter) {
|
||||
|
||||
// NOTE: The ICU4J char detection code requires the input stream to support mark/reset.
|
||||
InputStream inputStream = rawInputStream;
|
||||
if (!inputStream.markSupported()) {
|
||||
inputStream = new BufferedInputStream(rawInputStream);
|
||||
}
|
||||
|
||||
CharsetDetector detector = new CharsetDetector();
|
||||
detector.setDeclaredEncoding("utf8"); // most of the content on the web is encoded in UTF-8 so start with that
|
||||
options.setProperty("encoding_confidence", "0"); // in case we don't find anything suitable
|
||||
|
||||
InputStreamReader reader = null;
|
||||
CharsetMatch[] charsetMatches = detector.setText(inputStream).detectAll();
|
||||
for (CharsetMatch charsetMatch : charsetMatches) { // matches are ordered - first is best match
|
||||
String matchName = charsetMatch.getName();
|
||||
int confidence = charsetMatch.getConfidence();
|
||||
// Threshold was 50. Do we ever want to not use our best guess even if it's low confidence? - tfmorris
|
||||
if (confidence >= 20) {
|
||||
logger.info("Encoding guess: {} [confidence: {}]", matchName, confidence);
|
||||
try {
|
||||
reader = new InputStreamReader(inputStream, matchName);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
logger.debug("Unsupported InputStreamReader charset encoding: {} [confidence: {}]; skipping", matchName, confidence);
|
||||
continue;
|
||||
}
|
||||
// Encoding will be set later at common exit point
|
||||
options.setProperty("encoding_confidence", Integer.toString(confidence));
|
||||
break;
|
||||
} else {
|
||||
logger.debug("Poor encoding guess: {} [confidence: {}]; skipping", matchName, confidence);
|
||||
}
|
||||
}
|
||||
|
||||
if (reader == null) { // when all else fails
|
||||
if (encoding != null) {
|
||||
reader = new InputStreamReader(inputStream, encoding);
|
||||
} else {
|
||||
reader = new InputStreamReader(inputStream);
|
||||
}
|
||||
}
|
||||
// Get the actual encoding which will be used and save it for project metadata
|
||||
options.setProperty("encoding", reader.getEncoding());
|
||||
|
||||
((ReaderImporter) importer).read(reader, project, metadata, options);
|
||||
} else {
|
||||
// TODO: How do we set character encoding here?
|
||||
// Things won't work right if it's not set, so pick some arbitrary values
|
||||
if (encoding != null) {
|
||||
options.setProperty("encoding", encoding);
|
||||
}
|
||||
options.setProperty("encoding_confidence", "0");
|
||||
((StreamImporter) importer).read(rawInputStream, project, metadata, options);
|
||||
}
|
||||
}
|
||||
|
||||
protected void internalInvokeImporter(
|
||||
Project project,
|
||||
ReaderImporter importer,
|
||||
ProjectMetadata metadata,
|
||||
Properties options,
|
||||
Reader reader
|
||||
) throws Exception {
|
||||
importer.read(reader, project, metadata, options);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -44,17 +44,43 @@ import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.commands.HttpUtilities;
|
||||
import com.google.refine.expr.MetaParser;
|
||||
import com.google.refine.expr.MetaParser.LanguageInfo;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.model.OverlayModel;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
public class GetModelsCommand extends Command {
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
internalRespond(request, response);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
|
||||
internalRespond(request, response);
|
||||
}
|
||||
|
||||
protected void internalRespond(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
Project project = getProject(request);
|
||||
Project project = null;
|
||||
|
||||
// This command also supports retrieving rows for an importing job.
|
||||
String importingJobID = request.getParameter("importingJobID");
|
||||
if (importingJobID != null) {
|
||||
long jobID = Long.parseLong(importingJobID);
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job != null) {
|
||||
project = job.project;
|
||||
}
|
||||
}
|
||||
if (project == null) {
|
||||
project = getProject(request);
|
||||
}
|
||||
|
||||
try {
|
||||
response.setCharacterEncoding("UTF-8");
|
||||
@ -92,7 +118,7 @@ public class GetModelsCommand extends Command {
|
||||
|
||||
writer.endObject();
|
||||
} catch (JSONException e) {
|
||||
respondException(response, e);
|
||||
HttpUtilities.respondException(response, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,8 @@ import com.google.refine.browsing.RecordVisitor;
|
||||
import com.google.refine.browsing.RowVisitor;
|
||||
import com.google.refine.browsing.Engine.Mode;
|
||||
import com.google.refine.commands.Command;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Record;
|
||||
import com.google.refine.model.Row;
|
||||
@ -77,7 +79,21 @@ public class GetRowsCommand extends Command {
|
||||
throws ServletException, IOException {
|
||||
|
||||
try {
|
||||
Project project = getProject(request);
|
||||
Project project = null;
|
||||
|
||||
// This command also supports retrieving rows for an importing job.
|
||||
String importingJobID = request.getParameter("importingJobID");
|
||||
if (importingJobID != null) {
|
||||
long jobID = Long.parseLong(importingJobID);
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job != null) {
|
||||
project = job.project;
|
||||
}
|
||||
}
|
||||
if (project == null) {
|
||||
project = getProject(request);
|
||||
}
|
||||
|
||||
Engine engine = getEngine(request, project);
|
||||
String callback = request.getParameter("callback");
|
||||
|
||||
|
@ -33,16 +33,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.common.usermodel.Hyperlink;
|
||||
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
|
||||
@ -51,184 +50,152 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Recon;
|
||||
import com.google.refine.model.ReconCandidate;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.model.Recon.Judgment;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class ExcelImporter implements StreamImporter {
|
||||
protected boolean _xmlBased;
|
||||
public class ExcelImporter extends TabularImportingParserBase {
|
||||
public ExcelImporter() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
int limit = ImporterUtilities.getIntegerOption("limit", options, -1);
|
||||
int skip = ImporterUtilities.getIntegerOption("skip", options, 0);
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
boolean xmlBased = "text/xml/xlsx".equals(format);
|
||||
JSONUtilities.safePut(options, "xmlBased", xmlBased);
|
||||
|
||||
JSONArray sheetRecords = new JSONArray();
|
||||
JSONUtilities.safePut(options, "sheetRecords", sheetRecords);
|
||||
try {
|
||||
JSONObject firstFileRecord = fileRecords.get(0);
|
||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
Workbook wb = xmlBased ?
|
||||
new XSSFWorkbook(is) :
|
||||
new HSSFWorkbook(new POIFSFileSystem(is));
|
||||
|
||||
int sheetCount = wb.getNumberOfSheets();
|
||||
boolean hasData = false;
|
||||
for (int i = 0; i < sheetCount; i++) {
|
||||
Sheet sheet = wb.getSheetAt(i);
|
||||
int rows = sheet.getLastRowNum() - sheet.getFirstRowNum() + 1;
|
||||
|
||||
JSONObject sheetRecord = new JSONObject();
|
||||
JSONUtilities.safePut(sheetRecord, "name", sheet.getSheetName());
|
||||
JSONUtilities.safePut(sheetRecord, "rows", rows);
|
||||
if (hasData) {
|
||||
JSONUtilities.safePut(sheetRecord, "selected", false);
|
||||
} else if (rows > 1) {
|
||||
JSONUtilities.safePut(sheetRecord, "selected", true);
|
||||
hasData = true;
|
||||
}
|
||||
JSONUtilities.append(sheetRecords, sheetRecord);
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
InputStream inputStream,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
boolean xmlBased = JSONUtilities.getBoolean(options, "xmlBased", false);
|
||||
Workbook wb = null;
|
||||
try {
|
||||
wb = _xmlBased ?
|
||||
wb = xmlBased ?
|
||||
new XSSFWorkbook(inputStream) :
|
||||
new HSSFWorkbook(new POIFSFileSystem(inputStream));
|
||||
} catch (IOException e) {
|
||||
throw new ImportException(
|
||||
exceptions.add(new ImportException(
|
||||
"Attempted to parse as an Excel file but failed. " +
|
||||
"Try to use Excel to re-save the file as a different Excel version or as TSV and upload again.",
|
||||
e
|
||||
);
|
||||
));
|
||||
return;
|
||||
} catch (ArrayIndexOutOfBoundsException e){
|
||||
throw new ImportException(
|
||||
exceptions.add(new ImportException(
|
||||
"Attempted to parse file as an Excel file but failed. " +
|
||||
"This is probably caused by a corrupt excel file, or due to the file having previously been created or saved by a non-Microsoft application. " +
|
||||
"Please try opening the file in Microsoft Excel and resaving it, then try re-uploading the file. " +
|
||||
"See https://issues.apache.org/bugzilla/show_bug.cgi?id=48261 for further details",
|
||||
e);
|
||||
e
|
||||
));
|
||||
return;
|
||||
}
|
||||
|
||||
Sheet sheet = wb.getSheetAt(0);
|
||||
int[] sheets = JSONUtilities.getIntArray(options, "sheets");
|
||||
for (int sheetIndex : sheets) {
|
||||
final Sheet sheet = wb.getSheetAt(sheetIndex);
|
||||
final int lastRow = sheet.getLastRowNum();
|
||||
|
||||
int firstRow = sheet.getFirstRowNum();
|
||||
int lastRow = sheet.getLastRowNum();
|
||||
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
Set<String> columnNameSet = new HashSet<String>();
|
||||
Map<String, Integer> columnRootNameToIndex = new HashMap<String, Integer>();
|
||||
|
||||
int rowsWithData = 0;
|
||||
TableDataReader dataReader = new TableDataReader() {
|
||||
int nextRow = 0;
|
||||
Map<String, Recon> reconMap = new HashMap<String, Recon>();
|
||||
|
||||
for (int r = firstRow; r <= lastRow; r++) {
|
||||
org.apache.poi.ss.usermodel.Row row = sheet.getRow(r);
|
||||
if (row == null) {
|
||||
continue;
|
||||
} else if (ignoreLines > 0) {
|
||||
ignoreLines--;
|
||||
continue;
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
if (nextRow >= lastRow) {
|
||||
return null;
|
||||
}
|
||||
|
||||
short firstCell = row.getFirstCellNum();
|
||||
List<Object> cells = new ArrayList<Object>();
|
||||
org.apache.poi.ss.usermodel.Row row = sheet.getRow(nextRow++);
|
||||
if (row != null) {
|
||||
short lastCell = row.getLastCellNum();
|
||||
if (firstCell < 0 || firstCell > lastCell) {
|
||||
continue;
|
||||
for (short cellIndex = 0; cellIndex <= lastCell; cellIndex++) {
|
||||
Cell cell = null;
|
||||
|
||||
org.apache.poi.ss.usermodel.Cell sourceCell = row.getCell(cellIndex);
|
||||
if (sourceCell != null) {
|
||||
cell = extractCell(sourceCell, reconMap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Still processing header lines
|
||||
*/
|
||||
if (headerLines > 0) {
|
||||
headerLines--;
|
||||
|
||||
for (int c = firstCell; c <= lastCell; c++) {
|
||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||
if (cell != null) {
|
||||
Serializable value = extractCell(cell);
|
||||
String text = value != null ? value.toString() : null;
|
||||
if (text != null && text.length() > 0) {
|
||||
while (columnNames.size() < c + 1) {
|
||||
columnNames.add(null);
|
||||
cells.add(cell);
|
||||
}
|
||||
|
||||
String existingName = columnNames.get(c);
|
||||
String name = (existingName == null) ? text : (existingName + " " + text);
|
||||
|
||||
columnNames.set(c, name);
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
};
|
||||
|
||||
readTable(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
dataReader,
|
||||
fileSource + "#" + sheet.getSheetName(),
|
||||
limit,
|
||||
options,
|
||||
exceptions
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (headerLines == 0) {
|
||||
for (int i = 0; i < columnNames.size(); i++) {
|
||||
String rootName = columnNames.get(i);
|
||||
if (rootName == null) {
|
||||
continue;
|
||||
}
|
||||
setUnduplicatedColumnName(rootName, columnNames, i, columnNameSet, columnRootNameToIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Processing data rows
|
||||
*/
|
||||
} else {
|
||||
Row newRow = new Row(columnNames.size());
|
||||
boolean hasData = false;
|
||||
|
||||
for (int c = firstCell; c <= lastCell; c++) {
|
||||
org.apache.poi.ss.usermodel.Cell cell = row.getCell(c);
|
||||
if (cell == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Cell ourCell = extractCell(cell, reconMap);
|
||||
if (ourCell != null) {
|
||||
while (columnNames.size() < c + 1) {
|
||||
columnNames.add(null);
|
||||
}
|
||||
if (columnNames.get(c) == null) {
|
||||
setUnduplicatedColumnName("Column", columnNames, c, columnNameSet, columnRootNameToIndex);
|
||||
}
|
||||
|
||||
newRow.setCell(c, ourCell);
|
||||
hasData = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasData) {
|
||||
rowsWithData++;
|
||||
|
||||
if (skip <= 0 || rowsWithData > skip) {
|
||||
project.rows.add(newRow);
|
||||
project.columnModel.setMaxCellIndex(newRow.cells.size());
|
||||
|
||||
if (limit > 0 && project.rows.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Create columns
|
||||
*/
|
||||
for (int c = 0; c < columnNames.size(); c++) {
|
||||
String name = columnNames.get(c);
|
||||
if (name != null) {
|
||||
Column column = new Column(c, name);
|
||||
project.columnModel.columns.add(column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void setUnduplicatedColumnName(
|
||||
String rootName, List<String> columnNames, int index, Set<String> columnNameSet, Map<String, Integer> columnRootNameToIndex) {
|
||||
if (columnNameSet.contains(rootName)) {
|
||||
int startIndex = columnRootNameToIndex.containsKey(rootName) ? columnRootNameToIndex.get(rootName) : 2;
|
||||
while (true) {
|
||||
String name = rootName + " " + startIndex;
|
||||
if (columnNameSet.contains(name)) {
|
||||
startIndex++;
|
||||
} else {
|
||||
columnNames.set(index, name);
|
||||
columnNameSet.add(name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
columnRootNameToIndex.put(rootName, startIndex + 1);
|
||||
} else {
|
||||
columnNames.set(index, rootName);
|
||||
columnNameSet.add(rootName);
|
||||
}
|
||||
}
|
||||
|
||||
protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||
static protected Serializable extractCell(org.apache.poi.ss.usermodel.Cell cell) {
|
||||
int cellType = cell.getCellType();
|
||||
if (cellType == org.apache.poi.ss.usermodel.Cell.CELL_TYPE_FORMULA) {
|
||||
cellType = cell.getCachedFormulaResultType();
|
||||
@ -259,7 +226,7 @@ public class ExcelImporter implements StreamImporter {
|
||||
return value;
|
||||
}
|
||||
|
||||
protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||
static protected Cell extractCell(org.apache.poi.ss.usermodel.Cell cell, Map<String, Recon> reconMap) {
|
||||
Serializable value = extractCell(cell);
|
||||
|
||||
if (value != null) {
|
||||
@ -312,33 +279,4 @@ public class ExcelImporter implements StreamImporter {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
if ("application/msexcel".equals(contentType) ||
|
||||
"application/x-msexcel".equals(contentType) ||
|
||||
"application/x-ms-excel".equals(contentType) ||
|
||||
"application/vnd.ms-excel".equals(contentType) ||
|
||||
"application/x-excel".equals(contentType) ||
|
||||
"application/xls".equals(contentType)) {
|
||||
this._xmlBased = false;
|
||||
return true;
|
||||
} else if("application/x-xls".equals(contentType)) {
|
||||
this._xmlBased = true;
|
||||
return true;
|
||||
}
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (fileName.endsWith(".xls")) {
|
||||
this._xmlBased = false;
|
||||
return true;
|
||||
} else if (fileName.endsWith(".xlsx")) {
|
||||
this._xmlBased = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -1,177 +1,105 @@
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TODO this class is almost an exact copy of TsvCsvImporter. Could we combine the two, or combine common functions into a common abstract supertype?
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("FixedWidthImporter");
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
||||
//filter out tree structure data
|
||||
if("application/json".equals(contentType)||
|
||||
"text/json".equals(contentType)||
|
||||
"application/xml".equals(contentType) ||
|
||||
"text/xml".equals(contentType) ||
|
||||
"application/rss+xml".equals(contentType) ||
|
||||
"application/atom+xml".equals(contentType) ||
|
||||
"application/rdf+xml".equals(contentType)) //TODO add more tree data types.
|
||||
return false;
|
||||
|
||||
return
|
||||
"text/plain".equals(contentType)
|
||||
|| "text/fixed-width".equals(contentType); //FIXME Is text/fixed-width a valid contentType?
|
||||
}
|
||||
return false;
|
||||
public class FixedWidthImporter extends TabularImportingParserBase {
|
||||
public FixedWidthImporter() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project,
|
||||
ProjectMetadata metadata, Properties options)
|
||||
throws ImportException {
|
||||
read(new InputStreamReader(inputStream), project, metadata, options);
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
JSONArray columnWidths = new JSONArray();
|
||||
|
||||
JSONObject firstFileRecord = fileRecords.get(0);
|
||||
String encoding = ImportingUtilities.getEncoding(firstFileRecord);
|
||||
String location = JSONUtilities.getString(firstFileRecord, "location", null);
|
||||
if (location != null) {
|
||||
File file = new File(job.getRawDataDir(), location);
|
||||
int[] columnWidthsA = guessColumnWidths(file, encoding);
|
||||
if (columnWidthsA != null) {
|
||||
for (int w : columnWidthsA) {
|
||||
JSONUtilities.append(columnWidths, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||
JSONUtilities.safePut(options, "headerLines", 0);
|
||||
JSONUtilities.safePut(options, "columnWidths", columnWidths);
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(Reader reader, Project project, ProjectMetadata metadata,
|
||||
Properties options) throws ImportException {
|
||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
||||
String columnWidths = options.getProperty("fixed-column-widths");
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
|
||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
|
||||
read(lnReader, project, columnWidths,
|
||||
limit, skip, ignoreLines, headerLines,
|
||||
guessValueType, splitIntoColumns
|
||||
);
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
||||
final int[] columnWidths = JSONUtilities.getIntArray(options, "columnWidths");
|
||||
|
||||
final List<Object> columnNames;
|
||||
if (options.has("columnNames")) {
|
||||
columnNames = new ArrayList<Object>();
|
||||
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||
for (String s : strings) {
|
||||
columnNames.add(s);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param lnReader
|
||||
* LineNumberReader used to read file or string contents
|
||||
* @param project
|
||||
* The project into which the parsed data will be added
|
||||
* @param columnWidths
|
||||
* Expects a comma separated string of integers which indicate the number of characters in each line
|
||||
* @param limit
|
||||
* The maximum number of rows of data to import
|
||||
* @param skip
|
||||
* The number of initial data rows to skip
|
||||
* @param ignoreLines
|
||||
* The number of initial lines within the data source which should be ignored entirely
|
||||
* @param headerLines
|
||||
* The number of lines in the data source which describe each column
|
||||
* @param guessValueType
|
||||
* Whether the parser should try and guess the type of the value being parsed
|
||||
* @param splitIntoColumns
|
||||
* Whether the parser should try and split the data source into columns
|
||||
* @throws IOException
|
||||
*/
|
||||
public void read(LineNumberReader lnReader, Project project,
|
||||
String sep, int limit, int skip, int ignoreLines,
|
||||
int headerLines, boolean guessValueType, boolean splitIntoColumns) throws ImportException{
|
||||
|
||||
int[] columnWidths = null;
|
||||
|
||||
columnWidths = getColumnWidthsFromString( sep );
|
||||
|
||||
if(columnWidths.length < 2)
|
||||
splitIntoColumns = false;
|
||||
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
String line = null;
|
||||
int rowsWithData = 0;
|
||||
|
||||
try {
|
||||
while ((line = lnReader.readLine()) != null) {
|
||||
if (ignoreLines > 0) {
|
||||
ignoreLines--;
|
||||
continue;
|
||||
} else if (StringUtils.isBlank(line)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (headerLines > 0) {
|
||||
//column headers
|
||||
headerLines--;
|
||||
|
||||
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
|
||||
|
||||
for (int c = 0; c < cells.size(); c++) {
|
||||
String cell = cells.get(c).trim();
|
||||
//add column even if cell is blank
|
||||
ImporterUtilities.appendColumnName(columnNames, c, cell);
|
||||
}
|
||||
JSONUtilities.safePut(options, "headerLines", 1);
|
||||
} else {
|
||||
//data
|
||||
Row row = new Row(columnNames.size());
|
||||
columnNames = null;
|
||||
}
|
||||
|
||||
ArrayList<String> cells = getCells(line, columnWidths, splitIntoColumns);
|
||||
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
if( cells != null && cells.size() > 0 )
|
||||
rowsWithData++;
|
||||
TableDataReader dataReader = new TableDataReader() {
|
||||
boolean usedColumnNames = false;
|
||||
|
||||
if (skip <=0 || rowsWithData > skip){
|
||||
//add parsed data to row
|
||||
for(String s : cells){
|
||||
if (ExpressionUtils.isNonBlankData(s)) {
|
||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
||||
row.cells.add(new Cell(value, null));
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
if (columnNames != null && !usedColumnNames) {
|
||||
usedColumnNames = true;
|
||||
return columnNames;
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
String line = lnReader.readLine();
|
||||
if (line == null) {
|
||||
return null;
|
||||
} else {
|
||||
return getCells(line, columnWidths);
|
||||
}
|
||||
}
|
||||
project.rows.add(row);
|
||||
project.columnModel.setMaxCellIndex(row.cells.size());
|
||||
|
||||
ImporterUtilities.ensureColumnsInRowExist(columnNames, row);
|
||||
|
||||
if (limit > 0 && project.rows.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("The fixed width importer could not read the next line", e);
|
||||
}
|
||||
|
||||
ImporterUtilities.setupColumns(project, columnNames);
|
||||
}
|
||||
};
|
||||
|
||||
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -181,9 +109,9 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
|
||||
* @param splitIntoColumns
|
||||
* @return
|
||||
*/
|
||||
private ArrayList<String> getCells(String line, int[] widths, boolean splitIntoColumns) {
|
||||
ArrayList<String> cells = new ArrayList<String>();
|
||||
if(splitIntoColumns){
|
||||
static private ArrayList<Object> getCells(String line, int[] widths) {
|
||||
ArrayList<Object> cells = new ArrayList<Object>();
|
||||
|
||||
int columnStartCursor = 0;
|
||||
int columnEndCursor = 0;
|
||||
for (int width : widths) {
|
||||
@ -194,8 +122,9 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
|
||||
|
||||
columnEndCursor = columnStartCursor + width;
|
||||
|
||||
if(columnEndCursor > line.length())
|
||||
if (columnEndCursor > line.length()) {
|
||||
columnEndCursor = line.length();
|
||||
}
|
||||
if (columnEndCursor <= columnStartCursor) {
|
||||
cells.add(null); //FIXME is adding a null cell (to represent no data, or a zero width column) OK?
|
||||
continue;
|
||||
@ -205,36 +134,83 @@ public class FixedWidthImporter implements ReaderImporter, StreamImporter { //TO
|
||||
|
||||
columnStartCursor = columnEndCursor;
|
||||
}
|
||||
}else{
|
||||
cells.add(line);
|
||||
|
||||
// Residual text
|
||||
if (columnStartCursor < line.length()) {
|
||||
cells.add(line.substring(columnStartCursor));
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the expected string of comma separated integers into an array of integers.
|
||||
* Also performs a basic sanity check on the provided data.
|
||||
*
|
||||
* @param sep
|
||||
* A comma separated string of integers. e.g. 4,2,5,22,19
|
||||
* @return
|
||||
* @throws ServletException
|
||||
*/
|
||||
public int[] getColumnWidthsFromString(String sep) throws ImportException {
|
||||
String[] splitSep = Pattern.compile(",").split(sep);
|
||||
|
||||
int[] widths = new int[splitSep.length];
|
||||
for(int i = 0; i < splitSep.length; i++){
|
||||
static public int[] guessColumnWidths(File file, String encoding) {
|
||||
try {
|
||||
int parsedInt = Integer.parseInt(splitSep[i]);
|
||||
if( parsedInt < 0 )
|
||||
throw new ImportException("A column cannot have a width of less than zero", null);
|
||||
widths[i] = parsedInt;
|
||||
}catch(NumberFormatException e){
|
||||
throw new ImportException("For a fixed column width import, the column widths must be given as a comma separated string of integers. e.g. 1,3,5,22,19", e);
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||
LineNumberReader lineNumberReader = new LineNumberReader(reader);
|
||||
|
||||
int[] counts = null;
|
||||
int totalBytes = 0;
|
||||
int lineCount = 0;
|
||||
String s;
|
||||
while (totalBytes < 64 * 1024 &&
|
||||
lineCount < 100 &&
|
||||
(s = lineNumberReader.readLine()) != null) {
|
||||
|
||||
totalBytes += s.length() + 1; // count the new line character
|
||||
if (s.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
lineCount++;
|
||||
|
||||
if (counts == null) {
|
||||
counts = new int[s.length()];
|
||||
for (int c = 0; c < counts.length; c++) {
|
||||
counts[c] = 0;
|
||||
}
|
||||
return widths;
|
||||
}
|
||||
|
||||
for (int c = 0; c < counts.length && c < s.length(); c++) {
|
||||
char ch = s.charAt(c);
|
||||
if (ch == ' ') {
|
||||
counts[c]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (counts != null) {
|
||||
List<Integer> widths = new ArrayList<Integer>();
|
||||
|
||||
int startIndex = 0;
|
||||
for (int c = 0; c < counts.length; c++) {
|
||||
int count = counts[c];
|
||||
if (count == lineCount && c > startIndex) {
|
||||
widths.add(c - startIndex + 1);
|
||||
startIndex = c + 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = widths.size() - 1; i > 0; i--) {
|
||||
if (widths.get(i) == 1) {
|
||||
widths.remove(i);
|
||||
widths.set(i - 1, widths.get(i - 1) + 1);
|
||||
}
|
||||
}
|
||||
|
||||
int[] widthA = new int[widths.size()];
|
||||
for (int i = 0; i < widthA.length; i++) {
|
||||
widthA[i] = widths.get(i);
|
||||
}
|
||||
return widthA;
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -38,11 +38,9 @@ package com.google.refine.importers;
|
||||
* indicating the underlying cause of the problem.
|
||||
*/
|
||||
public class ImportException extends Exception {

    /** Serialization version identifier for this exception type. */
    private static final long serialVersionUID = 7077314805989174181L;

    /**
     * Creates an import exception.
     *
     * @param message description of what went wrong during the import
     * @param cause   the underlying problem; passed unchanged to {@link Exception}
     */
    public ImportException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
|
||||
|
@ -1,138 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.net.URL;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
abstract public class ImporterRegistry {
|
||||
final static Logger logger = LoggerFactory.getLogger("importer-registry");
|
||||
|
||||
static final private Map<String, Importer> importers = new HashMap<String, Importer>();
|
||||
|
||||
private static final String[][] importerNames = {
|
||||
{"ExcelImporter", "com.google.refine.importers.ExcelImporter"},
|
||||
{"XmlImporter", "com.google.refine.importers.XmlImporter"},
|
||||
{"RdfTripleImporter", "com.google.refine.importers.RdfTripleImporter"},
|
||||
{"MarcImporter", "com.google.refine.importers.MarcImporter"},
|
||||
{"TsvCsvImporter", "com.google.refine.importers.TsvCsvImporter"},
|
||||
{"JsonImporter", "com.google.refine.importers.JsonImporter"},
|
||||
{"FixedWidthImporter", "com.google.refine.importers.FixedWidthImporter"}
|
||||
};
|
||||
|
||||
static {
|
||||
registerImporters(importerNames);
|
||||
}
|
||||
|
||||
static public boolean registerImporters(String[][] importers) {
|
||||
boolean status = true;
|
||||
for (String[] importer : importerNames) {
|
||||
String importerName = importer[0];
|
||||
String className = importer[1];
|
||||
logger.debug("Loading command " + importerName + " class: " + className);
|
||||
Importer cmd;
|
||||
try {
|
||||
// TODO: May need to use the servlet container's class loader here
|
||||
cmd = (Importer) Class.forName(className).newInstance();
|
||||
} catch (InstantiationException e) {
|
||||
logger.error("Failed to load importer class " + className, e);
|
||||
status = false;
|
||||
continue;
|
||||
} catch (IllegalAccessException e) {
|
||||
logger.error("Failed to load importer class " + className, e);
|
||||
status = false;
|
||||
continue;
|
||||
} catch (ClassNotFoundException e) {
|
||||
logger.error("Failed to load importer class " + className, e);
|
||||
status = false;
|
||||
continue;
|
||||
}
|
||||
status |= registerImporter(importerName, cmd);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a single importer.
|
||||
*
|
||||
* @param name importer verb for importer
|
||||
* @param importerObject object implementing the importer
|
||||
*
|
||||
* @return true if importer was loaded and registered successfully
|
||||
*/
|
||||
static public boolean registerImporter(String name, Importer importerObject) {
|
||||
if (importers.containsKey(name)) {
|
||||
return false;
|
||||
}
|
||||
importers.put(name, importerObject);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Currently only for test purposes
|
||||
static protected boolean unregisterImporter(String verb) {
|
||||
return importers.remove(verb) != null;
|
||||
}
|
||||
|
||||
static public Importer guessImporter(String contentType, String fileName, boolean provideDefault) {
|
||||
for (Importer i : importers.values()){
|
||||
if(i.canImportData(contentType, fileName)){
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (provideDefault) {
|
||||
return new TsvCsvImporter(); // default
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static public Importer guessImporter(String contentType, String filename) {
|
||||
return guessImporter(contentType, filename, true);
|
||||
}
|
||||
|
||||
static public Importer guessUrlImporter(URL url) {
|
||||
for (Importer importer : importers.values()){
|
||||
if (importer instanceof UrlImporter
|
||||
&& ((UrlImporter) importer).canImportData(url)) {
|
||||
return importer;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -33,15 +33,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.TrackingInputStream;
|
||||
|
||||
public class ImporterUtilities {
|
||||
|
||||
@ -118,6 +128,33 @@ public class ImporterUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
static public Column getOrAllocateColumn(Project project, List<String> currentFileColumnNames, int index) {
|
||||
if (index < currentFileColumnNames.size()) {
|
||||
return project.columnModel.getColumnByName(currentFileColumnNames.get(index));
|
||||
} else if (index == currentFileColumnNames.size()) {
|
||||
String prefix = "Column ";
|
||||
int i = 1;
|
||||
while (true) {
|
||||
String columnName = prefix + i;
|
||||
if (project.columnModel.getColumnByName(columnName) != null) {
|
||||
// Already taken name
|
||||
i++;
|
||||
} else {
|
||||
Column column = new Column(project.columnModel.allocateNewCellIndex(), columnName);
|
||||
try {
|
||||
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
|
||||
} catch (ModelException e) {
|
||||
// Ignore: shouldn't get in here since we just checked for duplicate names.
|
||||
}
|
||||
currentFileColumnNames.add(columnName);
|
||||
return column;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new RuntimeException("Unexpected code path");
|
||||
}
|
||||
}
|
||||
|
||||
static public void setupColumns(Project project, List<String> columnNames) {
|
||||
Map<String, Integer> nameToIndex = new HashMap<String, Integer>();
|
||||
for (int c = 0; c < columnNames.size(); c++) {
|
||||
@ -125,7 +162,8 @@ public class ImporterUtilities {
|
||||
if (cell.isEmpty()) {
|
||||
cell = "Column";
|
||||
} else if (cell.startsWith("\"") && cell.endsWith("\"")) {
|
||||
cell = cell.substring(1, cell.length() - 1).trim(); //FIXME is trimming quotation marks appropriate?
|
||||
// FIXME: is trimming quotation marks appropriate?
|
||||
cell = cell.substring(1, cell.length() - 1).trim();
|
||||
}
|
||||
|
||||
if (nameToIndex.containsKey(cell)) {
|
||||
@ -137,10 +175,74 @@ public class ImporterUtilities {
|
||||
nameToIndex.put(cell, 2);
|
||||
}
|
||||
|
||||
Column column = new Column(c, cell);
|
||||
|
||||
project.columnModel.columns.add(column);
|
||||
columnNames.set(c, cell);
|
||||
if (project.columnModel.getColumnByName(cell) == null) {
|
||||
Column column = new Column(project.columnModel.allocateNewCellIndex(), cell);
|
||||
try {
|
||||
project.columnModel.addColumn(project.columnModel.columns.size(), column, false);
|
||||
} catch (ModelException e) {
|
||||
// Ignore: shouldn't get in here since we just checked for duplicate names.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static public interface MultiFileReadingProgress {
|
||||
public void startFile(String fileSource);
|
||||
public void readingFile(String fileSource, long bytesRead);
|
||||
public void endFile(String fileSource, long bytesRead);
|
||||
}
|
||||
|
||||
static public MultiFileReadingProgress createMultiFileReadingProgress(
|
||||
final ImportingJob job, List<JSONObject> fileRecords) {
|
||||
long totalSize = 0;
|
||||
for (JSONObject fileRecord : fileRecords) {
|
||||
File file = ImportingUtilities.getFile(job, fileRecord);
|
||||
totalSize += file.length();
|
||||
}
|
||||
|
||||
final long totalSize2 = totalSize;
|
||||
return new MultiFileReadingProgress() {
|
||||
long totalBytesRead = 0;
|
||||
|
||||
void setProgress(String fileSource, long bytesRead) {
|
||||
ImportingUtilities.setCreatingProjectProgress(
|
||||
job,
|
||||
"Reading " + fileSource,
|
||||
(int) (100 * (totalBytesRead + bytesRead) / totalSize2));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startFile(String fileSource) {
|
||||
setProgress(fileSource, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readingFile(String fileSource, long bytesRead) {
|
||||
setProgress(fileSource, bytesRead);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endFile(String fileSource, long bytesRead) {
|
||||
totalBytesRead += bytesRead;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static public InputStream openAndTrackFile(
|
||||
final String fileSource,
|
||||
final File file,
|
||||
final MultiFileReadingProgress progress) throws FileNotFoundException {
|
||||
InputStream inputStream = new FileInputStream(file);
|
||||
return progress == null ? inputStream : new TrackingInputStream(inputStream) {
|
||||
@Override
|
||||
protected long track(long bytesRead) {
|
||||
long l = super.track(bytesRead);
|
||||
|
||||
progress.readingFile(fileSource, this.bytesRead);
|
||||
|
||||
return l;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
138
main/src/com/google/refine/importers/ImportingParserBase.java
Normal file
138
main/src/com/google/refine/importers/ImportingParserBase.java
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingParser;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
abstract public class ImportingParserBase implements ImportingParser {
|
||||
final protected boolean useInputStream;
|
||||
|
||||
protected ImportingParserBase(boolean useInputStream) {
|
||||
this.useInputStream = useInputStream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(Project project, ProjectMetadata metadata,
|
||||
final ImportingJob job, List<JSONObject> fileRecords, String format,
|
||||
int limit, JSONObject options, List<Exception> exceptions) {
|
||||
MultiFileReadingProgress progress = ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);
|
||||
for (JSONObject fileRecord : fileRecords) {
|
||||
if (job.canceled) {
|
||||
break;
|
||||
}
|
||||
|
||||
try {
|
||||
parseOneFile(project, metadata, job, fileRecord, limit, options, exceptions, progress);
|
||||
} catch (IOException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
|
||||
if (limit > 0 && project.rows.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
JSONObject fileRecord,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions,
|
||||
final MultiFileReadingProgress progress
|
||||
) throws IOException {
|
||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||
|
||||
progress.startFile(fileSource);
|
||||
try {
|
||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||
try {
|
||||
if (useInputStream) {
|
||||
parseOneFile(project, metadata, job, fileSource, inputStream, limit, options, exceptions);
|
||||
} else {
|
||||
Reader reader = ImportingUtilities.getReaderFromStream(inputStream, fileRecord);
|
||||
|
||||
parseOneFile(project, metadata, job, fileSource, reader, limit, options, exceptions);
|
||||
}
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
} finally {
|
||||
progress.endFile(fileSource, file.length());
|
||||
}
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
InputStream inputStream,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
}
|
@ -33,95 +33,328 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.Properties;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
||||
import com.google.refine.importers.parsers.JSONParser;
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importers.tree.TreeReader;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class JsonImporter implements StreamImporter{
|
||||
final static Logger logger = LoggerFactory.getLogger("JsonImporter");
|
||||
public class JsonImporter extends TreeImportingParserBase {
|
||||
public JsonImporter() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
public static final int BUFFER_SIZE = 64 * 1024;
|
||||
static private class PreviewParsingState {
|
||||
int tokenCount;
|
||||
}
|
||||
|
||||
final static private int PREVIEW_PARSING_LIMIT = 1000;
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project,
|
||||
ProjectMetadata metadata, Properties options)
|
||||
throws ImportException {
|
||||
//FIXME the below is a close duplicate of the XmlImporter code.
|
||||
//Should wrap a lot of the below into methods and put them in a common superclass
|
||||
logger.trace("JsonImporter.read");
|
||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||
|
||||
String[] recordPath = null;
|
||||
{
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytes_read = 0;
|
||||
try {//fill the buffer with data
|
||||
while (bytes_read < BUFFER_SIZE) {
|
||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
||||
if (c == -1) break;
|
||||
bytes_read +=c ;
|
||||
}
|
||||
pis.unread(buffer, 0, bytes_read);
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Read error",e);
|
||||
}
|
||||
|
||||
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
|
||||
TreeParser parser = new JSONParser(iStream);
|
||||
if (options.containsKey("importer-record-tag")) {
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
try {
|
||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
||||
parser,
|
||||
options.getProperty("importer-record-tag"));
|
||||
JSONObject firstFileRecord = fileRecords.get(0);
|
||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
JsonFactory factory = new JsonFactory();
|
||||
JsonParser parser = factory.createJsonParser(is);
|
||||
|
||||
PreviewParsingState state = new PreviewParsingState();
|
||||
Object rootValue = parseForPreview(parser, state);
|
||||
if (rootValue != null) {
|
||||
JSONUtilities.safePut(options, "dom", rootValue);
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state, JsonToken token)
|
||||
throws JsonParseException, IOException {
|
||||
if (token != null) {
|
||||
switch (token) {
|
||||
case START_ARRAY:
|
||||
return parseArrayForPreview(parser, state);
|
||||
case START_OBJECT:
|
||||
return parseObjectForPreview(parser, state);
|
||||
case VALUE_STRING:
|
||||
return parser.getText();
|
||||
case VALUE_NUMBER_INT:
|
||||
return Integer.valueOf(parser.getIntValue());
|
||||
case VALUE_NUMBER_FLOAT:
|
||||
return Float.valueOf(parser.getFloatValue());
|
||||
case VALUE_TRUE:
|
||||
return Boolean.TRUE;
|
||||
case VALUE_FALSE:
|
||||
return Boolean.FALSE;
|
||||
case VALUE_NULL:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
final static private Object parseForPreview(JsonParser parser, PreviewParsingState state) {
|
||||
try {
|
||||
JsonToken token = parser.nextToken();
|
||||
state.tokenCount++;
|
||||
return parseForPreview(parser, state, token);
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
final static private JSONObject parseObjectForPreview(JsonParser parser, PreviewParsingState state) {
|
||||
JSONObject result = new JSONObject();
|
||||
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||
try {
|
||||
JsonToken token = parser.nextToken();
|
||||
if (token == null) {
|
||||
break;
|
||||
}
|
||||
state.tokenCount++;
|
||||
|
||||
switch (token) {
|
||||
case FIELD_NAME:
|
||||
String fieldName = parser.getText();
|
||||
Object fieldValue = parseForPreview(parser, state);
|
||||
JSONUtilities.safePut(result, fieldName, fieldValue);
|
||||
break;
|
||||
case END_OBJECT:
|
||||
break loop;
|
||||
default:
|
||||
break loop;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
final static private JSONArray parseArrayForPreview(JsonParser parser, PreviewParsingState state) {
|
||||
JSONArray result = new JSONArray();
|
||||
loop:while (state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||
try {
|
||||
JsonToken token = parser.nextToken();
|
||||
if (token == null) {
|
||||
break;
|
||||
}
|
||||
state.tokenCount++;
|
||||
|
||||
switch (token) {
|
||||
case END_ARRAY:
|
||||
break loop;
|
||||
default:
|
||||
Object element = parseForPreview(parser, state, token);
|
||||
JSONUtilities.append(result, element);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, Reader reader,
|
||||
ImportColumnGroup rootColumnGroup, int limit, JSONObject options, List<Exception> exceptions) {
|
||||
|
||||
parseOneFile(project, metadata, job, fileSource,
|
||||
new JSONTreeReader(reader), rootColumnGroup, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static public class JSONTreeReader implements TreeReader {
|
||||
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
||||
|
||||
JsonFactory factory = new JsonFactory();
|
||||
JsonParser parser = null;
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
Boolean thisTokenIsAFieldName = false;
|
||||
String lastFieldName = null;
|
||||
//end of workaround
|
||||
|
||||
/**
 * Creates a tree reader that pulls JSON tokens from the given character stream.
 *
 * If the Jackson parser cannot be created the failure is logged and
 * {@code parser} stays null; no exception is propagated from the constructor.
 *
 * @param reader source of the JSON text
 */
public JSONTreeReader(Reader reader) {
    try {
        parser = factory.createJsonParser(reader);
    } catch (Exception e) {
        // Report through the class logger rather than dumping to stderr.
        logger.error("Unable to create JSON parser for the supplied reader", e);
    }
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public int getAttributeCount() {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeLocalName(int index) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributePrefix(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeValue(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token current() throws ServletException {
|
||||
return this.mapToToken(parser.getCurrentToken());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFieldName() throws ServletException{
|
||||
try {
|
||||
String text = parser.getCurrentName();
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(text == null){
|
||||
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
|
||||
text = this.lastFieldName;
|
||||
else
|
||||
text = "__anonymous__";
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return text;
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. Json does not have prefixes
|
||||
*/
|
||||
@Override
|
||||
public String getPrefix() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFieldValue() throws ServletException {
|
||||
try {
|
||||
return parser.getText();
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException {
|
||||
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() throws ServletException {
|
||||
JsonToken next;
|
||||
try {
|
||||
next = parser.nextToken();
|
||||
} catch (JsonParseException e) {
|
||||
throw new ServletException(e);
|
||||
} catch (IOException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
if(next == null)
|
||||
throw new ServletException("No more Json Tokens in stream");
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(next == JsonToken.FIELD_NAME){
|
||||
try {
|
||||
this.thisTokenIsAFieldName = true;
|
||||
this.lastFieldName = parser.getCurrentName();
|
||||
} catch (Exception e) {
|
||||
//silent
|
||||
// e.printStackTrace();
|
||||
}
|
||||
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
|
||||
if(this.thisTokenIsAFieldName){
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}else{
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
}
|
||||
}else{
|
||||
recordPath = XmlImportUtilities.detectRecordElement(parser);
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return mapToToken(next);
|
||||
}
|
||||
|
||||
if (recordPath == null)
|
||||
return;
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
XmlImportUtilities.importTreeData(new JSONParser(pis), project, recordPath, rootColumnGroup);
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
|
||||
project.columnModel.update();
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
||||
if("application/json".equals(contentType) ||
|
||||
"text/json".equals(contentType)) {
|
||||
return true;
|
||||
}
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (
|
||||
fileName.endsWith(".json") ||
|
||||
fileName.endsWith(".js")
|
||||
) {
|
||||
return true;
|
||||
protected Token mapToToken(JsonToken token){
|
||||
switch(token){
|
||||
case START_ARRAY: return Token.StartEntity;
|
||||
case END_ARRAY: return Token.EndEntity;
|
||||
case START_OBJECT: return Token.StartEntity;
|
||||
case END_OBJECT: return Token.EndEntity;
|
||||
case VALUE_STRING: return Token.Value;
|
||||
case FIELD_NAME: return Token.Ignorable; //returned by the getLocalName function()
|
||||
case VALUE_NUMBER_INT: return Token.Value;
|
||||
//Json does not have START_DOCUMENT token type (so ignored as default)
|
||||
//Json does not have END_DOCUMENT token type (so ignored as default)
|
||||
case VALUE_TRUE : return Token.Value;
|
||||
case VALUE_NUMBER_FLOAT : return Token.Value;
|
||||
case VALUE_NULL : return Token.Value;
|
||||
case VALUE_FALSE : return Token.Value;
|
||||
case VALUE_EMBEDDED_OBJECT : return Token.Ignorable;
|
||||
case NOT_AVAILABLE : return Token.Ignorable;
|
||||
default: return Token.Ignorable;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,21 @@
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import com.google.refine.importing.FormatGuesser;
|
||||
|
||||
public class LineBasedFormatGuesser implements FormatGuesser {
|
||||
|
||||
@Override
|
||||
public String guess(File file, String encoding, String seedFormat) {
|
||||
SeparatorBasedImporter.Separator sep = SeparatorBasedImporter.guessSeparator(file, encoding);
|
||||
if (sep != null) {
|
||||
return "text/line-based/*sv";
|
||||
}
|
||||
int[] widths = FixedWidthImporter.guessColumnWidths(file, encoding);
|
||||
if (widths != null) {
|
||||
return "text/line-based/fixed-width";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
105
main/src/com/google/refine/importers/LineBasedImporter.java
Normal file
105
main/src/com/google/refine/importers/LineBasedImporter.java
Normal file
@ -0,0 +1,105 @@
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class LineBasedImporter extends TabularImportingParserBase {
|
||||
public LineBasedImporter() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||
JSONUtilities.safePut(options, "linesPerRow", 1);
|
||||
JSONUtilities.safePut(options, "headerLines", 0);
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
final int linesPerRow = JSONUtilities.getInt(options, "linesPerRow", 1);
|
||||
|
||||
final List<Object> columnNames;
|
||||
if (options.has("columnNames")) {
|
||||
columnNames = new ArrayList<Object>();
|
||||
String[] strings = JSONUtilities.getStringArray(options, "columnNames");
|
||||
for (String s : strings) {
|
||||
columnNames.add(s);
|
||||
}
|
||||
JSONUtilities.safePut(options, "headerLines", 1);
|
||||
} else {
|
||||
columnNames = null;
|
||||
JSONUtilities.safePut(options, "headerLines", 0);
|
||||
}
|
||||
|
||||
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
try {
|
||||
int skip = JSONUtilities.getInt(options, "ignoreLines", -1);
|
||||
while (skip > 0) {
|
||||
lnReader.readLine();
|
||||
skip--;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
JSONUtilities.safePut(options, "ignoreLines", -1);
|
||||
|
||||
TableDataReader dataReader = new TableDataReader() {
|
||||
boolean usedColumnNames = false;
|
||||
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
if (columnNames != null && !usedColumnNames) {
|
||||
usedColumnNames = true;
|
||||
return columnNames;
|
||||
} else {
|
||||
List<Object> cells = null;
|
||||
for (int i = 0; i < linesPerRow; i++) {
|
||||
String line = lnReader.readLine();
|
||||
if (i == 0) {
|
||||
if (line == null) {
|
||||
return null;
|
||||
} else {
|
||||
cells = new ArrayList<Object>(linesPerRow);
|
||||
cells.add(line);
|
||||
}
|
||||
} else if (line != null) {
|
||||
cells.add(line);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
}
|
@ -40,56 +40,44 @@ import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Properties;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.marc4j.MarcPermissiveStreamReader;
|
||||
import org.marc4j.MarcWriter;
|
||||
import org.marc4j.MarcXmlWriter;
|
||||
import org.marc4j.marc.Record;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
public class MarcImporter implements StreamImporter {
|
||||
|
||||
public class MarcImporter extends XmlImporter {
|
||||
@Override
|
||||
public void read(
|
||||
InputStream inputStream,
|
||||
Project project,
|
||||
ProjectMetadata metadata, Properties options
|
||||
) throws ImportException {
|
||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, InputStream inputStream,
|
||||
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
|
||||
List<Exception> exceptions) {
|
||||
|
||||
File tempFile;
|
||||
try {
|
||||
tempFile = File.createTempFile("refine-import-", ".marc.xml");
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Unexpected error creating temp file",e);
|
||||
exceptions.add(new ImportException("Unexpected error creating temp file", e));
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
OutputStream os = new FileOutputStream(tempFile);
|
||||
try {
|
||||
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
||||
inputStream,
|
||||
true,
|
||||
true
|
||||
);
|
||||
MarcWriter writer = new MarcXmlWriter(os, true);
|
||||
|
||||
int count = 0;
|
||||
MarcPermissiveStreamReader reader = new MarcPermissiveStreamReader(
|
||||
inputStream, true, true);
|
||||
while (reader.hasNext()) {
|
||||
Record record = reader.next();
|
||||
if (skip <= 0) {
|
||||
if (limit == -1 || count < limit) {
|
||||
writer.write(record);
|
||||
count++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
skip--;
|
||||
}
|
||||
}
|
||||
writer.close();
|
||||
} finally {
|
||||
@ -102,7 +90,8 @@ public class MarcImporter implements StreamImporter {
|
||||
|
||||
InputStream is = new FileInputStream(tempFile);
|
||||
try {
|
||||
new XmlImporter().read(is, project, metadata, options);
|
||||
super.parseOneFile(project, metadata, job, fileSource, inputStream,
|
||||
rootColumnGroup, limit, options, exceptions);
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
@ -111,31 +100,10 @@ public class MarcImporter implements StreamImporter {
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ImportException("Input file not found", e);
|
||||
exceptions.add(new ImportException("Input file not found", e));
|
||||
return;
|
||||
} finally {
|
||||
tempFile.delete();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
||||
if ("application/marc".equals(contentType)) {
|
||||
return true;
|
||||
}
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (
|
||||
fileName.endsWith(".mrc") ||
|
||||
fileName.endsWith(".marc") ||
|
||||
fileName.contains(".mrc.") ||
|
||||
fileName.contains(".marc.")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -33,64 +33,72 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
import org.jrdf.JRDFFactory;
|
||||
import org.jrdf.SortedMemoryJRDFFactory;
|
||||
import org.jrdf.collection.MemMapFactory;
|
||||
import org.jrdf.graph.Graph;
|
||||
import org.jrdf.graph.Triple;
|
||||
import org.jrdf.parser.ParseException;
|
||||
import org.jrdf.parser.StatementHandlerException;
|
||||
import org.jrdf.parser.line.GraphLineParser;
|
||||
import org.jrdf.parser.line.LineHandler;
|
||||
import org.jrdf.parser.ntriples.NTriplesParserFactory;
|
||||
import org.jrdf.util.ClosableIterable;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import static org.jrdf.graph.AnyObjectNode.ANY_OBJECT_NODE;
|
||||
import static org.jrdf.graph.AnyPredicateNode.ANY_PREDICATE_NODE;
|
||||
import static org.jrdf.graph.AnySubjectNode.ANY_SUBJECT_NODE;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class RdfTripleImporter implements ReaderImporter{
|
||||
public class RdfTripleImporter extends ImportingParserBase {
|
||||
private JRDFFactory _jrdfFactory;
|
||||
private NTriplesParserFactory _nTriplesParserFactory;
|
||||
private MemMapFactory _newMapFactory;
|
||||
|
||||
public RdfTripleImporter() {
|
||||
super(false);
|
||||
_jrdfFactory = SortedMemoryJRDFFactory.getFactory();
|
||||
_nTriplesParserFactory = new NTriplesParserFactory();
|
||||
_newMapFactory = new MemMapFactory();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
||||
String baseUrl = options.getProperty("base-url");
|
||||
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||
List<JSONObject> fileRecords, String format) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, Reader reader, int limit,
|
||||
JSONObject options, List<Exception> exceptions) {
|
||||
|
||||
String baseUrl = JSONUtilities.getString(options, "baseUrl", "");
|
||||
|
||||
Graph graph = _jrdfFactory.getNewGraph();
|
||||
LineHandler lineHandler = _nTriplesParserFactory.createParser(graph, _newMapFactory);
|
||||
GraphLineParser parser = new GraphLineParser(graph, lineHandler);
|
||||
try {
|
||||
parser.parse(reader, baseUrl); // fills JRDF graph
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("i/o error while parsing RDF",e);
|
||||
} catch (ParseException e) {
|
||||
throw new ImportException("error parsing RDF",e);
|
||||
} catch (StatementHandlerException e) {
|
||||
throw new ImportException("error parsing RDF",e);
|
||||
} catch (Exception e) {
|
||||
exceptions.add(e);
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, List<Row>> subjectToRows = new HashMap<String, List<Row>>();
|
||||
@ -152,24 +160,4 @@ public class RdfTripleImporter implements ReaderImporter{
|
||||
triples.iterator().close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
|
||||
if("application/rdf+xml".equals(contentType)) {
|
||||
return true;
|
||||
}
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (
|
||||
fileName.endsWith(".rdf")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
245
main/src/com/google/refine/importers/SeparatorBasedImporter.java
Normal file
245
main/src/com/google/refine/importers/SeparatorBasedImporter.java
Normal file
@ -0,0 +1,245 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import au.com.bytecode.opencsv.CSVParser;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class SeparatorBasedImporter extends TabularImportingParserBase {
|
||||
public SeparatorBasedImporter() {
|
||||
super(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||
List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
|
||||
JSONUtilities.safePut(options, "lineSeparator", "\n");
|
||||
|
||||
String separator = guessSeparator(job, fileRecords);
|
||||
JSONUtilities.safePut(options, "separator", separator != null ? separator : "\t");
|
||||
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", true);
|
||||
JSONUtilities.safePut(options, "processQuotes", true);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
// String lineSeparator = JSONUtilities.getString(options, "lineSeparator", "\n");
|
||||
String sep = JSONUtilities.getString(options, "separator", "\t");
|
||||
boolean processQuotes = JSONUtilities.getBoolean(options, "processQuotes", true);
|
||||
|
||||
final CSVParser parser = new CSVParser(
|
||||
sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators.
|
||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||
(char) 0, // escape character
|
||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||
!processQuotes);
|
||||
|
||||
final LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
TableDataReader dataReader = new TableDataReader() {
|
||||
long bytesRead = 0;
|
||||
|
||||
@Override
|
||||
public List<Object> getNextRowOfCells() throws IOException {
|
||||
String line = lnReader.readLine();
|
||||
if (line == null) {
|
||||
return null;
|
||||
} else {
|
||||
bytesRead += line.length();
|
||||
return getCells(line, parser, lnReader);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
readTable(project, metadata, job, dataReader, fileSource, limit, options, exceptions);
|
||||
}
|
||||
|
||||
static protected ArrayList<Object> getCells(String line, CSVParser parser, LineNumberReader lnReader)
|
||||
throws IOException{
|
||||
|
||||
ArrayList<Object> cells = new ArrayList<Object>();
|
||||
String[] tokens = parser.parseLineMulti(line);
|
||||
for (String s : tokens){
|
||||
cells.add(s);
|
||||
}
|
||||
while (parser.isPending()) {
|
||||
tokens = parser.parseLineMulti(lnReader.readLine());
|
||||
for (String s : tokens) {
|
||||
cells.add(s);
|
||||
}
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
static public String guessSeparator(ImportingJob job, List<JSONObject> fileRecords) {
|
||||
for (int i = 0; i < 5 && i < fileRecords.size(); i++) {
|
||||
JSONObject fileRecord = fileRecords.get(i);
|
||||
String encoding = ImportingUtilities.getEncoding(fileRecord);
|
||||
String location = JSONUtilities.getString(fileRecord, "location", null);
|
||||
|
||||
if (location != null) {
|
||||
File file = new File(job.getRawDataDir(), location);
|
||||
Separator separator = guessSeparator(file, encoding);
|
||||
if (separator != null) {
|
||||
return Character.toString(separator.separator);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public class Separator {
|
||||
char separator;
|
||||
int totalCount = 0;
|
||||
int totalOfSquaredCount = 0;
|
||||
int currentLineCount = 0;
|
||||
|
||||
double averagePerLine;
|
||||
double stddev;
|
||||
}
|
||||
|
||||
static public Separator guessSeparator(File file, String encoding) {
|
||||
try {
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||
LineNumberReader lineNumberReader = new LineNumberReader(reader);
|
||||
|
||||
List<Separator> separators = new ArrayList<SeparatorBasedImporter.Separator>();
|
||||
Map<Character, Separator> separatorMap = new HashMap<Character, SeparatorBasedImporter.Separator>();
|
||||
|
||||
int totalBytes = 0;
|
||||
int lineCount = 0;
|
||||
String s;
|
||||
while (totalBytes < 64 * 1024 &&
|
||||
lineCount < 100 &&
|
||||
(s = lineNumberReader.readLine()) != null) {
|
||||
|
||||
totalBytes += s.length() + 1; // count the new line character
|
||||
if (s.length() == 0) {
|
||||
continue;
|
||||
}
|
||||
lineCount++;
|
||||
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = s.charAt(i);
|
||||
if (!Character.isLetterOrDigit(c) &&
|
||||
!"\"' .-".contains(s.subSequence(i, i + 1))) {
|
||||
Separator separator = separatorMap.get(c);
|
||||
if (separator == null) {
|
||||
separator = new Separator();
|
||||
separator.separator = c;
|
||||
|
||||
separatorMap.put(c, separator);
|
||||
separators.add(separator);
|
||||
}
|
||||
separator.currentLineCount++;
|
||||
}
|
||||
}
|
||||
|
||||
for (Separator separator : separators) {
|
||||
separator.totalCount += separator.currentLineCount;
|
||||
separator.totalOfSquaredCount += separator.currentLineCount * separator.currentLineCount;
|
||||
separator.currentLineCount = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (separators.size() > 0) {
|
||||
for (Separator separator : separators) {
|
||||
separator.averagePerLine = separator.totalCount / (double) lineCount;
|
||||
separator.stddev = Math.sqrt(
|
||||
separator.totalOfSquaredCount / (double) lineCount -
|
||||
separator.averagePerLine * separator.averagePerLine);
|
||||
}
|
||||
|
||||
Collections.sort(separators, new Comparator<Separator>() {
|
||||
@Override
|
||||
public int compare(Separator sep0, Separator sep1) {
|
||||
return Double.compare(sep0.stddev, sep1.stddev);
|
||||
}
|
||||
});
|
||||
for (Separator separator : separators) {
|
||||
if (separator.stddev / separator.averagePerLine < 0.1) {
|
||||
return separator;
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,205 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.ModelException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
abstract public class TabularImportingParserBase extends ImportingParserBase {
|
||||
static public interface TableDataReader {
|
||||
public List<Object> getNextRowOfCells() throws IOException;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||
List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = new JSONObject();
|
||||
|
||||
JSONUtilities.safePut(options, "ignoreLines", -1); // number of blank lines at the beginning to ignore
|
||||
JSONUtilities.safePut(options, "headerLines", 1); // number of header lines
|
||||
|
||||
JSONUtilities.safePut(options, "skipDataLines", 0); // number of initial data lines to skip
|
||||
JSONUtilities.safePut(options, "storeBlankRows", true);
|
||||
JSONUtilities.safePut(options, "storeBlankCellsAsNulls", true);
|
||||
|
||||
JSONUtilities.safePut(options, "includeFileSources", fileRecords.size() > 1);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
protected TabularImportingParserBase(boolean useInputStream) {
|
||||
super(useInputStream);
|
||||
}
|
||||
|
||||
protected void readTable(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
TableDataReader reader,
|
||||
String fileSource,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
int ignoreLines = JSONUtilities.getInt(options, "ignoreLines", -1);
|
||||
int headerLines = JSONUtilities.getInt(options, "headerLines", 1);
|
||||
int skipDataLines = JSONUtilities.getInt(options, "skipDataLines", 0);
|
||||
int limit2 = JSONUtilities.getInt(options, "limit", -1);
|
||||
if (limit > 0) {
|
||||
if (limit2 > 0) {
|
||||
limit2 = Math.min(limit, limit2);
|
||||
} else {
|
||||
limit2 = limit;
|
||||
}
|
||||
}
|
||||
|
||||
boolean guessCellValueTypes = JSONUtilities.getBoolean(options, "guessCellValueTypes", true);
|
||||
|
||||
boolean storeBlankRows = JSONUtilities.getBoolean(options, "storeBlankRows", true);
|
||||
boolean storeBlankCellsAsNulls = JSONUtilities.getBoolean(options, "storeBlankCellsAsNulls", true);
|
||||
boolean includeFileSources = JSONUtilities.getBoolean(options, "includeFileSources", false);
|
||||
|
||||
String fileNameColumnName = "File";
|
||||
if (includeFileSources) {
|
||||
if (project.columnModel.getColumnByName(fileNameColumnName) == null) {
|
||||
try {
|
||||
project.columnModel.addColumn(
|
||||
0, new Column(project.columnModel.allocateNewCellIndex(), fileNameColumnName), false);
|
||||
} catch (ModelException e) {
|
||||
// Ignore: We already checked for duplicate name.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
List<Object> cells = null;
|
||||
int rowsWithData = 0;
|
||||
|
||||
try {
|
||||
while (!job.canceled && (cells = reader.getNextRowOfCells()) != null) {
|
||||
if (ignoreLines > 0) {
|
||||
ignoreLines--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (headerLines > 0) { // header lines
|
||||
for (int c = 0; c < cells.size(); c++) {
|
||||
Object cell = cells.get(c);
|
||||
|
||||
String columnName;
|
||||
if (cell == null) {
|
||||
// add column even if cell is blank
|
||||
columnName = "";
|
||||
} else if (cell instanceof Cell) {
|
||||
columnName = ((Cell) cell).value.toString().trim();
|
||||
} else {
|
||||
columnName = cell.toString().trim();
|
||||
}
|
||||
|
||||
ImporterUtilities.appendColumnName(columnNames, c, columnName);
|
||||
}
|
||||
|
||||
headerLines--;
|
||||
if (headerLines == 0) {
|
||||
ImporterUtilities.setupColumns(project, columnNames);
|
||||
}
|
||||
} else { // data lines
|
||||
Row row = new Row(columnNames.size());
|
||||
|
||||
if (storeBlankRows) {
|
||||
rowsWithData++;
|
||||
} else if (cells.size() > 0) {
|
||||
rowsWithData++;
|
||||
}
|
||||
|
||||
if (skipDataLines <= 0 || rowsWithData > skipDataLines) {
|
||||
boolean rowHasData = false;
|
||||
for (int c = 0; c < cells.size(); c++) {
|
||||
Column column = ImporterUtilities.getOrAllocateColumn(project, columnNames, c);
|
||||
|
||||
Object value = cells.get(c);
|
||||
if (value != null && value instanceof Cell) {
|
||||
row.setCell(column.getCellIndex(), (Cell) value);
|
||||
rowHasData = true;
|
||||
} else if (ExpressionUtils.isNonBlankData(value)) {
|
||||
Serializable storedValue;
|
||||
if (value instanceof String) {
|
||||
storedValue = guessCellValueTypes ?
|
||||
ImporterUtilities.parseCellValue((String) value) : (String) value;
|
||||
} else {
|
||||
storedValue = ExpressionUtils.wrapStorable(value);
|
||||
}
|
||||
|
||||
row.setCell(column.getCellIndex(), new Cell(storedValue, null));
|
||||
rowHasData = true;
|
||||
} else if (!storeBlankCellsAsNulls) {
|
||||
row.setCell(column.getCellIndex(), new Cell("", null));
|
||||
}
|
||||
}
|
||||
|
||||
if (rowHasData || storeBlankRows) {
|
||||
if (includeFileSources) {
|
||||
row.setCell(
|
||||
project.columnModel.getColumnByName(fileNameColumnName).getCellIndex(),
|
||||
new Cell(fileSource, null));
|
||||
}
|
||||
project.rows.add(row);
|
||||
}
|
||||
|
||||
if (limit2 > 0 && project.rows.size() >= limit2) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
}
|
||||
}
|
63
main/src/com/google/refine/importers/TextFormatGuesser.java
Normal file
63
main/src/com/google/refine/importers/TextFormatGuesser.java
Normal file
@ -0,0 +1,63 @@
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.CharBuffer;
|
||||
|
||||
import com.google.refine.importing.FormatGuesser;
|
||||
|
||||
public class TextFormatGuesser implements FormatGuesser {
|
||||
|
||||
@Override
|
||||
public String guess(File file, String encoding, String seedFormat) {
|
||||
try {
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
Reader reader = encoding != null ? new InputStreamReader(is, encoding) : new InputStreamReader(is);
|
||||
|
||||
int totalBytes = 0;
|
||||
int bytes;
|
||||
int lineBreaks = 0;
|
||||
|
||||
CharBuffer charBuffer = CharBuffer.allocate(4096);
|
||||
while (totalBytes < 64 * 1024 && (bytes = reader.read(charBuffer)) > 0) {
|
||||
lineBreaks += countSubstrings(charBuffer.toString(), "\n");
|
||||
|
||||
charBuffer.clear();
|
||||
totalBytes += bytes;
|
||||
}
|
||||
|
||||
if (lineBreaks > 3) {
|
||||
return "text/line-based";
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public int countSubstrings(String s, String sub) {
|
||||
int count = 0;
|
||||
int from = 0;
|
||||
while (from < s.length()) {
|
||||
int i = s.indexOf(sub, from);
|
||||
if (i < 0) {
|
||||
break;
|
||||
} else {
|
||||
from = i + sub.length();
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
}
|
@ -1,238 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Reader;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import au.com.bytecode.opencsv.CSVParser;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
|
||||
public class TsvCsvImporter implements ReaderImporter,StreamImporter {
|
||||
|
||||
@Override
|
||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options) throws ImportException {
|
||||
boolean splitIntoColumns = ImporterUtilities.getBooleanOption("split-into-columns", options, true);
|
||||
|
||||
String sep = options.getProperty("separator"); // auto-detect if not present
|
||||
int ignoreLines = ImporterUtilities.getIntegerOption("ignore", options, -1);
|
||||
int headerLines = ImporterUtilities.getIntegerOption("header-lines", options, 1);
|
||||
|
||||
int limit = ImporterUtilities.getIntegerOption("limit",options,-1);
|
||||
int skip = ImporterUtilities.getIntegerOption("skip",options,0);
|
||||
boolean guessValueType = ImporterUtilities.getBooleanOption("guess-value-type", options, true);
|
||||
boolean ignoreQuotes = ImporterUtilities.getBooleanOption("ignore-quotes", options, false);
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(reader);
|
||||
|
||||
try {
|
||||
read(lnReader, project, sep,
|
||||
limit, skip, ignoreLines, headerLines,
|
||||
guessValueType, splitIntoColumns, ignoreQuotes
|
||||
);
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Import failed",e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param lnReader
|
||||
* LineNumberReader used to read file or string contents
|
||||
* @param project
|
||||
* The project into which the parsed data will be added
|
||||
* @param sep
|
||||
* The character used to denote different the break between data points
|
||||
* @param limit
|
||||
* The maximum number of rows of data to import
|
||||
* @param skip
|
||||
* The number of initial data rows to skip
|
||||
* @param ignoreLines
|
||||
* The number of initial lines within the data source which should be ignored entirely
|
||||
* @param headerLines
|
||||
* The number of lines in the data source which describe each column
|
||||
* @param guessValueType
|
||||
* Whether the parser should try and guess the type of the value being parsed
|
||||
* @param splitIntoColumns
|
||||
* Whether the parser should try and split the data source into columns
|
||||
* @param ignoreQuotes
|
||||
* Quotation marks are ignored, and all separators and newlines treated as such regardless of whether they are within quoted values
|
||||
* @throws IOException
|
||||
*/
|
||||
public void read(LineNumberReader lnReader, Project project, String sep, int limit, int skip, int ignoreLines, int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes ) throws IOException{
|
||||
CSVParser parser = (sep != null && sep.length() > 0 && splitIntoColumns) ?
|
||||
new CSVParser(sep.toCharArray()[0],//HACK changing string to char - won't work for multi-char separators.
|
||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||
(char) 0, // escape character
|
||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||
ignoreQuotes) : null;
|
||||
List<String> columnNames = new ArrayList<String>();
|
||||
String line = null;
|
||||
int rowsWithData = 0;
|
||||
|
||||
while ((line = lnReader.readLine()) != null) {
|
||||
if (ignoreLines > 0) {
|
||||
ignoreLines--;
|
||||
continue;
|
||||
} else if (StringUtils.isBlank(line)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//guess separator
|
||||
if (parser == null) {
|
||||
int tab = line.indexOf('\t');
|
||||
if (tab >= 0) {
|
||||
parser = new CSVParser('\t',
|
||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||
(char) 0, // escape character
|
||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||
ignoreQuotes);
|
||||
} else {
|
||||
parser = new CSVParser(',',
|
||||
CSVParser.DEFAULT_QUOTE_CHARACTER,
|
||||
(char) 0, // escape character
|
||||
CSVParser.DEFAULT_STRICT_QUOTES,
|
||||
CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
|
||||
ignoreQuotes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (headerLines > 0) {
|
||||
//column headers
|
||||
headerLines--;
|
||||
|
||||
ArrayList<String> cells = getCells(line, parser, lnReader, splitIntoColumns);
|
||||
|
||||
for (int c = 0; c < cells.size(); c++) {
|
||||
String cell = cells.get(c).trim();
|
||||
//add column even if cell is blank
|
||||
ImporterUtilities.appendColumnName(columnNames, c, cell);
|
||||
}
|
||||
} else {
|
||||
//data
|
||||
Row row = new Row(columnNames.size());
|
||||
|
||||
ArrayList<String> cells = getCells(line, parser, lnReader, splitIntoColumns);
|
||||
|
||||
if( cells != null && cells.size() > 0 )
|
||||
rowsWithData++;
|
||||
|
||||
if (skip <=0 || rowsWithData > skip){
|
||||
//add parsed data to row
|
||||
for(String s : cells){
|
||||
if (ExpressionUtils.isNonBlankData(s)) {
|
||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(s) : s;
|
||||
row.cells.add(new Cell(value, null));
|
||||
}else{
|
||||
row.cells.add(null);
|
||||
}
|
||||
}
|
||||
project.rows.add(row);
|
||||
project.columnModel.setMaxCellIndex(row.cells.size());
|
||||
|
||||
ImporterUtilities.ensureColumnsInRowExist(columnNames, row);
|
||||
|
||||
if (limit > 0 && project.rows.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ImporterUtilities.setupColumns(project, columnNames);
|
||||
}
|
||||
|
||||
protected ArrayList<String> getCells(String line, CSVParser parser, LineNumberReader lnReader, boolean splitIntoColumns) throws IOException{
|
||||
ArrayList<String> cells = new ArrayList<String>();
|
||||
if(splitIntoColumns){
|
||||
String[] tokens = parser.parseLineMulti(line);
|
||||
for(String s : tokens){
|
||||
cells.add(s);
|
||||
}
|
||||
while(parser.isPending()){
|
||||
tokens = parser.parseLineMulti(lnReader.readLine());
|
||||
for(String s : tokens){
|
||||
cells.add(s);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
cells.add(line);
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void read(InputStream inputStream, Project project,
|
||||
ProjectMetadata metadata, Properties options) throws ImportException {
|
||||
read(new InputStreamReader(inputStream), project, metadata, options);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
return
|
||||
"text/plain".equals(contentType) ||
|
||||
"text/csv".equals(contentType) ||
|
||||
"text/x-csv".equals(contentType) ||
|
||||
"text/tab-separated-value".equals(contentType);
|
||||
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (fileName.endsWith(".tsv")) {
|
||||
return true;
|
||||
}else if (fileName.endsWith(".csv")){
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
@ -33,99 +33,274 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.importers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.util.Properties;
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.parsers.XmlParser;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importers.tree.TreeReader;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class XmlImporter implements StreamImporter {
|
||||
public class XmlImporter extends TreeImportingParserBase {
|
||||
public XmlImporter() {
|
||||
super(true);
|
||||
}
|
||||
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImporter");
|
||||
static private class PreviewParsingState {
|
||||
int tokenCount;
|
||||
}
|
||||
|
||||
public static final int BUFFER_SIZE = 64 * 1024;
|
||||
final static private int PREVIEW_PARSING_LIMIT = 1000;
|
||||
|
||||
@Override
|
||||
public void read(
|
||||
InputStream inputStream,
|
||||
Project project,
|
||||
ProjectMetadata metadata, Properties options
|
||||
) throws ImportException {
|
||||
logger.trace("XmlImporter.read");
|
||||
PushbackInputStream pis = new PushbackInputStream(inputStream,BUFFER_SIZE);
|
||||
|
||||
String[] recordPath = null;
|
||||
{
|
||||
byte[] buffer = new byte[BUFFER_SIZE];
|
||||
int bytes_read = 0;
|
||||
try {//fill the buffer with data
|
||||
while (bytes_read < BUFFER_SIZE) {
|
||||
int c = pis.read(buffer, bytes_read, BUFFER_SIZE - bytes_read);
|
||||
if (c == -1) break;
|
||||
bytes_read +=c ;
|
||||
}
|
||||
pis.unread(buffer, 0, bytes_read);
|
||||
} catch (IOException e) {
|
||||
throw new ImportException("Read error",e);
|
||||
}
|
||||
|
||||
InputStream iStream = new ByteArrayInputStream(buffer, 0, bytes_read);
|
||||
TreeParser parser = new XmlParser(iStream);
|
||||
if (options.containsKey("importer-record-tag")) {
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = super.createParserUIInitializationData(job, fileRecords, format);
|
||||
try {
|
||||
recordPath = XmlImportUtilities.detectPathFromTag(
|
||||
parser,
|
||||
options.getProperty("importer-record-tag"));
|
||||
}catch(Exception e){
|
||||
// silent
|
||||
// e.printStackTrace();
|
||||
JSONObject firstFileRecord = fileRecords.get(0);
|
||||
File file = ImportingUtilities.getFile(job, firstFileRecord);
|
||||
InputStream is = new FileInputStream(file);
|
||||
try {
|
||||
XMLStreamReader parser = createXMLStreamReader(is);
|
||||
PreviewParsingState state = new PreviewParsingState();
|
||||
|
||||
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||
int tokenType = parser.next();
|
||||
state.tokenCount++;
|
||||
if (tokenType == XMLStreamConstants.START_ELEMENT) {
|
||||
JSONObject rootElement = descendElement(parser, state);
|
||||
if (rootElement != null) {
|
||||
JSONUtilities.safePut(options, "dom", rootElement);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
recordPath = XmlImportUtilities.detectRecordElement(parser);
|
||||
// ignore everything else
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
// Ignore
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
final static private JSONObject descendElement(XMLStreamReader parser, PreviewParsingState state) throws XMLStreamException {
|
||||
JSONObject result = new JSONObject();
|
||||
{
|
||||
String name = parser.getLocalName();
|
||||
JSONUtilities.safePut(result, "n", name);
|
||||
|
||||
String prefix = parser.getPrefix();
|
||||
if (prefix != null) {
|
||||
JSONUtilities.safePut(result, "p", prefix);
|
||||
}
|
||||
String nsUri = parser.getNamespaceURI();
|
||||
if (nsUri != null) {
|
||||
JSONUtilities.safePut(result, "uri", nsUri);
|
||||
}
|
||||
}
|
||||
|
||||
if (recordPath == null)
|
||||
return;
|
||||
int namespaceCount = parser.getNamespaceCount();
|
||||
if (namespaceCount > 0) {
|
||||
JSONArray namespaces = new JSONArray();
|
||||
JSONUtilities.safePut(result, "ns", namespaces);
|
||||
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
XmlImportUtilities.importTreeData(new XmlParser(pis), project, recordPath, rootColumnGroup);
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
for (int i = 0; i < namespaceCount; i++) {
|
||||
JSONObject namespace = new JSONObject();
|
||||
JSONUtilities.append(namespaces, namespace);
|
||||
JSONUtilities.safePut(namespace, "p", parser.getNamespacePrefix(i));
|
||||
JSONUtilities.safePut(namespace, "uri", parser.getNamespaceURI(i));
|
||||
}
|
||||
}
|
||||
|
||||
project.columnModel.update();
|
||||
int attributeCount = parser.getAttributeCount();
|
||||
if (attributeCount > 0) {
|
||||
JSONArray attributes = new JSONArray();
|
||||
JSONUtilities.safePut(result, "a", attributes);
|
||||
|
||||
for (int i = 0; i < attributeCount; i++) {
|
||||
JSONObject attribute = new JSONObject();
|
||||
JSONUtilities.append(attributes, attribute);
|
||||
JSONUtilities.safePut(attribute, "n", parser.getAttributeLocalName(i));
|
||||
JSONUtilities.safePut(attribute, "v", parser.getAttributeValue(i));
|
||||
String prefix = parser.getAttributePrefix(i);
|
||||
if (prefix != null) {
|
||||
JSONUtilities.safePut(attribute, "p", prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JSONArray children = new JSONArray();
|
||||
while (parser.hasNext() && state.tokenCount < PREVIEW_PARSING_LIMIT) {
|
||||
int tokenType = parser.next();
|
||||
state.tokenCount++;
|
||||
if (tokenType == XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
} else if (tokenType == XMLStreamConstants.START_ELEMENT) {
|
||||
JSONObject childElement = descendElement(parser, state);
|
||||
if (childElement != null) {
|
||||
JSONUtilities.append(children, childElement);
|
||||
}
|
||||
} else if (tokenType == XMLStreamConstants.CHARACTERS ||
|
||||
tokenType == XMLStreamConstants.CDATA ||
|
||||
tokenType == XMLStreamConstants.SPACE) {
|
||||
JSONObject childElement = new JSONObject();
|
||||
JSONUtilities.safePut(childElement, "t", parser.getText());
|
||||
JSONUtilities.append(children, childElement);
|
||||
} else {
|
||||
// ignore everything else
|
||||
}
|
||||
}
|
||||
|
||||
if (children.length() > 0) {
|
||||
JSONUtilities.safePut(result, "c", children);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canImportData(String contentType, String fileName) {
|
||||
if (contentType != null) {
|
||||
contentType = contentType.toLowerCase().trim();
|
||||
public void parseOneFile(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, String fileSource, InputStream inputStream,
|
||||
ImportColumnGroup rootColumnGroup, int limit, JSONObject options,
|
||||
List<Exception> exceptions) {
|
||||
|
||||
if("application/xml".equals(contentType) ||
|
||||
"text/xml".equals(contentType) ||
|
||||
"application/rss+xml".equals(contentType) ||
|
||||
"application/atom+xml".equals(contentType)) {
|
||||
return true;
|
||||
}
|
||||
} else if (fileName != null) {
|
||||
fileName = fileName.toLowerCase();
|
||||
if (
|
||||
fileName.endsWith(".xml") ||
|
||||
fileName.endsWith(".atom") ||
|
||||
fileName.endsWith(".rss")
|
||||
) {
|
||||
return true;
|
||||
try {
|
||||
parseOneFile(project, metadata, job, fileSource,
|
||||
new XmlParser(inputStream), rootColumnGroup, limit, options, exceptions);
|
||||
} catch (XMLStreamException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
static public class XmlParser implements TreeReader {
|
||||
final protected XMLStreamReader parser;
|
||||
|
||||
public XmlParser(InputStream inputStream) throws XMLStreamException {
|
||||
parser = createXMLStreamReader(inputStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token next() throws ServletException {
|
||||
try {
|
||||
if (!parser.hasNext()) {
|
||||
throw new ServletException("End of XML stream");
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
int currentToken = -1;
|
||||
try {
|
||||
currentToken = parser.next();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
return mapToToken(currentToken);
|
||||
}
|
||||
|
||||
protected Token mapToToken(int token) throws ServletException {
|
||||
switch(token){
|
||||
case XMLStreamConstants.START_ELEMENT: return Token.StartEntity;
|
||||
case XMLStreamConstants.END_ELEMENT: return Token.EndEntity;
|
||||
case XMLStreamConstants.CHARACTERS: return Token.Value;
|
||||
case XMLStreamConstants.START_DOCUMENT: return Token.Ignorable;
|
||||
case XMLStreamConstants.END_DOCUMENT: return Token.Ignorable;
|
||||
case XMLStreamConstants.SPACE: return Token.Value;
|
||||
case XMLStreamConstants.PROCESSING_INSTRUCTION: return Token.Ignorable;
|
||||
case XMLStreamConstants.NOTATION_DECLARATION: return Token.Ignorable;
|
||||
case XMLStreamConstants.NAMESPACE: return Token.Ignorable;
|
||||
case XMLStreamConstants.ENTITY_REFERENCE: return Token.Ignorable;
|
||||
case XMLStreamConstants.DTD: return Token.Ignorable;
|
||||
case XMLStreamConstants.COMMENT: return Token.Ignorable;
|
||||
case XMLStreamConstants.CDATA: return Token.Ignorable;
|
||||
case XMLStreamConstants.ATTRIBUTE: return Token.Ignorable;
|
||||
default:
|
||||
return Token.Ignorable;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Token current() throws ServletException{
|
||||
return this.mapToToken(parser.getEventType());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException{
|
||||
try {
|
||||
return parser.hasNext();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFieldName() throws ServletException{
|
||||
try{
|
||||
return parser.getLocalName();
|
||||
}catch(IllegalStateException e){
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(){
|
||||
return parser.getPrefix();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFieldValue(){
|
||||
return parser.getText();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAttributeCount(){
|
||||
return parser.getAttributeCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeValue(int index){
|
||||
return parser.getAttributeValue(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributePrefix(int index){
|
||||
return parser.getAttributePrefix(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeLocalName(int index){
|
||||
return parser.getAttributeLocalName(index);
|
||||
}
|
||||
}
|
||||
|
||||
final static private XMLStreamReader createXMLStreamReader(InputStream inputStream) throws XMLStreamException {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
factory.setProperty(XMLInputFactory.IS_COALESCING, true);
|
||||
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
|
||||
|
||||
return factory.createXMLStreamReader(inputStream);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,210 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import org.codehaus.jackson.JsonFactory;
|
||||
import org.codehaus.jackson.JsonParseException;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class JSONParser implements TreeParser{
|
||||
// Class-wide SLF4J logger, registered under the name "JsonParser".
final static Logger logger = LoggerFactory.getLogger("JsonParser");
|
||||
|
||||
// Jackson factory from which the streaming parser below is created.
JsonFactory factory = new JsonFactory();
|
||||
// Streaming parser over the input; remains null if the constructor failed to create it.
JsonParser parser = null;
|
||||
|
||||
// Workaround state for Jackson's JsonParser, which does not report a current
// name consistently: getLocalName() falls back to lastFieldName when the
// parser returns no name and the first flag below is set.
// NOTE(review): the code that maintains these fields is not visible here - confirm.
|
||||
Boolean lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
Boolean thisTokenIsAFieldName = false;
|
||||
String lastFieldName = null;
|
||||
// End of workaround state.
|
||||
|
||||
public JSONParser(InputStream inputStream){
|
||||
try {
|
||||
parser = factory.createJsonParser(inputStream);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public int getAttributeCount() {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeLocalName(int index) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributePrefix(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. All Json is treated as elements
|
||||
*/
|
||||
@Override
|
||||
public String getAttributeValue(int index) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken getEventType() throws ServletException {
|
||||
return this.mapToTreeParserToken(parser.getCurrentToken());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLocalName() throws ServletException{
|
||||
try {
|
||||
String text = parser.getCurrentName();
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(text == null){
|
||||
if(this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity)
|
||||
text = this.lastFieldName;
|
||||
else
|
||||
text = "__anonymous__";
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return text;
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does nothing. Json does not have prefixes
|
||||
*/
|
||||
@Override
|
||||
public String getPrefix() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText() throws ServletException {
|
||||
try {
|
||||
return parser.getText();
|
||||
} catch (Exception e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException {
|
||||
return true; //FIXME fairly obtuse, is there a better way (advancing, then rewinding?)
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken next() throws ServletException {
|
||||
JsonToken next;
|
||||
try {
|
||||
next = parser.nextToken();
|
||||
} catch (JsonParseException e) {
|
||||
throw new ServletException(e);
|
||||
} catch (IOException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
if(next == null)
|
||||
throw new ServletException("No more Json Tokens in stream");
|
||||
|
||||
//The following is a workaround for inconsistent Jackson JsonParser
|
||||
if(next == JsonToken.FIELD_NAME){
|
||||
try {
|
||||
this.thisTokenIsAFieldName = true;
|
||||
this.lastFieldName = parser.getCurrentName();
|
||||
} catch (Exception e) {
|
||||
//silent
|
||||
}
|
||||
}else if(next == JsonToken.START_ARRAY || next == JsonToken.START_OBJECT){
|
||||
if(this.thisTokenIsAFieldName){
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = true;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}else{
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
}
|
||||
}else{
|
||||
this.lastTokenWasAFieldNameAndCurrentTokenIsANewEntity = false;
|
||||
this.lastFieldName = null;
|
||||
this.thisTokenIsAFieldName = false;
|
||||
}
|
||||
//end of workaround
|
||||
|
||||
return mapToTreeParserToken(next);
|
||||
}
|
||||
|
||||
protected TreeParserToken mapToTreeParserToken(JsonToken token){
|
||||
switch(token){
|
||||
case START_ARRAY: return TreeParserToken.StartEntity;
|
||||
case END_ARRAY: return TreeParserToken.EndEntity;
|
||||
case START_OBJECT: return TreeParserToken.StartEntity;
|
||||
case END_OBJECT: return TreeParserToken.EndEntity;
|
||||
case VALUE_STRING: return TreeParserToken.Value;
|
||||
case FIELD_NAME: return TreeParserToken.Ignorable; //returned by the getLocalName function()
|
||||
case VALUE_NUMBER_INT: return TreeParserToken.Value;
|
||||
//Json does not have START_DOCUMENT token type (so ignored as default)
|
||||
//Json does not have END_DOCUMENT token type (so ignored as default)
|
||||
case VALUE_TRUE : return TreeParserToken.Value;
|
||||
case VALUE_NUMBER_FLOAT : return TreeParserToken.Value;
|
||||
case VALUE_NULL : return TreeParserToken.Value;
|
||||
case VALUE_FALSE : return TreeParserToken.Value;
|
||||
case VALUE_EMBEDDED_OBJECT : return TreeParserToken.Ignorable;
|
||||
case NOT_AVAILABLE : return TreeParserToken.Ignorable;
|
||||
default: return TreeParserToken.Ignorable;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Row;
|
||||
|
||||
public class NonSplitRowParser extends RowParser {
|
||||
|
||||
public List<String> split(String line, LineNumberReader lineReader) {
|
||||
List<String> results = new ArrayList<String>(1);
|
||||
|
||||
results.add(line.trim());
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
||||
if (line.trim().isEmpty()) {
|
||||
return false;
|
||||
} else {
|
||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(line) : line;
|
||||
if (value != null) {
|
||||
row.cells.add(new Cell(value, null));
|
||||
return true;
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,85 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import com.google.refine.expr.ExpressionUtils;
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Row;
|
||||
|
||||
public class SeparatorRowParser extends RowParser {
|
||||
|
||||
String sep;
|
||||
|
||||
public SeparatorRowParser(String sep) {
|
||||
this.sep = sep;
|
||||
}
|
||||
|
||||
public List<String> split(String line, LineNumberReader lineReader) {
|
||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
||||
|
||||
List<String> results = new ArrayList<String>();
|
||||
for (int c = 0; c < cells.length; c++) {
|
||||
results.add(cells[c]);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public boolean parseRow(Row row, String line, boolean guessValueType, LineNumberReader lineReader) {
|
||||
boolean hasData = false;
|
||||
|
||||
String[] cells = StringUtils.splitPreserveAllTokens(line, sep);
|
||||
for (int c = 0; c < cells.length; c++) {
|
||||
String text = cells[c];
|
||||
|
||||
Serializable value = guessValueType ? ImporterUtilities.parseCellValue(text) : text;
|
||||
if (ExpressionUtils.isNonBlankData(value)) {
|
||||
row.cells.add(new Cell(value, null));
|
||||
hasData = true;
|
||||
} else {
|
||||
row.cells.add(null);
|
||||
}
|
||||
}
|
||||
return hasData;
|
||||
}
|
||||
|
||||
}
|
@ -1,160 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
|
||||
import java.io.InputStream;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.xml.stream.FactoryConfigurationError;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class XmlParser implements TreeParser{
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlParser");
|
||||
|
||||
XMLStreamReader parser = null;
|
||||
|
||||
public XmlParser(InputStream inputStream){
|
||||
try {
|
||||
XMLInputFactory factory = XMLInputFactory.newInstance();
|
||||
factory.setProperty(XMLInputFactory.IS_COALESCING, true);
|
||||
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
|
||||
parser = factory.createXMLStreamReader(inputStream);
|
||||
} catch (XMLStreamException e) {
|
||||
// silent
|
||||
// e.printStackTrace();
|
||||
} catch (FactoryConfigurationError e) {
|
||||
// silent
|
||||
// e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken next() throws ServletException{
|
||||
try {
|
||||
if(!parser.hasNext())
|
||||
throw new ServletException("End of XML stream");
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
int currentToken = -1;
|
||||
try {
|
||||
currentToken = parser.next();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
return mapToTreeParserToken(currentToken);
|
||||
}
|
||||
|
||||
protected TreeParserToken mapToTreeParserToken(int token) throws ServletException {
|
||||
switch(token){
|
||||
case XMLStreamConstants.START_ELEMENT: return TreeParserToken.StartEntity;
|
||||
case XMLStreamConstants.END_ELEMENT: return TreeParserToken.EndEntity;
|
||||
case XMLStreamConstants.CHARACTERS: return TreeParserToken.Value;
|
||||
case XMLStreamConstants.START_DOCUMENT: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.END_DOCUMENT: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.SPACE: return TreeParserToken.Value;
|
||||
case XMLStreamConstants.PROCESSING_INSTRUCTION: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.NOTATION_DECLARATION: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.NAMESPACE: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.ENTITY_REFERENCE: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.DTD: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.COMMENT: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.CDATA: return TreeParserToken.Ignorable;
|
||||
case XMLStreamConstants.ATTRIBUTE: return TreeParserToken.Ignorable;
|
||||
default:
|
||||
return TreeParserToken.Ignorable;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TreeParserToken getEventType() throws ServletException{
|
||||
return this.mapToTreeParserToken(parser.getEventType());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() throws ServletException{
|
||||
try {
|
||||
return parser.hasNext();
|
||||
} catch (XMLStreamException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLocalName() throws ServletException{
|
||||
try{
|
||||
return parser.getLocalName();
|
||||
}catch(IllegalStateException e){
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrefix(){
|
||||
return parser.getPrefix();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getText(){
|
||||
return parser.getText();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getAttributeCount(){
|
||||
return parser.getAttributeCount();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeValue(int index){
|
||||
return parser.getAttributeValue(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributePrefix(int index){
|
||||
return parser.getAttributePrefix(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAttributeLocalName(int index){
|
||||
return parser.getAttributeLocalName(index);
|
||||
}
|
||||
}
|
23
main/src/com/google/refine/importers/tree/ImportColumn.java
Normal file
23
main/src/com/google/refine/importers/tree/ImportColumn.java
Normal file
@ -0,0 +1,23 @@
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
|
||||
/**
|
||||
* A column is used to describe a branch-terminating element in a tree structure
|
||||
*
|
||||
*/
|
||||
public class ImportColumn extends ImportVertical {
|
||||
public int cellIndex;
|
||||
public int nextRowIndex;
|
||||
public boolean blankOnFirstRow;
|
||||
|
||||
public ImportColumn() {}
|
||||
|
||||
public ImportColumn(String name) { //required for testing
|
||||
super.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
void tabulate() {
|
||||
// already done the tabulation elsewhere
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
/**
|
||||
* A column group describes a branch in tree structured data
|
||||
*/
|
||||
public class ImportColumnGroup extends ImportVertical {
|
||||
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
|
||||
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
|
||||
public int nextRowIndex;
|
||||
|
||||
@Override
|
||||
void tabulate() {
|
||||
for (ImportColumn c : columns.values()) {
|
||||
c.tabulate();
|
||||
nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
|
||||
}
|
||||
for (ImportColumnGroup g : subgroups.values()) {
|
||||
g.tabulate();
|
||||
nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("name=%s, columns={%s}, subgroups={{%s}}",
|
||||
name,StringUtils.join(columns.keySet(), ','),
|
||||
StringUtils.join(subgroups.keySet(),','));
|
||||
}
|
||||
}
|
14
main/src/com/google/refine/importers/tree/ImportRecord.java
Normal file
14
main/src/com/google/refine/importers/tree/ImportRecord.java
Normal file
@ -0,0 +1,14 @@
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import com.google.refine.model.Cell;
|
||||
|
||||
/**
|
||||
* A record describes a data element in a tree-structure
|
||||
*
|
||||
*/
|
||||
public class ImportRecord {
|
||||
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
/**
 * Common base for the named parts of an imported tree. Carries the name and
 * a non-blank count, and requires subclasses to implement tabulation.
 */
abstract class ImportVertical {

    /** Name of this part of the tree; empty until assigned. */
    public String name = "";

    /** Non-blank count for this part of the tree; starts at zero. */
    public int nonBlankCount;

    /** Tabulates this part of the tree; the meaning is subclass-specific. */
    abstract void tabulate();
}
|
@ -0,0 +1,16 @@
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * A candidate element whose sub-elements we shall import as records.
 */
class RecordElementCandidate {

    /** Path of this candidate; null until assigned. */
    String[] path;

    /** Counter associated with this candidate; starts at zero. */
    int count;

    /** Returns the path rendered by {@link Arrays#toString(Object[])}. */
    @Override
    public String toString() {
        final String renderedPath = Arrays.toString(path);
        return renderedPath;
    }
}
|
@ -31,22 +31,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
@ -54,83 +50,6 @@ import com.google.refine.model.Project;
|
||||
public abstract class TreeImportUtilities {
|
||||
final static Logger logger = LoggerFactory.getLogger("TreeImportUtilities");
|
||||
|
||||
/**
|
||||
* An element which holds sub-elements we
|
||||
* shall import as records
|
||||
*/
|
||||
static protected class RecordElementCandidate {
|
||||
String[] path;
|
||||
int count;
|
||||
|
||||
public String toString() {
|
||||
return Arrays.toString(path);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static protected abstract class ImportVertical {
|
||||
public String name = "";
|
||||
public int nonBlankCount;
|
||||
|
||||
abstract void tabulate();
|
||||
}
|
||||
|
||||
/**
|
||||
* A column group describes a branch in tree structured data
|
||||
*/
|
||||
static public class ImportColumnGroup extends ImportVertical {
|
||||
public Map<String, ImportColumnGroup> subgroups = new HashMap<String, ImportColumnGroup>();
|
||||
public Map<String, ImportColumn> columns = new HashMap<String, ImportColumn>();
|
||||
public int nextRowIndex;
|
||||
|
||||
@Override
|
||||
void tabulate() {
|
||||
for (ImportColumn c : columns.values()) {
|
||||
c.tabulate();
|
||||
nonBlankCount = Math.max(nonBlankCount, c.nonBlankCount);
|
||||
}
|
||||
for (ImportColumnGroup g : subgroups.values()) {
|
||||
g.tabulate();
|
||||
nonBlankCount = Math.max(nonBlankCount, g.nonBlankCount);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("name=%s, columns={%s}, subgroups={{%s}}",
|
||||
name,StringUtils.join(columns.keySet(), ','),
|
||||
StringUtils.join(subgroups.keySet(),','));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A column is used to describe a branch-terminating element in a tree structure
|
||||
*
|
||||
*/
|
||||
static public class ImportColumn extends ImportVertical {
|
||||
public int cellIndex;
|
||||
public int nextRowIndex;
|
||||
public boolean blankOnFirstRow;
|
||||
|
||||
public ImportColumn() {}
|
||||
|
||||
public ImportColumn(String name) { //required for testing
|
||||
super.name = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
void tabulate() {
|
||||
// already done the tabulation elsewhere
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A record describes a data element in a tree-structure
|
||||
*
|
||||
*/
|
||||
static public class ImportRecord {
|
||||
public List<List<Cell>> rows = new LinkedList<List<Cell>>();
|
||||
}
|
||||
|
||||
static protected void sortRecordElementCandidates(List<RecordElementCandidate> list) {
|
||||
Collections.sort(list, new Comparator<RecordElementCandidate>() {
|
||||
public int compare(RecordElementCandidate o1, RecordElementCandidate o2) {
|
@ -0,0 +1,169 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.NotImplementedException;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.ImporterUtilities;
|
||||
import com.google.refine.importers.ImporterUtilities.MultiFileReadingProgress;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingParser;
|
||||
import com.google.refine.importing.ImportingUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
abstract public class TreeImportingParserBase implements ImportingParser {
|
||||
final protected boolean useInputStream;
|
||||
|
||||
protected TreeImportingParserBase(boolean useInputStream) {
|
||||
this.useInputStream = useInputStream;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JSONObject createParserUIInitializationData(ImportingJob job,
|
||||
List<JSONObject> fileRecords, String format) {
|
||||
JSONObject options = new JSONObject();
|
||||
return options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(Project project, ProjectMetadata metadata,
|
||||
ImportingJob job, List<JSONObject> fileRecords, String format,
|
||||
int limit, JSONObject options, List<Exception> exceptions) {
|
||||
|
||||
MultiFileReadingProgress progress = ImporterUtilities.createMultiFileReadingProgress(job, fileRecords);
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
|
||||
for (JSONObject fileRecord : fileRecords) {
|
||||
try {
|
||||
parseOneFile(project, metadata, job, fileRecord, rootColumnGroup, limit, options, exceptions, progress);
|
||||
} catch (IOException e) {
|
||||
exceptions.add(e);
|
||||
}
|
||||
|
||||
if (limit > 0 && project.rows.size() >= limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
project.columnModel.update();
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
JSONObject fileRecord,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions,
|
||||
final MultiFileReadingProgress progress
|
||||
) throws IOException {
|
||||
final File file = ImportingUtilities.getFile(job, fileRecord);
|
||||
final String fileSource = ImportingUtilities.getFileSource(fileRecord);
|
||||
|
||||
progress.startFile(fileSource);
|
||||
try {
|
||||
InputStream inputStream = ImporterUtilities.openAndTrackFile(fileSource, file, progress);
|
||||
try {
|
||||
if (useInputStream) {
|
||||
parseOneFile(project, metadata, job, fileSource, inputStream,
|
||||
rootColumnGroup, limit, options, exceptions);
|
||||
} else {
|
||||
Reader reader = ImportingUtilities.getFileReader(file, fileRecord);
|
||||
parseOneFile(project, metadata, job, fileSource, reader,
|
||||
rootColumnGroup, limit, options, exceptions);
|
||||
}
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
} finally {
|
||||
progress.endFile(fileSource, file.length());
|
||||
}
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
Reader reader,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
public void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
InputStream inputStream,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
protected void parseOneFile(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
String fileSource,
|
||||
TreeReader treeParser,
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
) {
|
||||
String[] recordPath = JSONUtilities.getStringArray(options, "recordPath");
|
||||
|
||||
XmlImportUtilities.importTreeData(treeParser, project, recordPath, rootColumnGroup, limit);
|
||||
}
|
||||
}
|
@ -31,17 +31,26 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
public interface TreeReader {
|
||||
public enum Token {
|
||||
Ignorable,
|
||||
StartEntity,
|
||||
EndEntity,
|
||||
Value
|
||||
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
|
||||
}
|
||||
|
||||
public interface TreeParser {
|
||||
public TreeParserToken next() throws ServletException;
|
||||
public TreeParserToken getEventType() throws ServletException; //aka getCurrentToken
|
||||
public boolean hasNext() throws ServletException;
|
||||
public String getLocalName() throws ServletException; //aka getFieldName
|
||||
public Token current() throws Exception; //aka getCurrentToken
|
||||
|
||||
public boolean hasNext() throws Exception;
|
||||
public Token next() throws Exception;
|
||||
|
||||
public String getFieldName() throws Exception; //aka getFieldName
|
||||
public String getPrefix();
|
||||
public String getText() throws ServletException;
|
||||
public String getFieldValue() throws Exception;
|
||||
|
||||
public int getAttributeCount();
|
||||
public String getAttributeValue(int index);
|
||||
public String getAttributePrefix(int index);
|
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
package com.google.refine.importers.tree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
@ -40,13 +40,10 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.importers.tree.TreeReader.Token;
|
||||
import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
@ -54,11 +51,11 @@ import com.google.refine.model.Row;
|
||||
public class XmlImportUtilities extends TreeImportUtilities {
|
||||
final static Logger logger = LoggerFactory.getLogger("XmlImportUtilities");
|
||||
|
||||
static public String[] detectPathFromTag(TreeParser parser, String tag) {
|
||||
static public String[] detectPathFromTag(TreeReader parser, String tag) {
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
List<String> path = detectRecordElement(parser, tag);
|
||||
if (path != null) {
|
||||
String[] path2 = new String[path.size()];
|
||||
@ -90,14 +87,14 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
* null if the tag is not found.
|
||||
* @throws ServletException
|
||||
*/
|
||||
static protected List<String> detectRecordElement(TreeParser parser, String tag) throws ServletException {
|
||||
static protected List<String> detectRecordElement(TreeReader parser, String tag) throws Exception {
|
||||
try{
|
||||
if(parser.getEventType() == TreeParserToken.Ignorable)//XMLStreamConstants.START_DOCUMENT)
|
||||
if(parser.current() == Token.Ignorable)//XMLStreamConstants.START_DOCUMENT)
|
||||
parser.next();
|
||||
|
||||
String localName = parser.getLocalName();
|
||||
String localName = parser.getFieldName();
|
||||
String fullName = composeName(parser.getPrefix(), localName);
|
||||
if (tag.equals(parser.getLocalName()) || tag.equals(fullName)) {
|
||||
if (tag.equals(parser.getFieldName()) || tag.equals(fullName)) {
|
||||
List<String> path = new LinkedList<String>();
|
||||
path.add(localName);
|
||||
|
||||
@ -105,10 +102,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.EndEntity) {//XMLStreamConstants.END_ELEMENT) {
|
||||
break;
|
||||
} else if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
} else if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
List<String> path = detectRecordElement(parser, tag);
|
||||
if (path != null) {
|
||||
path.add(0, localName);
|
||||
@ -116,7 +113,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
}
|
||||
}catch(ServletException e){
|
||||
} catch (Exception e) {
|
||||
// silent
|
||||
// e.printStackTrace();
|
||||
}
|
||||
@ -136,18 +133,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
* The path to the most numerous of the possible candidates.
|
||||
* null if no candidates were found (less than 6 recurrences)
|
||||
*/
|
||||
static public String[] detectRecordElement(TreeParser parser) {
|
||||
static public String[] detectRecordElement(TreeReader parser) {
|
||||
logger.trace("detectRecordElement(inputStream)");
|
||||
List<RecordElementCandidate> candidates = new ArrayList<RecordElementCandidate>();
|
||||
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
RecordElementCandidate candidate =
|
||||
detectRecordElement(
|
||||
parser,
|
||||
new String[] { parser.getLocalName() });
|
||||
new String[] { parser.getFieldName() });
|
||||
|
||||
if (candidate != null) {
|
||||
candidates.add(candidate);
|
||||
@ -168,8 +165,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
return null;
|
||||
}
|
||||
|
||||
static protected RecordElementCandidate detectRecordElement(TreeParser parser, String[] path) {
|
||||
logger.trace("detectRecordElement(TreeParser, String[])");
|
||||
static protected RecordElementCandidate detectRecordElement(TreeReader parser, String[] path) {
|
||||
logger.trace("detectRecordElement(TreeReader, String[])");
|
||||
List<RecordElementCandidate> descendantCandidates = new ArrayList<RecordElementCandidate>();
|
||||
|
||||
Map<String, Integer> immediateChildCandidateMap = new HashMap<String, Integer>();
|
||||
@ -178,21 +175,21 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.EndEntity ) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.EndEntity ) {
|
||||
break;
|
||||
} else if (eventType == TreeParserToken.Value) {
|
||||
} else if (eventType == Token.Value) {
|
||||
try{
|
||||
if (parser.getText().trim().length() > 0) {
|
||||
if (parser.getFieldValue().trim().length() > 0) {
|
||||
textNodeCount++;
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
} else if (eventType == TreeParserToken.StartEntity) {
|
||||
} else if (eventType == Token.StartEntity) {
|
||||
childElementNodeCount++;
|
||||
|
||||
String tagName = parser.getLocalName();
|
||||
String tagName = parser.getFieldName();
|
||||
|
||||
immediateChildCandidateMap.put(
|
||||
tagName,
|
||||
@ -261,17 +258,18 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
|
||||
|
||||
static public void importTreeData(
|
||||
TreeParser parser,
|
||||
TreeReader parser,
|
||||
Project project,
|
||||
String[] recordPath,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
) {
|
||||
logger.trace("importTreeData(TreeParser, Project, String[], ImportColumnGroup)");
|
||||
logger.trace("importTreeData(TreeReader, Project, String[], ImportColumnGroup)");
|
||||
try {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup);
|
||||
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
findRecord(project, parser, recordPath, 0, rootColumnGroup, limit);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@ -292,26 +290,30 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
*/
|
||||
static protected void findRecord(
|
||||
Project project,
|
||||
TreeParser parser,
|
||||
TreeReader parser,
|
||||
String[] recordPath,
|
||||
int pathIndex,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws ServletException {
|
||||
logger.trace("findRecord(Project, TreeParser, String[], int, ImportColumnGroup");
|
||||
ImportColumnGroup rootColumnGroup,
|
||||
int limit
|
||||
) throws Exception {
|
||||
logger.trace("findRecord(Project, TreeReader, String[], int, ImportColumnGroup");
|
||||
|
||||
if(parser.getEventType() == TreeParserToken.Ignorable){//XMLStreamConstants.START_DOCUMENT){
|
||||
if(parser.current() == Token.Ignorable){//XMLStreamConstants.START_DOCUMENT){
|
||||
logger.warn("Cannot use findRecord method for START_DOCUMENT event");
|
||||
return;
|
||||
}
|
||||
|
||||
String tagName = parser.getLocalName();
|
||||
if (tagName.equals(recordPath[pathIndex])) {
|
||||
String recordPathSegment = recordPath[pathIndex];
|
||||
|
||||
String localName = parser.getFieldName();
|
||||
String fullName = composeName(parser.getPrefix(), localName);
|
||||
if (recordPathSegment.equals(localName) || recordPathSegment.equals(fullName)) {
|
||||
if (pathIndex < recordPath.length - 1) {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup);
|
||||
} else if (eventType == TreeParserToken.EndEntity ) {
|
||||
while (parser.hasNext() && (limit <= 0 || project.rows.size() < limit)) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
findRecord(project, parser, recordPath, pathIndex + 1, rootColumnGroup, limit);
|
||||
} else if (eventType == Token.EndEntity ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -323,12 +325,12 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
static protected void skip(TreeParser parser) throws ServletException {
|
||||
static protected void skip(TreeReader parser) throws Exception {
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {//XMLStreamConstants.START_ELEMENT) {
|
||||
skip(parser);
|
||||
} else if (eventType == TreeParserToken.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||
} else if (eventType == Token.EndEntity) { //XMLStreamConstants.END_ELEMENT) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -344,10 +346,10 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
*/
|
||||
static protected void processRecord(
|
||||
Project project,
|
||||
TreeParser parser,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup rootColumnGroup
|
||||
) throws ServletException {
|
||||
logger.trace("processRecord(Project,TreeParser,ImportColumnGroup)");
|
||||
) throws Exception {
|
||||
logger.trace("processRecord(Project,TreeReader,ImportColumnGroup)");
|
||||
ImportRecord record = new ImportRecord();
|
||||
|
||||
processSubRecord(project, parser, rootColumnGroup, record);
|
||||
@ -382,19 +384,19 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
*/
|
||||
static protected void processSubRecord(
|
||||
Project project,
|
||||
TreeParser parser,
|
||||
TreeReader parser,
|
||||
ImportColumnGroup columnGroup,
|
||||
ImportRecord record
|
||||
) throws ServletException {
|
||||
logger.trace("processSubRecord(Project,TreeParser,ImportColumnGroup,ImportRecord)");
|
||||
) throws Exception {
|
||||
logger.trace("processSubRecord(Project,TreeReader,ImportColumnGroup,ImportRecord)");
|
||||
|
||||
if(parser.getEventType() == TreeParserToken.Ignorable)
|
||||
if(parser.current() == Token.Ignorable)
|
||||
return;
|
||||
|
||||
ImportColumnGroup thisColumnGroup = getColumnGroup(
|
||||
project,
|
||||
columnGroup,
|
||||
composeName(parser.getPrefix(), parser.getLocalName()));
|
||||
composeName(parser.getPrefix(), parser.getFieldName()));
|
||||
|
||||
thisColumnGroup.nextRowIndex = Math.max(thisColumnGroup.nextRowIndex, columnGroup.nextRowIndex);
|
||||
|
||||
@ -413,8 +415,8 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
|
||||
while (parser.hasNext()) {
|
||||
TreeParserToken eventType = parser.next();
|
||||
if (eventType == TreeParserToken.StartEntity) {
|
||||
Token eventType = parser.next();
|
||||
if (eventType == Token.StartEntity) {
|
||||
processSubRecord(
|
||||
project,
|
||||
parser,
|
||||
@ -422,9 +424,9 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
record
|
||||
);
|
||||
} else if (//eventType == XMLStreamConstants.CDATA ||
|
||||
eventType == TreeParserToken.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getText();
|
||||
String colName = parser.getLocalName();
|
||||
eventType == Token.Value) { //XMLStreamConstants.CHARACTERS) {
|
||||
String text = parser.getFieldValue();
|
||||
String colName = parser.getFieldName();
|
||||
if(text != null){
|
||||
text = text.trim();
|
||||
if (text.length() > 0) {
|
||||
@ -437,7 +439,7 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
);
|
||||
}
|
||||
}
|
||||
} else if (eventType == TreeParserToken.EndEntity) {
|
||||
} else if (eventType == Token.EndEntity) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -451,8 +453,4 @@ public class XmlImportUtilities extends TreeImportUtilities {
|
||||
}
|
||||
thisColumnGroup.nextRowIndex = nextRowIndex;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,264 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.RefineServlet;
|
||||
import com.google.refine.commands.HttpUtilities;
|
||||
import com.google.refine.importing.ImportingManager.Format;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
public class DefaultImportingController implements ImportingController {
|
||||
|
||||
protected RefineServlet servlet;
|
||||
|
||||
@Override
|
||||
public void init(RefineServlet servlet) {
|
||||
this.servlet = servlet;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
// TODO Auto-generated method stub
|
||||
}
|
||||
|
||||
@Override
|
||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
|
||||
/*
|
||||
* The uploaded file is in the POST body as a "file part". If
|
||||
* we call request.getParameter() then the POST body will get
|
||||
* read and we won't have a chance to parse the body ourselves.
|
||||
* This is why we have to parse the URL for parameters ourselves.
|
||||
*/
|
||||
Properties parameters = ParsingUtilities.parseUrlParameters(request);
|
||||
String subCommand = parameters.getProperty("subCommand");
|
||||
if ("load-raw-data".equals(subCommand)) {
|
||||
doLoadRawData(request, response, parameters);
|
||||
} else if ("update-file-selection".equals(subCommand)) {
|
||||
doUpdateFileSelection(request, response, parameters);
|
||||
} else if ("initialize-parser-ui".equals(subCommand)) {
|
||||
doInitializeParserUI(request, response, parameters);
|
||||
} else if ("update-format-and-options".equals(subCommand)) {
|
||||
doUpdateFormatAndOptions(request, response, parameters);
|
||||
} else if ("create-project".equals(subCommand)) {
|
||||
doCreateProject(request, response, parameters);
|
||||
} else {
|
||||
HttpUtilities.respond(response, "error", "No such sub command");
|
||||
}
|
||||
}
|
||||
|
||||
private void doLoadRawData(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
final JSONObject config = getConfig(job);
|
||||
if (!("new".equals(config.getString("state")))) {
|
||||
HttpUtilities.respond(response, "error", "Job already started; cannot load more data");
|
||||
return;
|
||||
}
|
||||
|
||||
ImportingUtilities.loadDataAndPrepareJob(
|
||||
request, response, parameters, job, config);
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void doUpdateFileSelection(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
JSONObject config = getConfig(job);
|
||||
if (!("ready".equals(config.getString("state")))) {
|
||||
HttpUtilities.respond(response, "error", "Job not ready");
|
||||
return;
|
||||
}
|
||||
|
||||
JSONArray fileSelectionArray = ParsingUtilities.evaluateJsonStringToArray(
|
||||
request.getParameter("fileSelection"));
|
||||
|
||||
ImportingUtilities.updateJobWithNewFileSelection(job, fileSelectionArray);
|
||||
|
||||
replyWithJobData(request, response, job);
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void doUpdateFormatAndOptions(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
JSONObject config = getConfig(job);
|
||||
if (!("ready".equals(config.getString("state")))) {
|
||||
HttpUtilities.respond(response, "error", "Job not ready");
|
||||
return;
|
||||
}
|
||||
|
||||
String format = request.getParameter("format");
|
||||
JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
|
||||
request.getParameter("options"));
|
||||
|
||||
List<Exception> exceptions = new LinkedList<Exception>();
|
||||
|
||||
ImportingUtilities.previewParse(job, format, optionObj, exceptions);
|
||||
|
||||
HttpUtilities.respond(response, "ok", "done");
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void doInitializeParserUI(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
return;
|
||||
}
|
||||
|
||||
String format = request.getParameter("format");
|
||||
Format formatRecord = ImportingManager.formatToRecord.get(format);
|
||||
if (formatRecord != null && formatRecord.parser != null) {
|
||||
JSONObject options = formatRecord.parser.createParserUIInitializationData(
|
||||
job, ImportingUtilities.getSelectedFileRecords(job), format);
|
||||
JSONObject result = new JSONObject();
|
||||
JSONUtilities.safePut(result, "status", "ok");
|
||||
JSONUtilities.safePut(result, "options", options);
|
||||
|
||||
HttpUtilities.respond(response, result.toString());
|
||||
} else {
|
||||
HttpUtilities.respond(response, "error", "Unrecognized format or format has no parser");
|
||||
}
|
||||
}
|
||||
|
||||
private void doCreateProject(HttpServletRequest request, HttpServletResponse response, Properties parameters)
|
||||
throws ServletException, IOException {
|
||||
|
||||
long jobID = Long.parseLong(parameters.getProperty("jobID"));
|
||||
ImportingJob job = ImportingManager.getJob(jobID);
|
||||
if (job == null) {
|
||||
HttpUtilities.respond(response, "error", "No such import job");
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
JSONObject config = getConfig(job);
|
||||
if (!("ready".equals(config.getString("state")))) {
|
||||
HttpUtilities.respond(response, "error", "Job not ready");
|
||||
return;
|
||||
}
|
||||
|
||||
String format = request.getParameter("format");
|
||||
JSONObject optionObj = ParsingUtilities.evaluateJsonStringToObject(
|
||||
request.getParameter("options"));
|
||||
|
||||
List<Exception> exceptions = new LinkedList<Exception>();
|
||||
|
||||
ImportingUtilities.createProject(job, format, optionObj, exceptions);
|
||||
|
||||
HttpUtilities.respond(response, "ok", "done");
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private JSONObject getConfig(ImportingJob job) {
|
||||
if (job.config == null) {
|
||||
job.config = new JSONObject();
|
||||
JSONUtilities.safePut(job.config, "state", "new");
|
||||
JSONUtilities.safePut(job.config, "hasData", false);
|
||||
}
|
||||
return job.config;
|
||||
}
|
||||
|
||||
private void replyWithJobData(HttpServletRequest request, HttpServletResponse response, ImportingJob job)
|
||||
throws ServletException, IOException {
|
||||
|
||||
Writer w = response.getWriter();
|
||||
JSONWriter writer = new JSONWriter(w);
|
||||
try {
|
||||
writer.object();
|
||||
writer.key("code"); writer.value("ok");
|
||||
writer.key("job"); job.write(writer, new Properties());
|
||||
writer.endObject();
|
||||
} catch (JSONException e) {
|
||||
throw new ServletException(e);
|
||||
} finally {
|
||||
w.flush();
|
||||
w.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -31,13 +31,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers.parsers;
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public enum TreeParserToken {
|
||||
Ignorable,
|
||||
StartEntity,
|
||||
EndEntity,
|
||||
Value
|
||||
//append additional tokens only if necessary (most should be just mapped to Value or Ignorable)
|
||||
public interface FormatGuesser {
|
||||
public String guess(File file, String encoding, String seedFormat);
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -31,17 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
package com.google.refine.importing;
|
||||
|
||||
import com.google.refine.HttpResponder;
|
||||
|
||||
public interface Importer {
|
||||
|
||||
/**
|
||||
* Determine whether importer can handle given contentType and filename.
|
||||
*
|
||||
* @param contentType
|
||||
* @param fileName
|
||||
* @return true if the importer can handle this
|
||||
*/
|
||||
public boolean canImportData(String contentType, String fileName);
|
||||
public interface ImportingController extends HttpResponder {
|
||||
}
|
106
main/src/com/google/refine/importing/ImportingJob.java
Normal file
106
main/src/com/google/refine/importing/ImportingJob.java
Normal file
@ -0,0 +1,106 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
|
||||
public class ImportingJob implements Jsonizable {
|
||||
final public long id;
|
||||
final public File dir; // Temporary directory where the data about this job is stored
|
||||
|
||||
public long lastTouched;
|
||||
public JSONObject config = null;
|
||||
|
||||
public Project project;
|
||||
public ProjectMetadata metadata;
|
||||
public boolean canceled;
|
||||
|
||||
public ImportingJob(long id, File dir) {
|
||||
this.id = id;
|
||||
this.dir = dir;
|
||||
|
||||
dir.mkdirs();
|
||||
}
|
||||
|
||||
public void touch() {
|
||||
lastTouched = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public void prepareNewProject() {
|
||||
if (project != null) {
|
||||
project.dispose();
|
||||
}
|
||||
project = new Project();
|
||||
metadata = new ProjectMetadata();
|
||||
}
|
||||
|
||||
public void dispose() {
|
||||
if (project != null) {
|
||||
project.dispose();
|
||||
project = null;
|
||||
}
|
||||
metadata = null;
|
||||
|
||||
try {
|
||||
FileUtils.deleteDirectory(dir);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
|
||||
public File getRawDataDir() {
|
||||
File dir2 = new File(dir, "raw-data");
|
||||
dir2.mkdirs();
|
||||
return dir2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
writer.object();
|
||||
writer.key("config"); writer.value(config);
|
||||
writer.endObject();
|
||||
}
|
||||
}
|
257
main/src/com/google/refine/importing/ImportingManager.java
Normal file
257
main/src/com/google/refine/importing/ImportingManager.java
Normal file
@ -0,0 +1,257 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.RefineServlet;
|
||||
|
||||
import edu.mit.simile.butterfly.ButterflyModule;
|
||||
|
||||
public class ImportingManager {
|
||||
static public class Format {
|
||||
final public String id;
|
||||
final public String label;
|
||||
final public boolean download;
|
||||
final public String uiClass;
|
||||
final public ImportingParser parser;
|
||||
|
||||
private Format(
|
||||
String id,
|
||||
String label,
|
||||
boolean download,
|
||||
String uiClass,
|
||||
ImportingParser parser
|
||||
) {
|
||||
this.id = id;
|
||||
this.label = label;
|
||||
this.download = download;
|
||||
this.uiClass = uiClass;
|
||||
this.parser = parser;
|
||||
}
|
||||
}
|
||||
|
||||
static private RefineServlet servlet;
|
||||
static private File importDir;
|
||||
final static private Map<Long, ImportingJob> jobs = new HashMap<Long, ImportingJob>();
|
||||
|
||||
// Mapping from format to label, e.g., "text" to "Text files", "text/xml" to "XML files"
|
||||
final static public Map<String, Format> formatToRecord = new HashMap<String, Format>();
|
||||
|
||||
// Mapping from format to guessers
|
||||
final static public Map<String, List<FormatGuesser>> formatToGuessers = new HashMap<String, List<FormatGuesser>>();
|
||||
|
||||
// Mapping from file extension to format, e.g., ".xml" to "text/xml"
|
||||
final static public Map<String, String> extensionToFormat = new HashMap<String, String>();
|
||||
|
||||
// Mapping from mime type to format, e.g., "application/json" to "text/json"
|
||||
final static public Map<String, String> mimeTypeToFormat = new HashMap<String, String>();
|
||||
|
||||
// URL rewriters
|
||||
final static public Set<UrlRewriter> urlRewriters = new HashSet<UrlRewriter>();
|
||||
|
||||
// Mapping from controller name to controller
|
||||
final static public Map<String, ImportingController> controllers = new HashMap<String, ImportingController>();
|
||||
|
||||
static public void initialize(RefineServlet servlet) {
|
||||
ImportingManager.servlet = servlet;
|
||||
}
|
||||
|
||||
static public void registerFormat(String format, String label) {
|
||||
registerFormat(format, label, null, null);
|
||||
}
|
||||
|
||||
static public void registerFormat(String format, String label, String uiClass, ImportingParser parser) {
|
||||
formatToRecord.put(format, new Format(format, label, true, uiClass, parser));
|
||||
}
|
||||
|
||||
static public void registerFormat(
|
||||
String format, String label, boolean download, String uiClass, ImportingParser parser) {
|
||||
formatToRecord.put(format, new Format(format, label, download, uiClass, parser));
|
||||
}
|
||||
|
||||
static public void registerFormatGuesser(String format, FormatGuesser guesser) {
|
||||
List<FormatGuesser> guessers = formatToGuessers.get(format);
|
||||
if (guessers == null) {
|
||||
guessers = new LinkedList<FormatGuesser>();
|
||||
formatToGuessers.put(format, guessers);
|
||||
}
|
||||
guessers.add(0, guesser); // prepend so that newer guessers take priority
|
||||
}
|
||||
|
||||
static public void registerExtension(String extension, String format) {
|
||||
extensionToFormat.put(extension.startsWith(".") ? extension : ("." + extension), format);
|
||||
}
|
||||
|
||||
static public void registerMimeType(String mimeType, String format) {
|
||||
mimeTypeToFormat.put(mimeType, format);
|
||||
}
|
||||
|
||||
static public void registerUrlRewriter(UrlRewriter urlRewriter) {
|
||||
urlRewriters.add(urlRewriter);
|
||||
}
|
||||
|
||||
static public void registerController(ButterflyModule module, String name, ImportingController controller) {
|
||||
String key = module.getName() + "/" + name;
|
||||
controllers.put(key, controller);
|
||||
|
||||
controller.init(servlet);
|
||||
}
|
||||
|
||||
static public File getImportDir() {
|
||||
if (importDir == null) {
|
||||
File tempDir = servlet.getTempDir();
|
||||
importDir = tempDir == null ? new File(".import-temp") : new File(tempDir, "import");
|
||||
|
||||
if (importDir.exists()) {
|
||||
try {
|
||||
// start fresh
|
||||
FileUtils.deleteDirectory(importDir);
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
importDir.mkdirs();
|
||||
}
|
||||
return importDir;
|
||||
}
|
||||
|
||||
static public ImportingJob createJob() {
|
||||
long id = System.currentTimeMillis() + (long) (Math.random() * 1000000);
|
||||
File jobDir = new File(getImportDir(), Long.toString(id));
|
||||
|
||||
ImportingJob job = new ImportingJob(id, jobDir);
|
||||
jobs.put(id, job);
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
static public ImportingJob getJob(long id) {
|
||||
return jobs.get(id);
|
||||
}
|
||||
|
||||
static public void disposeJob(long id) {
|
||||
ImportingJob job = getJob(id);
|
||||
if (job != null) {
|
||||
job.dispose();
|
||||
jobs.remove(id);
|
||||
}
|
||||
}
|
||||
|
||||
static public void writeConfiguration(JSONWriter writer, Properties options) throws JSONException {
|
||||
writer.object();
|
||||
|
||||
writer.key("formats");
|
||||
writer.object();
|
||||
for (String format : formatToRecord.keySet()) {
|
||||
Format record = formatToRecord.get(format);
|
||||
|
||||
writer.key(format);
|
||||
writer.object();
|
||||
writer.key("id"); writer.value(record.id);
|
||||
writer.key("label"); writer.value(record.label);
|
||||
writer.key("download"); writer.value(record.download);
|
||||
writer.key("uiClass"); writer.value(record.uiClass);
|
||||
writer.endObject();
|
||||
}
|
||||
writer.endObject();
|
||||
|
||||
writer.key("mimeTypeToFormat");
|
||||
writer.object();
|
||||
for (String mimeType : mimeTypeToFormat.keySet()) {
|
||||
writer.key(mimeType);
|
||||
writer.value(mimeTypeToFormat.get(mimeType));
|
||||
}
|
||||
writer.endObject();
|
||||
|
||||
writer.key("extensionToFormat");
|
||||
writer.object();
|
||||
for (String extension : extensionToFormat.keySet()) {
|
||||
writer.key(extension);
|
||||
writer.value(extensionToFormat.get(extension));
|
||||
}
|
||||
writer.endObject();
|
||||
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
static public String getFormatFromFileName(String fileName) {
|
||||
int start = 0;
|
||||
while (true) {
|
||||
int dot = fileName.indexOf('.', start);
|
||||
if (dot < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
String extension = fileName.substring(dot);
|
||||
String format = extensionToFormat.get(extension);
|
||||
if (format != null) {
|
||||
return format;
|
||||
} else {
|
||||
start = dot + 1;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public String getFormatFromMimeType(String mimeType) {
|
||||
return mimeTypeToFormat.get(mimeType);
|
||||
}
|
||||
|
||||
static public String getFormat(String fileName, String mimeType) {
|
||||
String fileNameFormat = getFormatFromFileName(fileName);
|
||||
String mimeTypeFormat = mimeType == null ? null : getFormatFromMimeType(mimeType);
|
||||
if (mimeTypeFormat == null) {
|
||||
return fileNameFormat;
|
||||
} else if (fileNameFormat == null) {
|
||||
return mimeTypeFormat;
|
||||
} else if (fileNameFormat.startsWith(mimeTypeFormat)) {
|
||||
// file name-based format is more specific
|
||||
return fileNameFormat;
|
||||
} else {
|
||||
return mimeTypeFormat;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
Copyright 2010, Google Inc.
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@ -31,33 +31,51 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importers;
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.Properties;
|
||||
import java.util.List;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
public interface ImportingParser {
|
||||
/**
|
||||
* Interface for importers which take a Reader as input.
|
||||
* Create data sufficient for the parser UI on the client side to do its work.
|
||||
* For example, an XML parser UI would need to know some sample elements so it
|
||||
* can let the user pick the path to the record elements.
|
||||
*
|
||||
* @param job
|
||||
* @param fileRecords
|
||||
* @param format
|
||||
* @return JSONObject options
|
||||
*/
|
||||
public interface ReaderImporter extends Importer {
|
||||
public JSONObject createParserUIInitializationData(
|
||||
ImportingJob job,
|
||||
List<JSONObject> fileRecords,
|
||||
String format
|
||||
);
|
||||
|
||||
/**
|
||||
* Read data from a input reader into project.
|
||||
*
|
||||
* @param reader
|
||||
* reader to import data from. It is assumed to be positioned at
|
||||
* the correct point and ready to go.
|
||||
* @param project
|
||||
* project which will contain data
|
||||
* @param metadata
|
||||
* metadata of new project
|
||||
* @param options
|
||||
* set of properties with import options
|
||||
* @throws ImportException
|
||||
* @param fileRecords
|
||||
* @param format
|
||||
* @param limit maximum number of rows to create
|
||||
* @param options custom options put together by the UI corresponding to this parser,
|
||||
* which the parser should understand
|
||||
* @param exceptions
|
||||
*/
|
||||
public void read(Reader reader, Project project, ProjectMetadata metadata, Properties options)
|
||||
throws ImportException;
|
||||
public void parse(
|
||||
Project project,
|
||||
ProjectMetadata metadata,
|
||||
ImportingJob job,
|
||||
List<JSONObject> fileRecords,
|
||||
String format,
|
||||
int limit,
|
||||
JSONObject options,
|
||||
List<Exception> exceptions
|
||||
);
|
||||
}
|
895
main/src/com/google/refine/importing/ImportingUtilities.java
Normal file
895
main/src/com/google/refine/importing/ImportingUtilities.java
Normal file
@ -0,0 +1,895 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
package com.google.refine.importing;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URL;
|
||||
import java.net.URLConnection;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.fileupload.FileItem;
|
||||
import org.apache.commons.fileupload.FileUploadException;
|
||||
import org.apache.commons.fileupload.ProgressListener;
|
||||
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||
import org.apache.commons.fileupload.util.Streams;
|
||||
import org.apache.commons.io.FileCleaningTracker;
|
||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||
import org.apache.tools.tar.TarEntry;
|
||||
import org.apache.tools.tar.TarInputStream;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.refine.ProjectManager;
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importing.ImportingManager.Format;
|
||||
import com.google.refine.importing.UrlRewriter.Result;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
|
||||
public class ImportingUtilities {
|
||||
final static protected Logger logger = LoggerFactory.getLogger("importing-utilities");
|
||||
|
||||
static public interface Progress {
|
||||
public void setProgress(String message, int percent);
|
||||
public boolean isCanceled();
|
||||
}
|
||||
|
||||
static public void loadDataAndPrepareJob(
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response,
|
||||
Properties parameters,
|
||||
final ImportingJob job,
|
||||
JSONObject config) throws IOException, ServletException {
|
||||
|
||||
JSONObject retrievalRecord = new JSONObject();
|
||||
JSONUtilities.safePut(config, "retrievalRecord", retrievalRecord);
|
||||
JSONUtilities.safePut(config, "state", "loading-raw-data");
|
||||
|
||||
final JSONObject progress = new JSONObject();
|
||||
JSONUtilities.safePut(config, "progress", progress);
|
||||
try {
|
||||
ImportingUtilities.retrieveContentFromPostRequest(
|
||||
request,
|
||||
parameters,
|
||||
job.getRawDataDir(),
|
||||
retrievalRecord,
|
||||
new Progress() {
|
||||
@Override
|
||||
public void setProgress(String message, int percent) {
|
||||
if (message != null) {
|
||||
JSONUtilities.safePut(progress, "message", message);
|
||||
}
|
||||
JSONUtilities.safePut(progress, "percent", percent);
|
||||
}
|
||||
public boolean isCanceled() {
|
||||
return job.canceled;
|
||||
}
|
||||
}
|
||||
);
|
||||
} catch (FileUploadException e) {
|
||||
JSONUtilities.safePut(config, "state", "error");
|
||||
JSONUtilities.safePut(config, "error", "Error uploading data");
|
||||
|
||||
throw new ServletException(e);
|
||||
}
|
||||
|
||||
JSONArray fileSelectionIndexes = new JSONArray();
|
||||
JSONUtilities.safePut(config, "fileSelection", fileSelectionIndexes);
|
||||
|
||||
String bestFormat = ImportingUtilities.autoSelectFiles(job, retrievalRecord, fileSelectionIndexes);
|
||||
bestFormat = ImportingUtilities.guessBetterFormat(job, bestFormat);
|
||||
|
||||
JSONArray rankedFormats = new JSONArray();
|
||||
JSONUtilities.safePut(config, "rankedFormats", rankedFormats);
|
||||
ImportingUtilities.rankFormats(job, bestFormat, rankedFormats);
|
||||
|
||||
JSONUtilities.safePut(config, "state", "ready");
|
||||
JSONUtilities.safePut(config, "hasData", true);
|
||||
config.remove("progress");
|
||||
}
|
||||
|
||||
static public void updateJobWithNewFileSelection(ImportingJob job, JSONArray fileSelectionArray) {
|
||||
JSONUtilities.safePut(job.config, "fileSelection", fileSelectionArray);
|
||||
|
||||
String bestFormat = ImportingUtilities.getCommonFormatForSelectedFiles(job, fileSelectionArray);
|
||||
bestFormat = ImportingUtilities.guessBetterFormat(job, bestFormat);
|
||||
|
||||
JSONArray rankedFormats = new JSONArray();
|
||||
JSONUtilities.safePut(job.config, "rankedFormats", rankedFormats);
|
||||
ImportingUtilities.rankFormats(job, bestFormat, rankedFormats);
|
||||
}
|
||||
|
||||
static public void retrieveContentFromPostRequest(
|
||||
HttpServletRequest request,
|
||||
Properties parameters,
|
||||
File rawDataDir,
|
||||
JSONObject retrievalRecord,
|
||||
final Progress progress
|
||||
) throws FileUploadException, IOException {
|
||||
JSONArray fileRecords = new JSONArray();
|
||||
JSONUtilities.safePut(retrievalRecord, "files", fileRecords);
|
||||
|
||||
int clipboardCount = 0;
|
||||
int uploadCount = 0;
|
||||
int downloadCount = 0;
|
||||
int archiveCount = 0;
|
||||
|
||||
// This tracks the total progress, which involves uploading data from the client
|
||||
// as well as downloading data from URLs.
|
||||
final SavingUpdate update = new SavingUpdate() {
|
||||
@Override
|
||||
public void savedMore() {
|
||||
progress.setProgress(null, calculateProgressPercent(totalExpectedSize, totalRetrievedSize));
|
||||
}
|
||||
@Override
|
||||
public boolean isCanceled() {
|
||||
return progress.isCanceled();
|
||||
}
|
||||
};
|
||||
|
||||
DiskFileItemFactory fileItemFactory = new DiskFileItemFactory();
|
||||
fileItemFactory.setFileCleaningTracker(new FileCleaningTracker());
|
||||
|
||||
ServletFileUpload upload = new ServletFileUpload(fileItemFactory);
|
||||
upload.setProgressListener(new ProgressListener() {
|
||||
boolean setContentLength = false;
|
||||
long lastBytesRead = 0;
|
||||
|
||||
@Override
|
||||
public void update(long bytesRead, long contentLength, int itemCount) {
|
||||
if (!setContentLength) {
|
||||
// Only try to set the content length if we really know it.
|
||||
if (contentLength >= 0) {
|
||||
update.totalExpectedSize += contentLength;
|
||||
setContentLength = true;
|
||||
}
|
||||
}
|
||||
if (setContentLength) {
|
||||
update.totalRetrievedSize += (bytesRead - lastBytesRead);
|
||||
lastBytesRead = bytesRead;
|
||||
|
||||
update.savedMore();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
progress.setProgress("Uploading data ...", -1);
|
||||
for (Object obj : upload.parseRequest(request)) {
|
||||
if (progress.isCanceled()) {
|
||||
break;
|
||||
}
|
||||
|
||||
FileItem fileItem = (FileItem) obj;
|
||||
InputStream stream = fileItem.getInputStream();
|
||||
|
||||
String name = fileItem.getFieldName().toLowerCase();
|
||||
if (fileItem.isFormField()) {
|
||||
if (name.equals("clipboard")) {
|
||||
File file = allocateFile(rawDataDir, "clipboard.txt");
|
||||
|
||||
JSONObject fileRecord = new JSONObject();
|
||||
JSONUtilities.safePut(fileRecord, "origin", "clipboard");
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
|
||||
JSONUtilities.safePut(fileRecord, "format", "text");
|
||||
JSONUtilities.safePut(fileRecord, "fileName", "(clipboard)");
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||
|
||||
progress.setProgress("Uploading pasted clipboard text",
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||
|
||||
clipboardCount++;
|
||||
|
||||
JSONUtilities.append(fileRecords, fileRecord);
|
||||
} else if (name.equals("download")) {
|
||||
String urlString = Streams.asString(stream);
|
||||
URL url = new URL(urlString);
|
||||
|
||||
JSONObject fileRecord = new JSONObject();
|
||||
JSONUtilities.safePut(fileRecord, "origin", "download");
|
||||
JSONUtilities.safePut(fileRecord, "url", urlString);
|
||||
|
||||
for (UrlRewriter rewriter : ImportingManager.urlRewriters) {
|
||||
Result result = rewriter.rewrite(urlString);
|
||||
if (result != null) {
|
||||
urlString = result.rewrittenUrl;
|
||||
url = new URL(urlString);
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "url", urlString);
|
||||
JSONUtilities.safePut(fileRecord, "format", result.format);
|
||||
if (!result.download) {
|
||||
downloadCount++;
|
||||
JSONUtilities.append(fileRecords, fileRecord);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
URLConnection urlConnection = url.openConnection();
|
||||
InputStream stream2 = urlConnection.getInputStream();
|
||||
try {
|
||||
String fileName = url.getFile();
|
||||
File file = allocateFile(rawDataDir, fileName);
|
||||
|
||||
int contentLength = urlConnection.getContentLength();
|
||||
if (contentLength >= 0) {
|
||||
update.totalExpectedSize += contentLength;
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", urlConnection.getContentEncoding());
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", urlConnection.getContentType());
|
||||
JSONUtilities.safePut(fileRecord, "fileName", fileName);
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||
|
||||
progress.setProgress("Downloading " + urlString,
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
long actualLength = saveStreamToFile(stream, file, update);
|
||||
JSONUtilities.safePut(fileRecord, "size", actualLength);
|
||||
if (contentLength >= 0) {
|
||||
update.totalExpectedSize += (actualLength - contentLength);
|
||||
} else {
|
||||
update.totalExpectedSize += actualLength;
|
||||
}
|
||||
progress.setProgress("Saving " + urlString + " locally",
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
|
||||
archiveCount++;
|
||||
}
|
||||
|
||||
downloadCount++;
|
||||
} finally {
|
||||
stream2.close();
|
||||
}
|
||||
}
|
||||
|
||||
} else { // is file content
|
||||
String fileName = fileItem.getName();
|
||||
if (fileName.length() > 0) {
|
||||
long fileSize = fileItem.getSize();
|
||||
|
||||
File file = allocateFile(rawDataDir, fileName);
|
||||
|
||||
JSONObject fileRecord = new JSONObject();
|
||||
JSONUtilities.safePut(fileRecord, "origin", "upload");
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", request.getCharacterEncoding());
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", fileItem.getContentType());
|
||||
JSONUtilities.safePut(fileRecord, "fileName", fileName);
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file, rawDataDir));
|
||||
|
||||
progress.setProgress(
|
||||
"Saving file " + fileName + " locally (" + formatBytes(fileSize) + " bytes)",
|
||||
calculateProgressPercent(update.totalExpectedSize, update.totalRetrievedSize));
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "size", saveStreamToFile(stream, file, null));
|
||||
if (postProcessRetrievedFile(file, fileRecord, fileRecords, progress)) {
|
||||
archiveCount++;
|
||||
}
|
||||
|
||||
uploadCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(retrievalRecord, "uploadCount", uploadCount);
|
||||
JSONUtilities.safePut(retrievalRecord, "downloadCount", downloadCount);
|
||||
JSONUtilities.safePut(retrievalRecord, "clipboardCount", clipboardCount);
|
||||
JSONUtilities.safePut(retrievalRecord, "archiveCount", archiveCount);
|
||||
}
|
||||
|
||||
static public String getRelativePath(File file, File dir) {
|
||||
String location = file.getAbsolutePath().substring(dir.getAbsolutePath().length());
|
||||
return (location.startsWith(File.separator)) ? location.substring(1) : location;
|
||||
}
|
||||
|
||||
static public File allocateFile(File dir, String name) {
|
||||
File file = new File(dir, name);
|
||||
|
||||
int dot = name.indexOf('.');
|
||||
String prefix = dot < 0 ? name : name.substring(0, dot);
|
||||
String suffix = dot < 0 ? "" : name.substring(dot);
|
||||
int index = 2;
|
||||
while (file.exists()) {
|
||||
file = new File(dir, prefix + "-" + index++ + suffix);
|
||||
}
|
||||
|
||||
file.getParentFile().mkdirs();
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
static public Reader getFileReader(ImportingJob job, JSONObject fileRecord)
|
||||
throws FileNotFoundException {
|
||||
|
||||
return getFileReader(getFile(job, JSONUtilities.getString(fileRecord, "location", "")), fileRecord);
|
||||
}
|
||||
|
||||
static public Reader getFileReader(File file, JSONObject fileRecord) throws FileNotFoundException {
|
||||
return getReaderFromStream(new FileInputStream(file), fileRecord);
|
||||
}
|
||||
|
||||
static public Reader getReaderFromStream(InputStream inputStream, JSONObject fileRecord) {
|
||||
String encoding = getEncoding(fileRecord);
|
||||
if (encoding != null) {
|
||||
try {
|
||||
return new InputStreamReader(inputStream, encoding);
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// Ignore and fall through
|
||||
}
|
||||
}
|
||||
return new InputStreamReader(inputStream);
|
||||
}
|
||||
|
||||
static public File getFile(ImportingJob job, JSONObject fileRecord) {
|
||||
return getFile(job, JSONUtilities.getString(fileRecord, "location", ""));
|
||||
}
|
||||
|
||||
static public File getFile(ImportingJob job, String location) {
|
||||
return new File(job.getRawDataDir(), location);
|
||||
}
|
||||
|
||||
static public String getFileSource(JSONObject fileRecord) {
|
||||
return JSONUtilities.getString(
|
||||
fileRecord,
|
||||
"url",
|
||||
JSONUtilities.getString(fileRecord, "fileName", "unknown")
|
||||
);
|
||||
}
|
||||
|
||||
static private abstract class SavingUpdate {
|
||||
public long totalExpectedSize = 0;
|
||||
public long totalRetrievedSize = 0;
|
||||
|
||||
abstract public void savedMore();
|
||||
abstract public boolean isCanceled();
|
||||
}
|
||||
static public long saveStreamToFile(InputStream stream, File file, SavingUpdate update) throws IOException {
|
||||
long length = 0;
|
||||
FileOutputStream fos = new FileOutputStream(file);
|
||||
try {
|
||||
byte[] bytes = new byte[4096];
|
||||
int c;
|
||||
while ((update == null || !update.isCanceled()) && (c = stream.read(bytes)) > 0) {
|
||||
fos.write(bytes, 0, c);
|
||||
length += c;
|
||||
|
||||
if (update != null) {
|
||||
update.totalRetrievedSize += c;
|
||||
update.savedMore();
|
||||
}
|
||||
}
|
||||
return length;
|
||||
} finally {
|
||||
fos.close();
|
||||
}
|
||||
}
|
||||
|
||||
static public boolean postProcessRetrievedFile(
|
||||
File file, JSONObject fileRecord, JSONArray fileRecords, final Progress progress) {
|
||||
|
||||
String mimeType = JSONUtilities.getString(fileRecord, "declaredMimeType", null);
|
||||
File rawDataDir = file.getParentFile();
|
||||
|
||||
InputStream archiveIS = tryOpenAsArchive(file, mimeType);
|
||||
if (archiveIS != null) {
|
||||
try {
|
||||
if (explodeArchive(rawDataDir, archiveIS, fileRecord, fileRecords, progress)) {
|
||||
file.delete();
|
||||
return true;
|
||||
}
|
||||
} finally {
|
||||
try {
|
||||
archiveIS.close();
|
||||
} catch (IOException e) {
|
||||
// TODO: what to do?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
InputStream uncompressedIS = tryOpenAsCompressedFile(file, mimeType);
|
||||
if (uncompressedIS != null) {
|
||||
try {
|
||||
File file2 = uncompressFile(rawDataDir, uncompressedIS, fileRecord, progress);
|
||||
|
||||
file.delete();
|
||||
file = file2;
|
||||
} catch (IOException e) {
|
||||
// TODO: what to do?
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
archiveIS.close();
|
||||
} catch (IOException e) {
|
||||
// TODO: what to do?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
postProcessSingleRetrievedFile(file, fileRecord);
|
||||
JSONUtilities.append(fileRecords, fileRecord);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static public void postProcessSingleRetrievedFile(File file, JSONObject fileRecord) {
|
||||
if (!fileRecord.has("format")) {
|
||||
JSONUtilities.safePut(fileRecord, "format",
|
||||
ImportingManager.getFormat(
|
||||
file.getName(),
|
||||
JSONUtilities.getString(fileRecord, "declaredMimeType", null)));
|
||||
}
|
||||
}
|
||||
|
||||
static public InputStream tryOpenAsArchive(File file, String mimeType) {
|
||||
String fileName = file.getName();
|
||||
try {
|
||||
if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {
|
||||
return new TarInputStream(new GZIPInputStream(new FileInputStream(file)));
|
||||
} else if (fileName.endsWith(".tar.bz2")) {
|
||||
return new TarInputStream(new CBZip2InputStream(new FileInputStream(file)));
|
||||
} else if (fileName.endsWith(".tar")) {
|
||||
return new TarInputStream(new FileInputStream(file));
|
||||
} else if (fileName.endsWith(".zip")) {
|
||||
return new ZipInputStream(new FileInputStream(file));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public boolean explodeArchive(
|
||||
File rawDataDir,
|
||||
InputStream archiveIS,
|
||||
JSONObject archiveFileRecord,
|
||||
JSONArray fileRecords,
|
||||
final Progress progress
|
||||
) {
|
||||
if (archiveIS instanceof TarInputStream) {
|
||||
TarInputStream tis = (TarInputStream) archiveIS;
|
||||
try {
|
||||
TarEntry te;
|
||||
while (!progress.isCanceled() && (te = tis.getNextEntry()) != null) {
|
||||
if (!te.isDirectory()) {
|
||||
String fileName2 = te.getName();
|
||||
File file2 = allocateFile(rawDataDir, fileName2);
|
||||
|
||||
progress.setProgress("Extracting " + fileName2, -1);
|
||||
|
||||
JSONObject fileRecord2 = new JSONObject();
|
||||
JSONUtilities.safePut(fileRecord2, "origin", JSONUtilities.getString(archiveFileRecord, "origin", null));
|
||||
JSONUtilities.safePut(fileRecord2, "declaredEncoding", (String) null);
|
||||
JSONUtilities.safePut(fileRecord2, "declaredMimeType", (String) null);
|
||||
JSONUtilities.safePut(fileRecord2, "fileName", fileName2);
|
||||
JSONUtilities.safePut(fileRecord2, "archiveFileName", JSONUtilities.getString(archiveFileRecord, "fileName", null));
|
||||
JSONUtilities.safePut(fileRecord2, "location", getRelativePath(file2, rawDataDir));
|
||||
|
||||
JSONUtilities.safePut(fileRecord2, "size", saveStreamToFile(tis, file2, null));
|
||||
postProcessSingleRetrievedFile(file2, fileRecord2);
|
||||
|
||||
JSONUtilities.append(fileRecords, fileRecord2);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// TODO: what to do?
|
||||
e.printStackTrace();
|
||||
}
|
||||
return true;
|
||||
} else if (archiveIS instanceof ZipInputStream) {
|
||||
ZipInputStream zis = (ZipInputStream) archiveIS;
|
||||
try {
|
||||
ZipEntry ze;
|
||||
while (!progress.isCanceled() && (ze = zis.getNextEntry()) != null) {
|
||||
if (!ze.isDirectory()) {
|
||||
String fileName2 = ze.getName();
|
||||
File file2 = allocateFile(rawDataDir, fileName2);
|
||||
|
||||
progress.setProgress("Extracting " + fileName2, -1);
|
||||
|
||||
JSONObject fileRecord2 = new JSONObject();
|
||||
JSONUtilities.safePut(fileRecord2, "origin", JSONUtilities.getString(archiveFileRecord, "origin", null));
|
||||
JSONUtilities.safePut(fileRecord2, "declaredEncoding", (String) null);
|
||||
JSONUtilities.safePut(fileRecord2, "declaredMimeType", (String) null);
|
||||
JSONUtilities.safePut(fileRecord2, "fileName", fileName2);
|
||||
JSONUtilities.safePut(fileRecord2, "archiveFileName", JSONUtilities.getString(archiveFileRecord, "fileName", null));
|
||||
JSONUtilities.safePut(fileRecord2, "location", getRelativePath(file2, rawDataDir));
|
||||
|
||||
JSONUtilities.safePut(fileRecord2, "size", saveStreamToFile(zis, file2, null));
|
||||
postProcessSingleRetrievedFile(file2, fileRecord2);
|
||||
|
||||
JSONUtilities.append(fileRecords, fileRecord2);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// TODO: what to do?
|
||||
e.printStackTrace();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static public InputStream tryOpenAsCompressedFile(File file, String mimeType) {
|
||||
String fileName = file.getName();
|
||||
try {
|
||||
if (fileName.endsWith(".gz")) {
|
||||
return new GZIPInputStream(new FileInputStream(file));
|
||||
} else if (fileName.endsWith(".bz2")) {
|
||||
return new CBZip2InputStream(new FileInputStream(file));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
static public File uncompressFile(
|
||||
File rawDataDir,
|
||||
InputStream uncompressedIS,
|
||||
JSONObject fileRecord,
|
||||
final Progress progress
|
||||
) throws IOException {
|
||||
String fileName = JSONUtilities.getString(fileRecord, "fileName", "unknown");
|
||||
File file2 = allocateFile(rawDataDir, fileName);
|
||||
|
||||
progress.setProgress("Uncompressing " + fileName, -1);
|
||||
|
||||
saveStreamToFile(uncompressedIS, file2, null);
|
||||
|
||||
JSONUtilities.safePut(fileRecord, "declaredEncoding", (String) null);
|
||||
JSONUtilities.safePut(fileRecord, "declaredMimeType", (String) null);
|
||||
JSONUtilities.safePut(fileRecord, "location", getRelativePath(file2, rawDataDir));
|
||||
|
||||
return file2;
|
||||
}
|
||||
|
||||
static private int calculateProgressPercent(long totalExpectedSize, long totalRetrievedSize) {
|
||||
return totalExpectedSize == 0 ? -1 : (int) (totalRetrievedSize * 100 / totalExpectedSize);
|
||||
}
|
||||
|
||||
static private String formatBytes(long bytes) {
|
||||
return NumberFormat.getIntegerInstance().format(bytes);
|
||||
}
|
||||
|
||||
static public String getEncoding(JSONObject fileRecord) {
|
||||
String encoding = JSONUtilities.getString(fileRecord, "encoding", null);
|
||||
if (encoding == null) {
|
||||
encoding = JSONUtilities.getString(fileRecord, "declaredEncoding", null);
|
||||
}
|
||||
return encoding;
|
||||
}
|
||||
|
||||
static public String autoSelectFiles(ImportingJob job, JSONObject retrievalRecord, JSONArray fileSelectionIndexes) {
|
||||
final Map<String, Integer> formatToCount = new HashMap<String, Integer>();
|
||||
List<String> formats = new ArrayList<String>();
|
||||
|
||||
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||
int count = fileRecords.length();
|
||||
for (int i = 0; i < count; i++) {
|
||||
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, i);
|
||||
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||
if (format != null) {
|
||||
if (formatToCount.containsKey(format)) {
|
||||
formatToCount.put(format, formatToCount.get(format) + 1);
|
||||
} else {
|
||||
formatToCount.put(format, 1);
|
||||
formats.add(format);
|
||||
}
|
||||
}
|
||||
}
|
||||
Collections.sort(formats, new Comparator<String>() {
|
||||
@Override
|
||||
public int compare(String o1, String o2) {
|
||||
return formatToCount.get(o2) - formatToCount.get(o1);
|
||||
}
|
||||
});
|
||||
|
||||
String bestFormat = formats.size() > 0 ? formats.get(0) : null;
|
||||
if (JSONUtilities.getInt(retrievalRecord, "archiveCount", 0) == 0) {
|
||||
// If there's no archive, then select everything
|
||||
for (int i = 0; i < count; i++) {
|
||||
JSONUtilities.append(fileSelectionIndexes, i);
|
||||
}
|
||||
} else {
|
||||
// Otherwise, select files matching the best format
|
||||
for (int i = 0; i < count; i++) {
|
||||
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, i);
|
||||
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||
if (format != null && format.equals(bestFormat)) {
|
||||
JSONUtilities.append(fileSelectionIndexes, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return bestFormat;
|
||||
}
|
||||
|
||||
static public String getCommonFormatForSelectedFiles(ImportingJob job, JSONArray fileSelectionIndexes) {
|
||||
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||
|
||||
final Map<String, Integer> formatToCount = new HashMap<String, Integer>();
|
||||
List<String> formats = new ArrayList<String>();
|
||||
|
||||
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||
int count = fileSelectionIndexes.length();
|
||||
for (int i = 0; i < count; i++) {
|
||||
int index = JSONUtilities.getIntElement(fileSelectionIndexes, i, -1);
|
||||
if (index >= 0 && index < fileRecords.length()) {
|
||||
JSONObject fileRecord = JSONUtilities.getObjectElement(fileRecords, index);
|
||||
String format = JSONUtilities.getString(fileRecord, "format", null);
|
||||
if (format != null) {
|
||||
if (formatToCount.containsKey(format)) {
|
||||
formatToCount.put(format, formatToCount.get(format) + 1);
|
||||
} else {
|
||||
formatToCount.put(format, 1);
|
||||
formats.add(format);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Collections.sort(formats, new Comparator<String>() {
|
||||
@Override
|
||||
public int compare(String o1, String o2) {
|
||||
return formatToCount.get(o2) - formatToCount.get(o1);
|
||||
}
|
||||
});
|
||||
|
||||
return formats.size() > 0 ? formats.get(0) : null;
|
||||
}
|
||||
|
||||
static String guessBetterFormat(ImportingJob job, String bestFormat) {
|
||||
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||
return retrievalRecord != null ? guessBetterFormat(job, retrievalRecord, bestFormat) : bestFormat;
|
||||
}
|
||||
|
||||
static String guessBetterFormat(ImportingJob job, JSONObject retrievalRecord, String bestFormat) {
|
||||
JSONArray fileRecords = JSONUtilities.getArray(retrievalRecord, "files");
|
||||
return fileRecords != null ? guessBetterFormat(job, fileRecords, bestFormat) : bestFormat;
|
||||
}
|
||||
|
||||
static String guessBetterFormat(ImportingJob job, JSONArray fileRecords, String bestFormat) {
|
||||
if (bestFormat != null && fileRecords != null && fileRecords.length() > 0) {
|
||||
JSONObject firstFileRecord = JSONUtilities.getObjectElement(fileRecords, 0);
|
||||
String encoding = getEncoding(firstFileRecord);
|
||||
String location = JSONUtilities.getString(firstFileRecord, "location", null);
|
||||
|
||||
if (location != null) {
|
||||
File file = new File(job.getRawDataDir(), location);
|
||||
|
||||
while (true) {
|
||||
String betterFormat = null;
|
||||
|
||||
List<FormatGuesser> guessers = ImportingManager.formatToGuessers.get(bestFormat);
|
||||
if (guessers != null) {
|
||||
for (FormatGuesser guesser : guessers) {
|
||||
betterFormat = guesser.guess(file, encoding, bestFormat);
|
||||
if (betterFormat != null) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (betterFormat != null && !betterFormat.equals(bestFormat)) {
|
||||
bestFormat = betterFormat;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return bestFormat;
|
||||
}
|
||||
|
||||
static void rankFormats(ImportingJob job, final String bestFormat, JSONArray rankedFormats) {
|
||||
final Map<String, String[]> formatToSegments = new HashMap<String, String[]>();
|
||||
|
||||
boolean download = bestFormat == null ? true :
|
||||
ImportingManager.formatToRecord.get(bestFormat).download;
|
||||
|
||||
List<String> formats = new ArrayList<String>(ImportingManager.formatToRecord.keySet().size());
|
||||
for (String format : ImportingManager.formatToRecord.keySet()) {
|
||||
Format record = ImportingManager.formatToRecord.get(format);
|
||||
if (record.uiClass != null && record.parser != null && record.download == download) {
|
||||
formats.add(format);
|
||||
formatToSegments.put(format, format.split("/"));
|
||||
}
|
||||
}
|
||||
|
||||
if (bestFormat == null) {
|
||||
Collections.sort(formats);
|
||||
} else {
|
||||
Collections.sort(formats, new Comparator<String>() {
|
||||
@Override
|
||||
public int compare(String format1, String format2) {
|
||||
if (format1.equals(bestFormat)) {
|
||||
return -1;
|
||||
} else if (format2.equals(bestFormat)) {
|
||||
return 1;
|
||||
} else {
|
||||
return compareBySegments(format1, format2);
|
||||
}
|
||||
}
|
||||
|
||||
int compareBySegments(String format1, String format2) {
|
||||
int c = commonSegments(format2) - commonSegments(format1);
|
||||
return c != 0 ? c : format1.compareTo(format2);
|
||||
}
|
||||
|
||||
int commonSegments(String format) {
|
||||
String[] bestSegments = formatToSegments.get(bestFormat);
|
||||
String[] segments = formatToSegments.get(format);
|
||||
if (bestSegments == null || segments == null) {
|
||||
return 0;
|
||||
} else {
|
||||
int i;
|
||||
for (i = 0; i < bestSegments.length && i < segments.length; i++) {
|
||||
if (!bestSegments[i].equals(segments[i])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (String format : formats) {
|
||||
JSONUtilities.append(rankedFormats, format);
|
||||
}
|
||||
}
|
||||
|
||||
static public List<JSONObject> getSelectedFileRecords(ImportingJob job) {
|
||||
List<JSONObject> results = new ArrayList<JSONObject>();
|
||||
|
||||
JSONObject retrievalRecord = JSONUtilities.getObject(job.config, "retrievalRecord");
|
||||
if (retrievalRecord != null) {
|
||||
JSONArray fileRecordArray = JSONUtilities.getArray(retrievalRecord, "files");
|
||||
if (fileRecordArray != null) {
|
||||
JSONArray fileSelectionArray = JSONUtilities.getArray(job.config, "fileSelection");
|
||||
if (fileSelectionArray != null) {
|
||||
for (int i = 0; i < fileSelectionArray.length(); i++) {
|
||||
int index = JSONUtilities.getIntElement(fileSelectionArray, i, -1);
|
||||
if (index >= 0 && index < fileRecordArray.length()) {
|
||||
results.add(JSONUtilities.getObjectElement(fileRecordArray, index));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
static public void previewParse(ImportingJob job, String format, JSONObject optionObj, List<Exception> exceptions) {
|
||||
Format record = ImportingManager.formatToRecord.get(format);
|
||||
if (record == null || record.parser == null) {
|
||||
// TODO: what to do?
|
||||
return;
|
||||
}
|
||||
|
||||
job.prepareNewProject();
|
||||
|
||||
record.parser.parse(
|
||||
job.project,
|
||||
job.metadata,
|
||||
job,
|
||||
getSelectedFileRecords(job),
|
||||
format,
|
||||
100,
|
||||
optionObj,
|
||||
exceptions
|
||||
);
|
||||
|
||||
job.project.update(); // update all internal models, indexes, caches, etc.
|
||||
}
|
||||
|
||||
static public long createProject(
|
||||
final ImportingJob job,
|
||||
final String format,
|
||||
final JSONObject optionObj,
|
||||
final List<Exception> exceptions) {
|
||||
final Format record = ImportingManager.formatToRecord.get(format);
|
||||
if (record == null || record.parser == null) {
|
||||
// TODO: what to do?
|
||||
return -1;
|
||||
}
|
||||
|
||||
JSONUtilities.safePut(job.config, "state", "creating-project");
|
||||
|
||||
final Project project = new Project();
|
||||
new Thread() {
|
||||
public void run() {
|
||||
ProjectMetadata pm = new ProjectMetadata();
|
||||
pm.setName(JSONUtilities.getString(optionObj, "projectName", "Untitled"));
|
||||
pm.setEncoding(JSONUtilities.getString(optionObj, "encoding", "UTF-8"));
|
||||
|
||||
record.parser.parse(
|
||||
project,
|
||||
pm,
|
||||
job,
|
||||
getSelectedFileRecords(job),
|
||||
format,
|
||||
-1,
|
||||
optionObj,
|
||||
exceptions
|
||||
);
|
||||
|
||||
if (!job.canceled) {
|
||||
project.update(); // update all internal models, indexes, caches, etc.
|
||||
|
||||
ProjectManager.singleton.registerProject(project, pm);
|
||||
|
||||
JSONUtilities.safePut(job.config, "projectID", project.id);
|
||||
JSONUtilities.safePut(job.config, "state", "created-project");
|
||||
}
|
||||
}
|
||||
}.start();
|
||||
|
||||
return project.id;
|
||||
}
|
||||
|
||||
static public void setCreatingProjectProgress(ImportingJob job, String message, int percent) {
|
||||
JSONObject progress = JSONUtilities.getObject(job.config, "progress");
|
||||
if (progress == null) {
|
||||
progress = new JSONObject();
|
||||
JSONUtilities.safePut(job.config, "progress", progress);
|
||||
}
|
||||
JSONUtilities.safePut(progress, "message", message);
|
||||
JSONUtilities.safePut(progress, "percent", percent);
|
||||
}
|
||||
}
|
@ -31,23 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
var theImportJob = {};
|
||||
var ui = {};
|
||||
|
||||
var Refine = {
|
||||
};
|
||||
|
||||
function resize() {
|
||||
var header = $("#header");
|
||||
|
||||
var leftPanelWidth = 300;
|
||||
var width = $(window).width();
|
||||
var top = $("#header").outerHeight();
|
||||
var height = $(window).height() - top;
|
||||
package com.google.refine.importing;
|
||||
|
||||
public interface UrlRewriter {
|
||||
static public class Result {
|
||||
public String rewrittenUrl;
|
||||
public String format;
|
||||
public boolean download;
|
||||
}
|
||||
|
||||
function onLoad() {
|
||||
$(window).bind("resize", resize);
|
||||
public Result rewrite(String url);
|
||||
}
|
||||
$(onLoad);
|
@ -1,62 +0,0 @@
|
||||
package com.google.refine.model.meta;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.apache.commons.fileupload.FileItemIterator;
|
||||
import org.apache.commons.fileupload.FileItemStream;
|
||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.commands.importing.ImportJob;
|
||||
|
||||
public class FileUploadImportSource extends ImportSource {
|
||||
public String originalFileName;
|
||||
|
||||
@Override
|
||||
protected void customWrite(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
writer.key("originalFileName"); writer.value(originalFileName);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
||||
if (obj.has("originalFileName")) {
|
||||
originalFileName = obj.getString("originalFileName");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
||||
ServletFileUpload upload = new ServletFileUpload();
|
||||
FileItemIterator iter = upload.getItemIterator(request);
|
||||
while (iter.hasNext()) {
|
||||
FileItemStream item = iter.next();
|
||||
if (!item.isFormField()) {
|
||||
String fileName = item.getName();
|
||||
if (fileName.length() > 0) {
|
||||
InputStream stream = item.openStream();
|
||||
try {
|
||||
File file = new File(job.dir, "data");
|
||||
|
||||
this.accessTime = new Date();
|
||||
this.contentType = item.getContentType();
|
||||
this.encoding = request.getCharacterEncoding();
|
||||
this.originalFileName = fileName;
|
||||
this.size = saveStreamToFileOrDir(
|
||||
item.openStream(), file, this.contentType, fileName, job, request.getContentLength());
|
||||
this.isArchive = file.isDirectory();
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,5 +0,0 @@
|
||||
package com.google.refine.model.meta;
|
||||
|
||||
/**
 * Placeholder for import configuration; it declares no members.
 */
public class ImportConfig {
}
|
@ -1,167 +0,0 @@
|
||||
package com.google.refine.model.meta;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
import java.util.Properties;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.apache.tools.bzip2.CBZip2InputStream;
|
||||
import org.apache.tools.tar.TarEntry;
|
||||
import org.apache.tools.tar.TarInputStream;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.Jsonizable;
|
||||
import com.google.refine.commands.importing.ImportJob;
|
||||
import com.google.refine.commands.importing.ImportManager;
|
||||
import com.google.refine.util.ParsingUtilities;
|
||||
|
||||
abstract public class ImportSource implements Jsonizable {
|
||||
public Date accessTime;
|
||||
public long size;
|
||||
public boolean isArchive = false;
|
||||
|
||||
public String contentType;
|
||||
public String encoding;
|
||||
|
||||
@Override
|
||||
public void write(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
writer.object();
|
||||
writer.key("type"); writer.value(ImportManager.getImportSourceClassName(this.getClass()));
|
||||
writer.key("accessTime"); writer.value(ParsingUtilities.dateToString(accessTime));
|
||||
writer.key("size"); writer.value(size);
|
||||
writer.key("isArchive"); writer.value(isArchive);
|
||||
writer.key("contentType"); writer.value(contentType);
|
||||
writer.key("encoding"); writer.value(encoding);
|
||||
writer.endObject();
|
||||
}
|
||||
|
||||
public void reconstruct(JSONObject obj) throws JSONException {
|
||||
if (obj.has("accessTime")) {
|
||||
accessTime = ParsingUtilities.stringToDate(obj.getString("accessTime"));
|
||||
}
|
||||
if (obj.has("size")) {
|
||||
size = obj.getLong("size");
|
||||
}
|
||||
if (obj.has("isArchive")) {
|
||||
isArchive = obj.getBoolean("isArchive");
|
||||
}
|
||||
if (obj.has("contentType")) {
|
||||
contentType = obj.getString("contentType");
|
||||
}
|
||||
if (obj.has("encoding")) {
|
||||
encoding = obj.getString("encoding");
|
||||
}
|
||||
customReconstruct(obj);
|
||||
}
|
||||
|
||||
abstract public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job)
|
||||
throws Exception;
|
||||
|
||||
abstract protected void customWrite(JSONWriter writer, Properties options) throws JSONException;
|
||||
abstract protected void customReconstruct(JSONObject obj) throws JSONException;
|
||||
|
||||
static protected long saveStreamToFileOrDir(
|
||||
InputStream is,
|
||||
File file,
|
||||
String contentType,
|
||||
String fileNameOrUrl,
|
||||
ImportJob job,
|
||||
long expectedSize
|
||||
) throws IOException {
|
||||
InputStream archiveIS = null;
|
||||
if (fileNameOrUrl != null) {
|
||||
try {
|
||||
if (fileNameOrUrl.endsWith(".tar.gz") ||
|
||||
fileNameOrUrl.endsWith(".tar.gz.gz") ||
|
||||
fileNameOrUrl.endsWith(".tgz")) {
|
||||
archiveIS = new TarInputStream(new GZIPInputStream(is));
|
||||
} else if (fileNameOrUrl.endsWith(".tar.bz2")) {
|
||||
archiveIS = new TarInputStream(new CBZip2InputStream(is));
|
||||
} else if (fileNameOrUrl.endsWith(".tar")) {
|
||||
archiveIS = new TarInputStream(is);
|
||||
} else if (fileNameOrUrl.endsWith(".zip")) {
|
||||
archiveIS = new ZipInputStream(is);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
archiveIS = null;
|
||||
}
|
||||
}
|
||||
|
||||
job.bytesSaved = 0;
|
||||
if (archiveIS == null) {
|
||||
saveStreamToFile(is, file, job, true, expectedSize);
|
||||
} else {
|
||||
job.retrievingProgress = -1;
|
||||
|
||||
// NOTE(SM): unfortunately, java.io does not provide any generalized class for
|
||||
// archive-like input streams so while both TarInputStream and ZipInputStream
|
||||
// behave precisely the same, there is no polymorphic behavior so we have
|
||||
// to treat each instance explicitly... one of those times you wish you had
|
||||
// closures
|
||||
|
||||
if (archiveIS instanceof TarInputStream) {
|
||||
TarInputStream tis = (TarInputStream) archiveIS;
|
||||
TarEntry te;
|
||||
while ((te = tis.getNextEntry()) != null) {
|
||||
if (!te.isDirectory()) {
|
||||
saveStreamToFile(tis, new File(file, te.getName()), job, false, 0);
|
||||
}
|
||||
}
|
||||
} else if (archiveIS instanceof ZipInputStream) {
|
||||
ZipInputStream zis = (ZipInputStream) archiveIS;
|
||||
ZipEntry ze;
|
||||
long compressedSize = 0;
|
||||
while ((ze = zis.getNextEntry()) != null) {
|
||||
if (!ze.isDirectory()) {
|
||||
saveStreamToFile(zis, new File(file, ze.getName()), job, false, 0);
|
||||
|
||||
compressedSize += ze.getCompressedSize(); // this might be negative if not known
|
||||
if (compressedSize > 0) {
|
||||
job.retrievingProgress = (int) (compressedSize * 100 / expectedSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return job.bytesSaved;
|
||||
}
|
||||
|
||||
static private void saveStreamToFile(
|
||||
InputStream is,
|
||||
File file,
|
||||
ImportJob job,
|
||||
boolean updateProgress,
|
||||
long expectedSize
|
||||
) throws IOException {
|
||||
byte data[] = new byte[4096];
|
||||
|
||||
file.getParentFile().mkdirs();
|
||||
|
||||
FileOutputStream fos = new FileOutputStream(file);
|
||||
BufferedOutputStream bos = new BufferedOutputStream(fos, data.length);
|
||||
|
||||
int count;
|
||||
while ((count = is.read(data, 0, data.length)) != -1) {
|
||||
bos.write(data, 0, count);
|
||||
|
||||
job.bytesSaved += count;
|
||||
if (updateProgress) {
|
||||
job.retrievingProgress = (int) (job.bytesSaved * 100 / expectedSize);
|
||||
}
|
||||
}
|
||||
|
||||
bos.flush();
|
||||
bos.close();
|
||||
}
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
package com.google.refine.model.meta;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.commands.importing.ImportJob;
|
||||
|
||||
public class TextImportSource extends ImportSource {
|
||||
@Override
|
||||
protected void customWrite(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
package com.google.refine.model.meta;
|
||||
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
import com.google.refine.commands.importing.ImportJob;
|
||||
|
||||
public class WebImportSource extends ImportSource {
|
||||
public String url;
|
||||
|
||||
@Override
|
||||
protected void customWrite(JSONWriter writer, Properties options)
|
||||
throws JSONException {
|
||||
writer.key("url"); writer.value(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void customReconstruct(JSONObject obj) throws JSONException {
|
||||
if (obj.has("url")) {
|
||||
url = obj.getString("url");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void retrieveContent(HttpServletRequest request, Properties options, ImportJob job) throws Exception {
|
||||
// TODO Auto-generated method stub
|
||||
|
||||
}
|
||||
}
|
@ -35,8 +35,10 @@ package com.google.refine.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
@ -44,6 +46,14 @@ import org.json.JSONObject;
|
||||
import org.json.JSONWriter;
|
||||
|
||||
public class JSONUtilities {
|
||||
static public JSONObject getObject(JSONObject obj, String key) {
|
||||
try {
|
||||
return obj.getJSONObject(key);
|
||||
} catch (JSONException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static public String getString(JSONObject obj, String key, String def) {
|
||||
try {
|
||||
return obj.getString(key);
|
||||
@ -94,6 +104,14 @@ public class JSONUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
static public JSONArray getArray(JSONObject obj, String key) {
|
||||
try {
|
||||
return obj.getJSONArray(key);
|
||||
} catch (JSONException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static public int[] getIntArray(JSONObject obj, String key) {
|
||||
try {
|
||||
JSONArray a = obj.getJSONArray(key);
|
||||
@ -144,6 +162,14 @@ public class JSONUtilities {
|
||||
writer.endArray();
|
||||
}
|
||||
|
||||
static public void writeStringArray(JSONWriter writer, String[] strings) throws JSONException {
|
||||
writer.array();
|
||||
for (String s : strings) {
|
||||
writer.value(s);
|
||||
}
|
||||
writer.endArray();
|
||||
}
|
||||
|
||||
static public void putField(JSONObject obj, String key, Object value) throws JSONException {
|
||||
if (value instanceof Integer) {
|
||||
obj.put(key, ((Integer) value).intValue());
|
||||
@ -164,6 +190,135 @@ public class JSONUtilities {
|
||||
}
|
||||
}
|
||||
|
||||
static public JSONObject getObjectElement(JSONArray a, int i) {
|
||||
try {
|
||||
return a.getJSONObject(i);
|
||||
} catch (JSONException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static public int getIntElement(JSONArray a, int i, int def) {
|
||||
try {
|
||||
return a.getInt(i);
|
||||
} catch (JSONException e) {
|
||||
return def;
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, JSONObject element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, Object element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, int element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, long element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, double element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, boolean element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void append(JSONArray a, String element) {
|
||||
try {
|
||||
a.put(a.length(), element);
|
||||
} catch (JSONException e) {
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, int value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, long value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, double value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, boolean value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, String value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, Collection<?> value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, Map<?, ?> value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public void safePut(JSONObject obj, String key, Object value) {
|
||||
try {
|
||||
obj.put(key, value);
|
||||
} catch (JSONException e) {
|
||||
// Ignore: the JSONObject is just too happy about throwing exceptions.
|
||||
}
|
||||
}
|
||||
|
||||
static public Object[] toArray(JSONArray a) throws JSONException {
|
||||
int l = a.length();
|
||||
|
||||
|
64
main/src/com/google/refine/util/TrackingInputStream.java
Normal file
64
main/src/com/google/refine/util/TrackingInputStream.java
Normal file
@ -0,0 +1,64 @@
|
||||
package com.google.refine.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * Input stream wrapper that counts how many bytes have been read or skipped
 * from the wrapped stream.
 *
 * {@code mark}/{@code reset} are passed through to the wrapped stream and do
 * not adjust the count, so bytes read again after a reset are counted again.
 */
public class TrackingInputStream extends InputStream {
    final private InputStream is;
    protected long bytesRead;

    /**
     * @param is the stream to wrap and delegate to
     */
    public TrackingInputStream(InputStream is) {
        this.is = is;
    }

    /**
     * @return the number of bytes read or skipped so far
     */
    public long getBytesRead() {
        return bytesRead;
    }

    /**
     * Reads one byte and counts it.
     *
     * @return the byte value (0-255), or -1 at end of stream
     */
    @Override
    public int read() throws IOException {
        int value = is.read();
        // The single-byte read returns the byte's VALUE, not a count, so it must
        // not be fed to track() directly: count exactly one byte unless at end of stream.
        if (value >= 0) {
            track(1);
        }
        return value;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return (int) track(is.read(b));
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        return (int) track(is.read(b, off, len));
    }

    @Override
    public long skip(long n) throws IOException {
        return track(is.skip(n));
    }

    /** Delegates to the wrapped stream instead of always reporting 0. */
    @Override
    public int available() throws IOException {
        return is.available();
    }

    @Override
    public void mark(int readlimit) {
        is.mark(readlimit);
    }

    @Override
    public void reset() throws IOException {
        is.reset();
    }

    @Override
    public boolean markSupported() {
        return is.markSupported();
    }

    @Override
    public void close() throws IOException {
        is.close();
    }

    /**
     * Adds a byte count to the running total when it is positive.
     *
     * @param bytesRead a count as returned by a bulk read or skip; values of
     *                  zero or less (such as the -1 end-of-stream marker) are not counted
     * @return {@code bytesRead}, unchanged, so calls can wrap a delegate call
     */
    protected long track(long bytesRead) {
        if (bytesRead > 0) {
            this.bytesRead += bytesRead;
        }
        return bytesRead;
    }
}
|
@ -33,6 +33,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.tests;
|
||||
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
@ -41,6 +48,7 @@ import com.google.refine.model.Cell;
|
||||
import com.google.refine.model.Column;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class RefineTest {
|
||||
|
||||
@ -82,4 +90,41 @@ public class RefineTest {
|
||||
logger.info(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
//----helpers----
|
||||
|
||||
static public void whenGetBooleanOption(String name, JSONObject options, Boolean def){
|
||||
when(options.has(name)).thenReturn(true);
|
||||
when(JSONUtilities.getBoolean(options, name, def)).thenReturn(def);
|
||||
}
|
||||
|
||||
static public void whenGetIntegerOption(String name, JSONObject options, int def){
|
||||
when(options.has(name)).thenReturn(true);
|
||||
when(JSONUtilities.getInt(options, name, def)).thenReturn(def);
|
||||
}
|
||||
|
||||
static public void whenGetStringOption(String name, JSONObject options, String def){
|
||||
when(options.has(name)).thenReturn(true);
|
||||
when(JSONUtilities.getString(options, name, def)).thenReturn(def);
|
||||
}
|
||||
|
||||
static public void whenGetObjectOption(String name, JSONObject options, JSONObject def){
|
||||
when(options.has(name)).thenReturn(true);
|
||||
when(JSONUtilities.getObject(options, name)).thenReturn(def);
|
||||
}
|
||||
|
||||
static public void whenGetArrayOption(String name, JSONObject options, JSONArray def){
|
||||
when(options.has(name)).thenReturn(true);
|
||||
when(JSONUtilities.getArray(options, name)).thenReturn(def);
|
||||
}
|
||||
|
||||
static public void verifyGetOption(String name, JSONObject options){
|
||||
verify(options, times(1)).has(name);
|
||||
try {
|
||||
verify(options, times(1)).get(name);
|
||||
} catch (JSONException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,12 @@
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.json.JSONArray;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
@ -16,13 +14,10 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.FixedWidthImporter;
|
||||
import com.google.refine.importers.ImportException;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class FixedWidthImporterTests extends RefineTest {
|
||||
public class FixedWidthImporterTests extends ImporterTest {
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
@ -30,45 +25,20 @@ public class FixedWidthImporterTests extends RefineTest {
|
||||
|
||||
//constants
|
||||
String SAMPLE_ROW = "NDB_NoShrt_DescWater";
|
||||
String SAMPLE_ROW_WIDTHS = "6,9,5";
|
||||
|
||||
//System Under Test
|
||||
FixedWidthImporter SUT = null;
|
||||
|
||||
//mock dependencies
|
||||
Project project = null;
|
||||
Properties properties = null;
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
super.SetUp();
|
||||
SUT = new FixedWidthImporter();
|
||||
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||
properties = mock(Properties.class);
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown(){
|
||||
SUT = null;
|
||||
project = null;
|
||||
properties = null;
|
||||
}
|
||||
|
||||
//TODO a lot of these tests are very similar to the TsvCsvImporterTests. It might be possible to overlap them
|
||||
|
||||
@Test
|
||||
public void canParseSeparator(){
|
||||
int[] i = null;
|
||||
try {
|
||||
i = SUT.getColumnWidthsFromString("1,2,3");
|
||||
} catch (ImportException e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
|
||||
Assert.assertNotNull(i);
|
||||
Assert.assertEquals(i[0], 1);
|
||||
Assert.assertEquals(i[1], 2);
|
||||
Assert.assertEquals(i[2], 3);
|
||||
super.TearDown();
|
||||
}
|
||||
|
||||
//---------------------read tests------------------------
|
||||
@ -76,19 +46,23 @@ public class FixedWidthImporterTests extends RefineTest {
|
||||
public void readFixedWidth(){
|
||||
StringReader reader = new StringReader(SAMPLE_ROW + "\nTooShort");
|
||||
|
||||
when(properties.getProperty("fixed-column-widths")).thenReturn(SAMPLE_ROW_WIDTHS);
|
||||
whenGetIntegerOption("ignore",properties,0);
|
||||
whenGetIntegerOption("header-lines",properties,0);
|
||||
whenGetIntegerOption("limit",properties,-1);
|
||||
whenGetIntegerOption("skip",properties,0);
|
||||
JSONArray columnWidths = new JSONArray();
|
||||
JSONUtilities.append(columnWidths, 6);
|
||||
JSONUtilities.append(columnWidths, 9);
|
||||
JSONUtilities.append(columnWidths, 5);
|
||||
|
||||
whenGetArrayOption("columnWidths", options, columnWidths);
|
||||
whenGetIntegerOption("ignoreLines", options, 0);
|
||||
whenGetIntegerOption("headerLines", options, 0);
|
||||
whenGetIntegerOption("skipDataLines", options, 0);
|
||||
whenGetIntegerOption("limit", options, -1);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
|
||||
|
||||
Assert.assertEquals(project.rows.size(), 2);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
||||
@ -99,27 +73,10 @@ public class FixedWidthImporterTests extends RefineTest {
|
||||
Assert.assertEquals((String)project.rows.get(1).cells.get(1).value, "rt");
|
||||
Assert.assertNull(project.rows.get(1).cells.get(2));
|
||||
|
||||
verify(properties, times(1)).getProperty("fixed-column-widths");
|
||||
verifyGetOption("ignore",properties);
|
||||
verifyGetOption("header-lines",properties);
|
||||
verifyGetOption("limit",properties);
|
||||
verifyGetOption("skip",properties);
|
||||
}
|
||||
|
||||
//----helpers----
|
||||
|
||||
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
|
||||
when(properties.containsKey(name)).thenReturn(true);
|
||||
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
|
||||
}
|
||||
|
||||
public void whenGetIntegerOption(String name, Properties properties, int def){
|
||||
when(properties.containsKey(name)).thenReturn(true);
|
||||
when(properties.getProperty(name)).thenReturn(Integer.toString(def));
|
||||
}
|
||||
|
||||
public void verifyGetOption(String name, Properties properties){
|
||||
verify(properties, times(1)).containsKey(name);
|
||||
verify(properties, times(1)).getProperty(name);
|
||||
JSONUtilities.getIntArray(verify(options, times(1)), "columnWidths");
|
||||
verifyGetOption("ignore", options);
|
||||
verifyGetOption("header-lines", options);
|
||||
verifyGetOption("limit", options);
|
||||
verifyGetOption("skip", options);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,109 @@
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.json.JSONObject;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.ImportingParserBase;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.TreeImportingParserBase;
|
||||
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||
import com.google.refine.importing.ImportingJob;
|
||||
import com.google.refine.importing.ImportingManager;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
|
||||
abstract class ImporterTest extends RefineTest {
|
||||
//mock dependencies
|
||||
protected Project project;
|
||||
protected ProjectMetadata metadata;
|
||||
protected ImportingJob job;
|
||||
|
||||
protected JSONObject options;
|
||||
|
||||
public void SetUp(){
|
||||
//FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||
project = new Project();
|
||||
metadata = new ProjectMetadata();
|
||||
job = ImportingManager.createJob();
|
||||
|
||||
options = mock(JSONObject.class);
|
||||
}
|
||||
|
||||
public void TearDown(){
|
||||
project = null;
|
||||
metadata = null;
|
||||
|
||||
ImportingManager.disposeJob(job.id);
|
||||
job = null;
|
||||
|
||||
options = null;
|
||||
}
|
||||
|
||||
protected void parseOneFile(ImportingParserBase parser, Reader reader) {
|
||||
parser.parseOneFile(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
reader,
|
||||
-1,
|
||||
options,
|
||||
new ArrayList<Exception>()
|
||||
);
|
||||
project.update();
|
||||
}
|
||||
|
||||
protected void parseOneFile(ImportingParserBase parser, InputStream inputStream) {
|
||||
parser.parseOneFile(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
inputStream,
|
||||
-1,
|
||||
options,
|
||||
new ArrayList<Exception>()
|
||||
);
|
||||
project.update();
|
||||
}
|
||||
|
||||
protected void parseOneFile(TreeImportingParserBase parser, Reader reader) {
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
parser.parseOneFile(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
reader,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
options,
|
||||
new ArrayList<Exception>()
|
||||
);
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
project.columnModel.update();
|
||||
}
|
||||
|
||||
protected void parseOneFile(TreeImportingParserBase parser, InputStream inputStream) {
|
||||
ImportColumnGroup rootColumnGroup = new ImportColumnGroup();
|
||||
parser.parseOneFile(
|
||||
project,
|
||||
metadata,
|
||||
job,
|
||||
"file-source",
|
||||
inputStream,
|
||||
rootColumnGroup,
|
||||
-1,
|
||||
options,
|
||||
new ArrayList<Exception>()
|
||||
);
|
||||
XmlImportUtilities.createColumnsFromImport(project, rootColumnGroup);
|
||||
project.columnModel.update();
|
||||
}
|
||||
}
|
@ -33,12 +33,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -47,15 +45,12 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.JsonImporter;
|
||||
import com.google.refine.importers.parsers.JSONParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||
import com.google.refine.importers.tree.TreeReader.Token;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
|
||||
public class JsonImporterTests extends RefineTest {
|
||||
public class JsonImporterTests extends ImporterTest {
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
logger = LoggerFactory.getLogger(this.getClass());
|
||||
@ -63,29 +58,30 @@ public class JsonImporterTests extends RefineTest {
|
||||
|
||||
|
||||
//dependencies
|
||||
Project project = null;
|
||||
Properties options = null;
|
||||
ByteArrayInputStream inputStream = null;
|
||||
|
||||
//System Under Test
|
||||
JsonImporter SUT = null;
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
super.SetUp();
|
||||
SUT = new JsonImporter();
|
||||
project = new Project();
|
||||
options = mock(Properties.class);
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown() throws IOException{
|
||||
public void TearDown() {
|
||||
SUT = null;
|
||||
project = null;
|
||||
options = null;
|
||||
if (inputStream != null) inputStream.close();
|
||||
if (inputStream != null) {
|
||||
try {
|
||||
inputStream.close();
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
inputStream = null;
|
||||
}
|
||||
super.TearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canParseSample(){
|
||||
@ -181,8 +177,8 @@ public class JsonImporterTests extends RefineTest {
|
||||
String sampleJson2 = "{\"field\":{}}";
|
||||
String sampleJson3 = "{\"field\":[{},{}]}";
|
||||
|
||||
JSONParser parser = new JSONParser(new ByteArrayInputStream( sampleJson.getBytes( "UTF-8" ) ));
|
||||
TreeParserToken token = TreeParserToken.Ignorable;
|
||||
JSONTreeReader parser = new JSONTreeReader(new StringReader(sampleJson));
|
||||
Token token = Token.Ignorable;
|
||||
int i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
@ -191,8 +187,8 @@ public class JsonImporterTests extends RefineTest {
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(TreeParserToken.Value, token);
|
||||
Assert.assertEquals("field", parser.getLocalName());
|
||||
Assert.assertEquals(Token.Value, token);
|
||||
Assert.assertEquals("field", parser.getFieldName());
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
@ -200,8 +196,8 @@ public class JsonImporterTests extends RefineTest {
|
||||
}
|
||||
|
||||
|
||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson2.getBytes( "UTF-8" ) ) );
|
||||
token = TreeParserToken.Ignorable;
|
||||
parser = new JSONTreeReader(new StringReader(sampleJson2));
|
||||
token = Token.Ignorable;
|
||||
i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
@ -210,16 +206,16 @@ public class JsonImporterTests extends RefineTest {
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(TreeParserToken.StartEntity, token);
|
||||
Assert.assertEquals(parser.getLocalName(), "field");
|
||||
Assert.assertEquals(Token.StartEntity, token);
|
||||
Assert.assertEquals(parser.getFieldName(), "field");
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
//silent
|
||||
}
|
||||
|
||||
parser = new JSONParser(new ByteArrayInputStream( sampleJson3.getBytes( "UTF-8" ) ) );
|
||||
token = TreeParserToken.Ignorable;
|
||||
parser = new JSONTreeReader(new StringReader(sampleJson3));
|
||||
token = Token.Ignorable;
|
||||
i = 0;
|
||||
try{
|
||||
while(token != null){
|
||||
@ -228,16 +224,16 @@ public class JsonImporterTests extends RefineTest {
|
||||
break;
|
||||
i++;
|
||||
if(i == 3){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "field");
|
||||
Assert.assertEquals(token, Token.StartEntity);
|
||||
Assert.assertEquals(parser.getFieldName(), "field");
|
||||
}
|
||||
if(i == 4){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
||||
Assert.assertEquals(token, Token.StartEntity);
|
||||
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
|
||||
}
|
||||
if(i == 6){
|
||||
Assert.assertEquals(token, TreeParserToken.StartEntity);
|
||||
Assert.assertEquals(parser.getLocalName(), "__anonymous__");
|
||||
Assert.assertEquals(token, Token.StartEntity);
|
||||
Assert.assertEquals(parser.getFieldName(), "__anonymous__");
|
||||
}
|
||||
}
|
||||
}catch(Exception e){
|
||||
@ -352,7 +348,7 @@ public class JsonImporterTests extends RefineTest {
|
||||
}
|
||||
|
||||
try {
|
||||
SUT.read(inputStream, project, new ProjectMetadata(), options);
|
||||
parseOneFile(SUT, inputStream);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
@ -34,7 +34,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -42,13 +41,10 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.RdfTripleImporter;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
|
||||
public class RdfTripleImporterTests extends RefineTest {
|
||||
public class RdfTripleImporterTests extends ImporterTest {
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
@ -58,15 +54,12 @@ public class RdfTripleImporterTests extends RefineTest {
|
||||
|
||||
//System Under Test
|
||||
RdfTripleImporter SUT = null;
|
||||
Project project = null;
|
||||
Properties options = null;
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
super.SetUp();
|
||||
SUT = new RdfTripleImporter();
|
||||
project = new Project();
|
||||
options = new Properties();
|
||||
options.put("base-url", "http://rdf.freebase.com");
|
||||
JSONUtilities.safePut(options, "base-url", "http://rdf.freebase.com");
|
||||
}
|
||||
|
||||
@Test(enabled=false)
|
||||
@ -75,8 +68,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
||||
StringReader reader = new StringReader(sampleRdf);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
||||
project.update();
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -98,8 +90,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
||||
StringReader reader = new StringReader(sampleRdf);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
||||
project.update();
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -140,8 +131,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
||||
StringReader reader = new StringReader(sampleRdf);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
||||
project.update();
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
@ -175,8 +165,7 @@ public class RdfTripleImporterTests extends RefineTest {
|
||||
StringReader reader = new StringReader(sampleRdf);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), options);
|
||||
project.update();
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
@ -33,15 +33,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -51,12 +47,10 @@ import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.TsvCsvImporter;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
import com.google.refine.importers.SeparatorBasedImporter;
|
||||
import com.google.refine.util.JSONUtilities;
|
||||
|
||||
public class TsvCsvImporterTests extends RefineTest {
|
||||
public class TsvCsvImporterTests extends ImporterTest {
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
@ -67,25 +61,18 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String SAMPLE_ROW = "NDB_No,Shrt_Desc,Water";
|
||||
|
||||
//System Under Test
|
||||
TsvCsvImporter SUT = null;
|
||||
|
||||
//mock dependencies
|
||||
Project project = null;
|
||||
Properties properties = null;
|
||||
|
||||
SeparatorBasedImporter SUT = null;
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp() {
|
||||
SUT = new TsvCsvImporter();
|
||||
project = new Project(); //FIXME - should we try and use mock(Project.class); - seems unnecessary complexity
|
||||
properties = mock(Properties.class);
|
||||
super.SetUp();
|
||||
SUT = new SeparatorBasedImporter();
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown(){
|
||||
SUT = null;
|
||||
project = null;
|
||||
properties = null;
|
||||
super.TearDown();
|
||||
}
|
||||
|
||||
@Test(dataProvider = "CSV-TSV-AutoDetermine")
|
||||
@ -94,11 +81,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
@ -113,11 +99,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = "value1" + inputSeparator + "value2" + inputSeparator + "value3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, false, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, false, false, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 1);
|
||||
@ -135,10 +120,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
@ -160,13 +145,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"data1" + inputSeparator + "234" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, true, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, true, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -185,13 +169,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = "data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "Column");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "Column2");
|
||||
@ -209,13 +192,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = " data1 " + inputSeparator + " 3.4 " + inputSeparator + " data3 ";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
@ -230,13 +212,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = " data1" + inputSeparator + " 12" + inputSeparator + " data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
@ -251,13 +232,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = " data1" + inputSeparator + inputSeparator + " data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, true, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, true, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
@ -274,13 +254,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"sub1" + inputSeparator + "sub2" + inputSeparator + "sub3\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 2, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 2, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
||||
@ -299,13 +278,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3" + inputSeparator + "data4" + inputSeparator + "data5" + inputSeparator + "data6";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 6);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -330,13 +308,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"\"\"\"To Be\"\" is often followed by \"\"or not To Be\"\"\"" + inputSeparator + "data2";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -355,13 +332,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 1, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 1, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -381,13 +357,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"skip1\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 1, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 1, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -411,13 +386,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"skip1\n" +
|
||||
"data1" + inputSeparator + "data2" + inputSeparator + "data3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 1, 3, 2, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 1, 3, 2, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1 sub1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2 sub2");
|
||||
@ -444,10 +418,10 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
"data-row2-cell1" + inputSeparator + "data-row2-cell2" + inputSeparator + "\n" + //missing last data point of this row on purpose
|
||||
"data-row3-cell1" + inputSeparator + "data-row3-cell2" + inputSeparator + "data-row1-cell3";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, 2, 2, 3, 2, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, 2, 2, 3, 2, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
@ -471,13 +445,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String inputSeparator = sep == "\t" ? "\t" : ",";
|
||||
String input = "data1" + inputSeparator + "data2\"" + inputSeparator + "data3" + inputSeparator + "data4";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 0, false, true, true);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 0, false, true, true);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 4);
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
@ -493,13 +466,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"\"\"\"To\n Be\"\" is often followed by \"\"or not To\n Be\"\"\"" + inputSeparator + "data2";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -517,13 +489,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "col1" + inputSeparator + "col2" + inputSeparator + "col3\n" +
|
||||
"\"A line with many \n\n\n\n\n empty lines\"" + inputSeparator + "data2";
|
||||
|
||||
LineNumberReader lnReader = new LineNumberReader(new StringReader(input));
|
||||
try {
|
||||
SUT.read(lnReader, project, sep, -1, 0, 0, 1, false, true, false);
|
||||
} catch (IOException e) {
|
||||
prepareOptions(sep, -1, 0, 0, 1, false, true, false);
|
||||
parseOneFile(SUT, new StringReader(input));
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
Assert.assertEquals(project.columnModel.columns.size(), 3);
|
||||
Assert.assertEquals(project.columnModel.columns.get(0).getName(), "col1");
|
||||
Assert.assertEquals(project.columnModel.columns.get(1).getName(), "col2");
|
||||
@ -539,32 +510,31 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
public void readCsvWithProperties() {
|
||||
StringReader reader = new StringReader(SAMPLE_ROW);
|
||||
|
||||
when(properties.getProperty("separator")).thenReturn(",");
|
||||
whenGetIntegerOption("ignore",properties,0);
|
||||
whenGetIntegerOption("header-lines",properties,0);
|
||||
whenGetIntegerOption("limit",properties,-1);
|
||||
whenGetIntegerOption("skip",properties,0);
|
||||
whenGetIntegerOption("ignore-quotes",properties,0);
|
||||
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
|
||||
whenGetIntegerOption("ignore", options, 0);
|
||||
whenGetIntegerOption("header-lines", options, 0);
|
||||
whenGetIntegerOption("limit", options, -1);
|
||||
whenGetIntegerOption("skip", options, 0);
|
||||
whenGetIntegerOption("ignore-quotes", options, 0);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 3);
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "NDB_No");
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(1).value, "Shrt_Desc");
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "Water");
|
||||
|
||||
verify(properties, times(1)).getProperty("separator");
|
||||
verifyGetOption("ignore",properties);
|
||||
verifyGetOption("header-lines",properties);
|
||||
verifyGetOption("limit",properties);
|
||||
verifyGetOption("skip",properties);
|
||||
verifyGetOption("ignore-quotes",properties);
|
||||
JSONUtilities.getString(verify(options, times(1)), "separator", null);
|
||||
verifyGetOption("ignore", options);
|
||||
verifyGetOption("header-lines", options);
|
||||
verifyGetOption("limit", options);
|
||||
verifyGetOption("skip", options);
|
||||
verifyGetOption("ignore-quotes", options);
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -572,20 +542,19 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
String input = "data1,data2\",data3,data4";
|
||||
StringReader reader = new StringReader(input);
|
||||
|
||||
when(properties.getProperty("separator")).thenReturn(",");
|
||||
whenGetIntegerOption("ignore",properties,0);
|
||||
whenGetIntegerOption("header-lines",properties,0);
|
||||
whenGetIntegerOption("limit",properties,-1);
|
||||
whenGetIntegerOption("skip",properties,0);
|
||||
whenGetBooleanOption("ignore-quotes",properties,true);
|
||||
when(JSONUtilities.getString(options, "separator", null)).thenReturn(",");
|
||||
whenGetIntegerOption("ignore", options, 0);
|
||||
whenGetIntegerOption("header-lines", options, 0);
|
||||
whenGetIntegerOption("limit", options, -1);
|
||||
whenGetIntegerOption("skip", options, 0);
|
||||
whenGetBooleanOption("ignore-quotes", options, true);
|
||||
|
||||
try {
|
||||
SUT.read(reader, project, new ProjectMetadata(), properties);
|
||||
parseOneFile(SUT, reader);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
|
||||
Assert.assertEquals(project.rows.size(), 1);
|
||||
Assert.assertEquals(project.rows.get(0).cells.size(), 4);
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(0).value, "data1");
|
||||
@ -593,12 +562,12 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(2).value, "data3");
|
||||
Assert.assertEquals((String)project.rows.get(0).cells.get(3).value, "data4");
|
||||
|
||||
verify(properties, times(1)).getProperty("separator");
|
||||
verifyGetOption("ignore",properties);
|
||||
verifyGetOption("header-lines",properties);
|
||||
verifyGetOption("limit",properties);
|
||||
verifyGetOption("skip",properties);
|
||||
verifyGetOption("ignore-quotes",properties);
|
||||
JSONUtilities.getString(verify(options, times(1)), "separator", null);
|
||||
verifyGetOption("ignore", options);
|
||||
verifyGetOption("header-lines", options);
|
||||
verifyGetOption("limit", options);
|
||||
verifyGetOption("skip", options);
|
||||
verifyGetOption("ignore-quotes", options);
|
||||
}
|
||||
|
||||
//--helpers--
|
||||
@ -612,19 +581,16 @@ public class TsvCsvImporterTests extends RefineTest {
|
||||
};
|
||||
}
|
||||
|
||||
public void whenGetBooleanOption(String name, Properties properties, Boolean def){
|
||||
when(properties.containsKey(name)).thenReturn(true);
|
||||
when(properties.getProperty(name)).thenReturn(Boolean.toString(def));
|
||||
private void prepareOptions(
|
||||
String sep, int limit, int skip, int ignoreLines,
|
||||
int headerLines, boolean guessValueType, boolean splitIntoColumns, boolean ignoreQuotes) {
|
||||
JSONUtilities.safePut(options, "separator", sep);
|
||||
JSONUtilities.safePut(options, "limit", limit);
|
||||
JSONUtilities.safePut(options, "skipDataLines", skip);
|
||||
JSONUtilities.safePut(options, "ignoreLines", ignoreLines);
|
||||
JSONUtilities.safePut(options, "headerLines", headerLines);
|
||||
JSONUtilities.safePut(options, "guessCellValueTypes", guessValueType);
|
||||
JSONUtilities.safePut(options, "splitIntoColumns", splitIntoColumns);
|
||||
JSONUtilities.safePut(options, "processQuotes", !ignoreQuotes);
|
||||
}
|
||||
|
||||
public void whenGetIntegerOption(String name, Properties properties, int def){
|
||||
when(properties.containsKey(name)).thenReturn(true);
|
||||
when(properties.getProperty(name)).thenReturn(Integer.toString(def));
|
||||
}
|
||||
|
||||
public void verifyGetOption(String name, Properties properties){
|
||||
verify(properties, times(1)).containsKey(name);
|
||||
verify(properties, times(1)).getProperty(name);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -35,27 +35,27 @@ package com.google.refine.tests.importers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
|
||||
import com.google.refine.importers.XmlImportUtilities;
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.ImportRecord;
|
||||
import com.google.refine.importers.tree.TreeReader;
|
||||
import com.google.refine.importers.tree.XmlImportUtilities;
|
||||
import com.google.refine.model.Project;
|
||||
|
||||
public class XmlImportUtilitiesStub extends XmlImportUtilities {
|
||||
|
||||
public List<String> detectRecordElementWrapper(TreeParser parser, String tag) throws ServletException{
|
||||
public List<String> detectRecordElementWrapper(TreeReader parser, String tag) throws Exception{
|
||||
return super.detectRecordElement(parser, tag);
|
||||
}
|
||||
|
||||
public void ProcessSubRecordWrapper(Project project, TreeParser parser, ImportColumnGroup columnGroup, ImportRecord record) throws ServletException{
|
||||
public void ProcessSubRecordWrapper(Project project, TreeReader parser, ImportColumnGroup columnGroup, ImportRecord record) throws Exception{
|
||||
super.processSubRecord(project, parser, columnGroup, record);
|
||||
}
|
||||
|
||||
public void findRecordWrapper(Project project, TreeParser parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws ServletException{
|
||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup);
|
||||
public void findRecordWrapper(Project project, TreeReader parser, String[] recordPath, int pathIndex, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||
super.findRecord(project, parser, recordPath, pathIndex, rootColumnGroup, -1);
|
||||
}
|
||||
|
||||
public void processRecordWrapper(Project project, TreeParser parser, ImportColumnGroup rootColumnGroup) throws ServletException{
|
||||
public void processRecordWrapper(Project project, TreeReader parser, ImportColumnGroup rootColumnGroup) throws Exception{
|
||||
super.processRecord(project, parser, rootColumnGroup);
|
||||
}
|
||||
|
||||
|
@ -35,11 +35,12 @@ package com.google.refine.tests.importers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -48,13 +49,12 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumn;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportColumnGroup;
|
||||
import com.google.refine.importers.TreeImportUtilities.ImportRecord;
|
||||
import com.google.refine.importers.parsers.JSONParser;
|
||||
import com.google.refine.importers.parsers.TreeParser;
|
||||
import com.google.refine.importers.parsers.TreeParserToken;
|
||||
import com.google.refine.importers.parsers.XmlParser;
|
||||
import com.google.refine.importers.JsonImporter.JSONTreeReader;
|
||||
import com.google.refine.importers.XmlImporter.XmlParser;
|
||||
import com.google.refine.importers.tree.ImportColumn;
|
||||
import com.google.refine.importers.tree.ImportColumnGroup;
|
||||
import com.google.refine.importers.tree.ImportRecord;
|
||||
import com.google.refine.importers.tree.TreeReader;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
@ -69,7 +69,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
//dependencies
|
||||
Project project;
|
||||
TreeParser parser;
|
||||
TreeReader parser;
|
||||
ImportColumnGroup columnGroup;
|
||||
ImportRecord record;
|
||||
ByteArrayInputStream inputStream;
|
||||
@ -134,7 +134,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
List<String> response = new ArrayList<String>();
|
||||
try {
|
||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
Assert.assertNotNull(response);
|
||||
@ -152,7 +152,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
List<String> response = new ArrayList<String>();
|
||||
try {
|
||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
Assert.assertNotNull(response);
|
||||
@ -171,7 +171,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
List<String> response = new ArrayList<String>();
|
||||
try {
|
||||
response = SUT.detectRecordElementWrapper(parser, tag);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail(e.getMessage());
|
||||
}
|
||||
Assert.assertNull(response);
|
||||
@ -181,7 +181,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
public void detectRecordElementRegressionXmlTest(){
|
||||
loadSampleXml();
|
||||
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new XmlParser(inputStream));
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(createXmlParser());
|
||||
Assert.assertNotNull(path);
|
||||
Assert.assertEquals(path.length, 2);
|
||||
Assert.assertEquals(path[0], "library");
|
||||
@ -192,7 +192,8 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
public void detectRecordElementRegressionJsonTest(){
|
||||
loadSampleJson();
|
||||
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(new JSONParser(inputStream));
|
||||
String[] path = XmlImportUtilitiesStub.detectRecordElement(
|
||||
new JSONTreeReader(new InputStreamReader(inputStream)));
|
||||
Assert.assertNotNull(path);
|
||||
Assert.assertEquals(path.length, 2);
|
||||
Assert.assertEquals(path[0], "__anonymous__");
|
||||
@ -204,7 +205,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
loadSampleXml();
|
||||
|
||||
String[] recordPath = new String[]{"library","book"};
|
||||
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup );
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
@ -224,7 +225,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
loadData(XmlImporterTests.getSampleWithVaryingStructure());
|
||||
|
||||
String[] recordPath = new String[]{"library", "book"};
|
||||
XmlImportUtilitiesStub.importTreeData(new XmlParser(inputStream), project, recordPath, columnGroup);
|
||||
XmlImportUtilitiesStub.importTreeData(createXmlParser(), project, recordPath, columnGroup, -1);
|
||||
|
||||
log(project);
|
||||
assertProjectCreated(project, 0, 6);
|
||||
@ -278,7 +279,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
try {
|
||||
SUT.findRecordWrapper(project, parser, recordPath, pathIndex, columnGroup);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
|
||||
@ -297,7 +298,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
log(project);
|
||||
@ -318,7 +319,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
log(project);
|
||||
@ -343,7 +344,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
try {
|
||||
SUT.processRecordWrapper(project, parser, columnGroup);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
log(project);
|
||||
@ -367,7 +368,7 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
try {
|
||||
SUT.ProcessSubRecordWrapper(project, parser, columnGroup, record);
|
||||
} catch (ServletException e) {
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
log(project);
|
||||
@ -429,18 +430,24 @@ public class XmlImportUtilitiesTests extends RefineTest {
|
||||
|
||||
public void ParserSkip(){
|
||||
try {
|
||||
if(parser.getEventType() == TreeParserToken.Ignorable){
|
||||
if (parser.current() == TreeReader.Token.Ignorable){
|
||||
parser.next(); //move parser forward once e.g. skip the START_DOCUMENT parser event
|
||||
}
|
||||
} catch (ServletException e1) {
|
||||
} catch (Exception e1) {
|
||||
Assert.fail();
|
||||
}
|
||||
}
|
||||
|
||||
public void createXmlParser(){
|
||||
public TreeReader createXmlParser(){
|
||||
try {
|
||||
parser = new XmlParser(inputStream);
|
||||
}
|
||||
public void createJsonParser(){
|
||||
parser = new JSONParser(inputStream);
|
||||
return parser;
|
||||
} catch (XMLStreamException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
public TreeReader createJsonParser(){
|
||||
parser = new JSONTreeReader(new InputStreamReader(inputStream));
|
||||
return parser;
|
||||
}
|
||||
}
|
||||
|
@ -33,12 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
package com.google.refine.tests.importers;
|
||||
|
||||
import static org.mockito.Mockito.mock;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
@ -47,14 +44,11 @@ import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.refine.ProjectMetadata;
|
||||
import com.google.refine.importers.XmlImporter;
|
||||
import com.google.refine.model.Project;
|
||||
import com.google.refine.model.Row;
|
||||
import com.google.refine.tests.RefineTest;
|
||||
|
||||
|
||||
public class XmlImporterTests extends RefineTest {
|
||||
public class XmlImporterTests extends ImporterTest {
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
@ -62,29 +56,30 @@ public class XmlImporterTests extends RefineTest {
|
||||
}
|
||||
|
||||
//dependencies
|
||||
Project project = null;
|
||||
Properties options = null;
|
||||
ByteArrayInputStream inputStream = null;
|
||||
|
||||
//System Under Test
|
||||
XmlImporter SUT = null;
|
||||
|
||||
|
||||
@BeforeMethod
|
||||
public void SetUp(){
|
||||
super.SetUp();
|
||||
SUT = new XmlImporter();
|
||||
project = new Project();
|
||||
options = mock(Properties.class);
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void TearDown() throws IOException{
|
||||
public void TearDown() {
|
||||
SUT = null;
|
||||
project = null;
|
||||
options = null;
|
||||
if (inputStream != null) inputStream.close();
|
||||
if (inputStream != null) {
|
||||
try {
|
||||
inputStream.close();
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
inputStream = null;
|
||||
}
|
||||
super.TearDown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void canParseSample(){
|
||||
@ -309,11 +304,9 @@ public class XmlImporterTests extends RefineTest {
|
||||
}
|
||||
|
||||
try {
|
||||
SUT.read(inputStream, project, new ProjectMetadata(), options);
|
||||
parseOneFile(SUT, inputStream);
|
||||
} catch (Exception e) {
|
||||
Assert.fail();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -50,9 +50,10 @@ function registerCommands() {
|
||||
|
||||
RS.registerCommand(module, "get-version", new Packages.com.google.refine.commands.GetVersionCommand());
|
||||
|
||||
RS.registerCommand(module, "create-import-job", new Packages.com.google.refine.commands.importing.CreateImportJobCommand());
|
||||
RS.registerCommand(module, "retrieve-import-content", new Packages.com.google.refine.commands.importing.RetrieveImportContentCommand());
|
||||
RS.registerCommand(module, "get-import-job-status", new Packages.com.google.refine.commands.importing.GetImportJobStatusCommand());
|
||||
RS.registerCommand(module, "get-importing-configuration", new Packages.com.google.refine.commands.importing.GetImportingConfigurationCommand());
|
||||
RS.registerCommand(module, "create-importing-job", new Packages.com.google.refine.commands.importing.CreateImportingJobCommand());
|
||||
RS.registerCommand(module, "get-importing-job-status", new Packages.com.google.refine.commands.importing.GetImportingJobStatusCommand());
|
||||
RS.registerCommand(module, "importing-controller", new Packages.com.google.refine.commands.importing.ImportingControllerCommand());
|
||||
|
||||
RS.registerCommand(module, "create-project-from-upload", new Packages.com.google.refine.commands.project.CreateProjectCommand());
|
||||
RS.registerCommand(module, "import-project", new Packages.com.google.refine.commands.project.ImportProjectCommand());
|
||||
@ -120,12 +121,9 @@ function registerCommands() {
|
||||
|
||||
RS.registerCommand(module, "get-expression-language-info", new Packages.com.google.refine.commands.expr.GetExpressionLanguageInfoCommand());
|
||||
RS.registerCommand(module, "get-expression-history", new Packages.com.google.refine.commands.expr.GetExpressionHistoryCommand());
|
||||
RS.registerCommand(module, "get-starred-expressions", new Packages.com.google.refine.commands.expr.GetStarredExpressionsCommand());
|
||||
RS.registerCommand(module, "toggle-starred-expression", new Packages.com.google.refine.commands.expr.ToggleStarredExpressionCommand());
|
||||
RS.registerCommand(module, "log-expression", new Packages.com.google.refine.commands.expr.LogExpressionCommand());
|
||||
RS.registerCommand(module, "preview-expression", new Packages.com.google.refine.commands.expr.PreviewExpressionCommand());
|
||||
|
||||
|
||||
RS.registerCommand(module, "get-preference", new Packages.com.google.refine.commands.GetPreferenceCommand());
|
||||
RS.registerCommand(module, "get-all-preferences", new Packages.com.google.refine.commands.GetAllPreferencesCommand());
|
||||
RS.registerCommand(module, "set-preference", new Packages.com.google.refine.commands.SetPreferenceCommand());
|
||||
@ -168,11 +166,98 @@ function registerOperations() {
|
||||
OR.registerOperation(module, "recon-copy-across-columns", Packages.com.google.refine.operations.recon.ReconCopyAcrossColumnsOperation);
|
||||
}
|
||||
|
||||
function registerImportSourceClasses() {
|
||||
var RM = Packages.com.google.refine.commands.importing.ImportManager;
|
||||
RM.registerImportSourceClass("file-upload", Packages.com.google.refine.model.meta.FileUploadImportSource);
|
||||
RM.registerImportSourceClass("text", Packages.com.google.refine.model.meta.TextImportSource);
|
||||
RM.registerImportSourceClass("web", Packages.com.google.refine.model.meta.WebImportSource);
|
||||
/*
 * Registers the importing configuration with ImportingManager: the known formats (with their
 * client-side UI class names and server-side parsers), the file-extension and MIME-type mappings
 * onto those formats, the format guessers, and the default importing controller.
 */
function registerImporting() {
    var IM = Packages.com.google.refine.importing.ImportingManager;

    /*
     * Formats and their UI class names and parsers:
     * - UI class names are used on the client-side in Javascript to instantiate code that lets the user
     *   configure the parser's options.
     * - Parsers are server-side code that do the actual parsing. Because they have access to the raw files,
     *   they also generate defaults for the client-side UIs to initialize.
     */

    IM.registerFormat("text", "Text files"); // generic format, no parser to handle it
    IM.registerFormat("text/line-based", "Line-based text files", "LineBasedParserUI",
        new Packages.com.google.refine.importers.LineBasedImporter());
    IM.registerFormat("text/line-based/*sv", "CSV / TSV / separator-based files", "SeparatorBasedParserUI",
        new Packages.com.google.refine.importers.SeparatorBasedImporter());
    IM.registerFormat("text/line-based/fixed-width", "Fixed-width field text files", "FixedWidthParserUI",
        new Packages.com.google.refine.importers.FixedWidthImporter());

    IM.registerFormat("text/xml", "XML files", "XmlParserUI", new Packages.com.google.refine.importers.XmlImporter());
    IM.registerFormat("text/xml/xlsx", "Excel (.xlsx) files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());
    IM.registerFormat("text/xml/rdf", "RDF/XML files", "RdfParserUI", new Packages.com.google.refine.importers.RdfTripleImporter());
    IM.registerFormat("text/json", "JSON files", "JsonParserUI", new Packages.com.google.refine.importers.JsonImporter());
    IM.registerFormat("text/marc", "MARC files"); // registered without a UI class or parser

    IM.registerFormat("binary", "Binary files"); // generic format, no parser to handle it
    IM.registerFormat("binary/xls", "Excel files", "ExcelParserUI", new Packages.com.google.refine.importers.ExcelImporter());

    IM.registerFormat("service", "Services"); // generic format, no parser to handle it

    /*
     * Extension to format mappings
     */
    IM.registerExtension(".txt", "text/line-based");
    IM.registerExtension(".csv", "text/line-based/*sv");
    IM.registerExtension(".tsv", "text/line-based/*sv");

    IM.registerExtension(".xml", "text/xml");
    IM.registerExtension(".rdf", "text/xml/rdf");

    IM.registerExtension(".json", "text/json");
    IM.registerExtension(".js", "text/json");

    IM.registerExtension(".xls", "binary/xls");
    IM.registerExtension(".xlsx", "text/xml/xlsx");

    IM.registerExtension(".marc", "text/marc");
    IM.registerExtension(".mrc", "text/marc");

    /*
     * Mime type to format mappings
     */
    IM.registerMimeType("text/plain", "text/line-based");
    IM.registerMimeType("text/csv", "text/line-based/*sv");
    IM.registerMimeType("text/x-csv", "text/line-based/*sv");
    // The IANA-registered TSV media type is plural ("text/tab-separated-values"). The singular
    // spelling is kept so that anything already relying on it continues to work.
    IM.registerMimeType("text/tab-separated-value", "text/line-based/*sv");
    IM.registerMimeType("text/tab-separated-values", "text/line-based/*sv");

    IM.registerMimeType("text/fixed-width", "text/line-based/fixed-width");

    IM.registerMimeType("application/msexcel", "binary/xls");
    IM.registerMimeType("application/x-msexcel", "binary/xls");
    IM.registerMimeType("application/x-ms-excel", "binary/xls");
    IM.registerMimeType("application/vnd.ms-excel", "binary/xls");
    IM.registerMimeType("application/x-excel", "binary/xls");
    IM.registerMimeType("application/xls", "binary/xls");
    // NOTE(review): every other Excel MIME type above maps to "binary/xls"; confirm that
    // "application/x-xls" is really meant to map to the .xlsx format.
    IM.registerMimeType("application/x-xls", "text/xml/xlsx");

    IM.registerMimeType("application/json", "text/json");
    IM.registerMimeType("text/json", "text/json");

    IM.registerMimeType("application/rdf+xml", "text/xml/rdf");

    IM.registerMimeType("application/marc", "text/marc");

    /*
     * Format guessers: these take a format derived from extensions or mime-types,
     * look at the actual files' content, and try to guess a better format.
     */
    IM.registerFormatGuesser("text", new Packages.com.google.refine.importers.TextFormatGuesser());
    IM.registerFormatGuesser("text/line-based", new Packages.com.google.refine.importers.LineBasedFormatGuesser());

    /*
     * Controllers: these implement high-level UI flows for importing data. For example, the default
     * controller lets the user specify one or more source files, either local or remote or on the clipboard,
     * lets the user select which files to actually import in case any of the original files is an archive
     * containing several files, and then lets the user configure parsing options.
     */
    IM.registerController(
        module,
        "default-importing-controller",
        new Packages.com.google.refine.importing.DefaultImportingController()
    );
}
|
||||
|
||||
/*
|
||||
@ -183,7 +268,7 @@ function init() {
|
||||
|
||||
registerCommands();
|
||||
registerOperations();
|
||||
registerImportSourceClasses();
|
||||
registerImporting();
|
||||
|
||||
var RC = Packages.com.google.refine.model.recon.ReconConfig;
|
||||
RC.registerReconConfig(module, "standard-service", Packages.com.google.refine.model.recon.StandardReconConfig);
|
||||
@ -193,12 +278,36 @@ function init() {
|
||||
module,
|
||||
[
|
||||
"externals/jquery-1.4.2.min.js",
|
||||
"externals/jquery.cookie.js",
|
||||
"externals/jquery.eventstack-0.3.js",
|
||||
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
|
||||
"externals/date.js",
|
||||
|
||||
"scripts/util/misc.js",
|
||||
"scripts/util/url.js",
|
||||
"scripts/util/string.js",
|
||||
"scripts/util/ajax.js",
|
||||
"scripts/util/menu.js",
|
||||
"scripts/util/dialog.js",
|
||||
"scripts/util/dom.js",
|
||||
|
||||
"scripts/index.js",
|
||||
"scripts/index/import-sources.js"
|
||||
"scripts/index/create-project-ui.js",
|
||||
"scripts/index/open-project-ui.js",
|
||||
"scripts/index/import-project-ui.js",
|
||||
|
||||
"scripts/index/default-importing-controller/controller.js",
|
||||
"scripts/index/default-importing-controller/file-selection-panel.js",
|
||||
"scripts/index/default-importing-controller/parsing-panel.js",
|
||||
|
||||
"scripts/index/default-importing-sources/sources.js",
|
||||
"scripts/index/parser-interfaces/preview-table.js",
|
||||
"scripts/index/parser-interfaces/separator-based-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/line-based-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/fixed-width-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/excel-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/xml-parser-ui.js",
|
||||
"scripts/index/parser-interfaces/json-parser-ui.js"
|
||||
]
|
||||
);
|
||||
|
||||
@ -210,32 +319,20 @@ function init() {
|
||||
"styles/jquery-ui-overrides.less",
|
||||
"styles/common.less",
|
||||
"styles/pure.css",
|
||||
"styles/index.less"
|
||||
]
|
||||
);
|
||||
"styles/index.less",
|
||||
"styles/index/create-project-ui.less",
|
||||
"styles/index/open-project-ui.less",
|
||||
"styles/index/import-project-ui.less",
|
||||
|
||||
ClientSideResourceManager.addPaths(
|
||||
"import/scripts",
|
||||
module,
|
||||
[
|
||||
"externals/jquery-1.4.2.min.js",
|
||||
"externals/jquery-ui/jquery-ui-1.8.custom.min.js",
|
||||
"externals/date.js",
|
||||
"scripts/util/string.js",
|
||||
"scripts/util/dom.js",
|
||||
"scripts/import.js"
|
||||
]
|
||||
);
|
||||
"styles/index/default-importing-controller.less",
|
||||
"styles/index/default-importing-file-selection-panel.less",
|
||||
"styles/index/default-importing-parsing-panel.less",
|
||||
|
||||
ClientSideResourceManager.addPaths(
|
||||
"import/styles",
|
||||
module,
|
||||
[
|
||||
"externals/jquery-ui/css/ui-lightness/jquery-ui-1.8.custom.css",
|
||||
"styles/jquery-ui-overrides.less",
|
||||
"styles/common.less",
|
||||
"styles/pure.css",
|
||||
"styles/import.less"
|
||||
"styles/index/default-importing-sources.less",
|
||||
"styles/views/data-table-view.less", // for the preview table's styles
|
||||
"styles/index/fixed-width-parser-ui.less",
|
||||
"styles/index/xml-parser-ui.less",
|
||||
"styles/index/json-parser-ui.less"
|
||||
]
|
||||
);
|
||||
|
||||
|
@ -41,182 +41,33 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
$styleInjection
|
||||
</head>
|
||||
<body>
|
||||
#if($params.new && $params.new == "1")
|
||||
#set($newStyle = "")
|
||||
#set($oldStyle = "display: none; ")
|
||||
#else
|
||||
#set($oldStyle = "")
|
||||
#set($newStyle = "display: none; ")
|
||||
#end
|
||||
<div id="container">
|
||||
<div id="logo"> </div>
|
||||
<div id="header-home">
|
||||
<img alt="Google Refine" src="images/logo-googlerefine-40.png" />
|
||||
<h1>A power tool for working with messy data.</h1>
|
||||
<div id="header">
|
||||
<img alt="Google Refine" src="images/logo-googlerefine-30.png" width="129" height="29" />
|
||||
A power tool for working with messy data.
|
||||
</div>
|
||||
<div id="content-home">
|
||||
|
||||
<div id="left-panel" class="main-layout-panel"><div id="left-panel-body">
|
||||
<ul id="action-area-tabs">
|
||||
</ul>
|
||||
|
||||
<div id="project-links">
|
||||
<div id="logo-container">
|
||||
<img alt="Google Refine" src="images/logo-gem-40.png" />
|
||||
<div id="google-refine-version"></div>
|
||||
</div>
|
||||
<ul>
|
||||
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help</a></li>
|
||||
<li><a href="/about.html">About</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div></div> <!-- left-panel -->
|
||||
|
||||
<div id="right-panel" class="main-layout-panel"><div id="right-panel-body">
|
||||
</div></div> <!-- right-panel -->
|
||||
|
||||
<div id="no-project-message" class="message" style="display: none;">
|
||||
No existing project. Create one now!<br/>
|
||||
Try these <a href="http://code.google.com/p/google-refine/wiki/SampleDatasets" target="_blank">sample data sets »</a>
|
||||
</div>
|
||||
|
||||
<div id="project-open">
|
||||
<h1>Open a Project</h1>
|
||||
<div id="projects-container"></div>
|
||||
<div class="content-block-footer"><a href="javascript:openWorkspaceDir()" class="secondary">Browse workspace directory</a></div>
|
||||
</div>
|
||||
<div id="project-create">
|
||||
<h1 style="$newStyle">Create a New Project</h1>
|
||||
<div style="$newStyle" id="import-panel"><table id="import-panel-layout">
|
||||
<tr>
|
||||
<td id="import-panel-tab-headers">
|
||||
<div>Import data from</div>
|
||||
</td>
|
||||
<td id="import-panel-tab-bodies"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2" id="import-panel-message">
|
||||
<h3>What kinds of data files can I import?</h3>
|
||||
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
|
||||
Google Spreadsheets are all supported. Support for other formats can
|
||||
be added with Refine extensions.
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table></div>
|
||||
|
||||
<div style="$newStyle" id="import-progress-panel">
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td colspan="3" id="import-progress-message"></td></tr>
|
||||
<tr><td colspan="3">
|
||||
<div id="import-progress-bar-frame"><div id="import-progress-bar-body"></div></div>
|
||||
</td></tr>
|
||||
<tr>
|
||||
<td id="import-progress-message-left"></td>
|
||||
<td id="import-progress-message-center"></td>
|
||||
<td id="import-progress-message-right"></td>
|
||||
</tr>
|
||||
<tr><td colspan="3">
|
||||
<button class="button" id="import-progress-cancel-button">Cancel</button>
|
||||
</td></tr>
|
||||
</table></div>
|
||||
<iframe id="import-iframe" name="import-iframe"></iframe>
|
||||
</div>
|
||||
|
||||
<div style="$newStyle" id="import-error-panel"><div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td id="import-error-message"></td></tr>
|
||||
<tr><td id="import-error-stack"></td></tr>
|
||||
<tr><td><button class="button button-primary" id="import-error-ok-button">OK</button></td></tr>
|
||||
</table></div></div>
|
||||
|
||||
<form style="$oldStyle" id="file-upload-form" method="post" enctype="multipart/form-data" action="/command/core/create-project-from-upload" accept-charset="UTF-8">
|
||||
<h1>Create a New Project</h1>
|
||||
<h2 id="project-toggle">
|
||||
<a class="secondary" href="javascript:showHide('file-upload-form', 'project-upload-form')">or Import an Existing Project</a>
|
||||
</h2>
|
||||
<div class="project-create-basic">
|
||||
<table class="form-table">
|
||||
<tr>
|
||||
<th><label for="project-file">Data file:</label></th>
|
||||
<td><input type="file" id="project-file-input" name="project-file" /></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th><label for="project-url">or data file URL:</label></th>
|
||||
<td><input type="text" id="project-url-input" name="project-url" size="40" /></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th><label for="project-name">Project name:</label></th>
|
||||
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td><input type="submit" value="Create Project" id="upload-file-button" class="button button-primary" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
<div class="project-create-advanced">
|
||||
<h2>Advanced Options</h2>
|
||||
<div class="project-create-option">
|
||||
Limit load to:
|
||||
<div class="project-create-suboption">
|
||||
<input type="text" id="limit-input" name="limit" size="5" /> rows (blank for all)
|
||||
</div>
|
||||
</div>
|
||||
<div class="project-create-option">
|
||||
Ignore:
|
||||
<div class="project-create-suboption">
|
||||
<input type="text" id="ignore-input" name="ignore" size="5" value="0" /> initial non-blank lines
|
||||
</div>
|
||||
</div>
|
||||
<div class="project-create-option">
|
||||
Skip:
|
||||
<div class="project-create-suboption">
|
||||
<input type="text" id="skip-input" name="skip" size="5" value="0" /> initial data rows
|
||||
</div>
|
||||
</div>
|
||||
<div id="project-create-parsetext">
|
||||
<div class="project-create-option">
|
||||
When parsing text files:
|
||||
<div class="project-create-suboption">
|
||||
<input id="split-into-columns-input" type="checkbox" checked="true" name="split-into-columns" />
|
||||
Split into columns
|
||||
</div>
|
||||
<div class="project-create-suboption">
|
||||
Column separator:
|
||||
<input type="text" id="separator-input" name="separator" size="2" /><br />
|
||||
(leave blank to auto-detect)
|
||||
</div>
|
||||
<div class="project-create-suboption">
|
||||
<input type="checkbox" id="guess-value-type-input" name="guess-value-type" checked="true" />
|
||||
Auto-detect value types<br />
|
||||
(numbers, dates, etc)
|
||||
</div>
|
||||
<div class="project-create-suboption">
|
||||
Header lines: <input type="text" id="header-lines-input" name="header-lines" size="5" value="1" /><br />
|
||||
(use 0 if your data has no header)
|
||||
</div>
|
||||
<div class="project-create-suboption">
|
||||
<input type="checkbox" id="ignore-quotes-input" name="ignore-quotes" />
|
||||
Ignore quotation marks
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<form style="display: none;" id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8" style="display:none;">
|
||||
<h1>Import an Existing Project</h1>
|
||||
<h2 id="project-toggle">
|
||||
<a class="secondary" href="javascript:showHide('project-upload-form', 'file-upload-form')">or Create a New Project</a>
|
||||
</h2>
|
||||
<div class="project-create-basic">
|
||||
<p>Import an existing Google Refine .tar or .tar.gz project file:</p>
|
||||
<table class="form-table">
|
||||
<tr>
|
||||
<th><label for="project-file">Project file:</label></th>
|
||||
<td><input type="file" id="project-tar-file-input" name="project-file" /></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th><label for="project-name">Project name (optional):</label></th>
|
||||
<td><input type="text" size="25" id="project-name-input" name="project-name" /></td></tr>
|
||||
<tr>
|
||||
<td></td>
|
||||
<td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div id="project-links">
|
||||
<ul>
|
||||
<li><a href="/about.html">About Google Refine</a></li>
|
||||
<li><a href="https://code.google.com/p/google-refine/">Project Home Page</a></li>
|
||||
<li><a href="http://code.google.com/p/google-refine/wiki/Screencasts">Screencasts</a></li>
|
||||
<li><a href="http://code.google.com/p/google-refine/wiki/DocumentationForUsers">Help Documentation</a></li>
|
||||
</ul>
|
||||
<div id="google-refine-version"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -31,64 +31,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
function onClickUploadFileButton(evt) {
|
||||
var projectName = $("#project-name-input")[0].value;
|
||||
var dataURL = $.trim($("#project-url-input")[0].value);
|
||||
if (! $.trim(projectName).length) {
|
||||
window.alert("You must specify a project name.");
|
||||
var GoogleRefineVersion;
|
||||
|
||||
} else if ($("#project-file-input")[0].files.length === 0 && ! dataURL.length) {
|
||||
window.alert("You must specify a data file to upload or a URL to retrieve.");
|
||||
var Refine = {
|
||||
actionAreas: []
|
||||
};
|
||||
|
||||
} else {
|
||||
$("#file-upload-form").attr("action",
|
||||
"/command/core/create-project-from-upload?" + [
|
||||
"url=" + escape(dataURL),
|
||||
"split-into-columns=" + $("#split-into-columns-input")[0].checked,
|
||||
"separator=" + $("#separator-input")[0].value,
|
||||
"ignore=" + $("#ignore-input")[0].value,
|
||||
"header-lines=" + $("#header-lines-input")[0].value,
|
||||
"skip=" + $("#skip-input")[0].value,
|
||||
"limit=" + $("#limit-input")[0].value,
|
||||
"guess-value-type=" + $("#guess-value-type-input")[0].checked,
|
||||
"ignore-quotes=" + $("#ignore-quotes-input")[0].checked
|
||||
].join("&"));
|
||||
Refine.selectActionArea = function(id) {
|
||||
$('.action-area-tab').removeClass('selected');
|
||||
$('.action-area-tab-body').css('visibility', 'hidden').css('z-index', '100');
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
evt.preventDefault();
|
||||
return false;
|
||||
}
|
||||
|
||||
function formatDate(d) {
|
||||
var d = new Date(d);
|
||||
var last_year = Date.today().add({ years: -1 });
|
||||
var last_month = Date.today().add({ months: -1 });
|
||||
var last_week = Date.today().add({ days: -7 });
|
||||
var today = Date.today();
|
||||
var tomorrow = Date.today().add({ days: 1 });
|
||||
|
||||
if (d > today) {
|
||||
return "today " + d.toString("h:mm tt");
|
||||
} else if (d.between(last_week, today)) {
|
||||
var diff = Math.floor(today.getDayOfYear() - d.getDayOfYear());
|
||||
return (diff <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (diff + " days ago");
|
||||
} else if (d.between(last_month, today)) {
|
||||
var diff = Math.floor((today.getDayOfYear() - d.getDayOfYear()) / 7);
|
||||
if (diff < 0) {diff += 52;}
|
||||
return (diff == 1) ? "a week ago" : diff.toFixed(0) + " weeks ago" ;
|
||||
} else if (d.between(last_year, today)) {
|
||||
var diff = Math.floor(today.getMonth() - d.getMonth());
|
||||
if (diff < 0) {diff += 12;}
|
||||
return (diff == 1) ? "a month ago" : diff + " months ago";
|
||||
} else {
|
||||
var diff = Math.floor(today.getYear() - d.getYear());
|
||||
return (diff == 1) ? "a year ago" : diff + " years ago";
|
||||
for (var i = 0; i < Refine.actionAreas.length; i++) {
|
||||
var actionArea = Refine.actionAreas[i];
|
||||
if (id == actionArea.id) {
|
||||
actionArea.tabElmt.addClass('selected');
|
||||
actionArea.bodyElmt.css('visibility', 'visible').css('z-index', '110');;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
function isThereNewRelease() {
|
||||
$(function() {
|
||||
var isThereNewRelease = function() {
|
||||
var thisRevision = GoogleRefineVersion.revision;
|
||||
|
||||
var revision_pattern = /r([0-9]+)/;
|
||||
@ -103,150 +66,9 @@ function isThereNewRelease() {
|
||||
var latestRev = parseInt(revision_pattern.exec(GoogleRefineReleases.releases[0].revision)[1],10);
|
||||
|
||||
return latestRev > thisRev;
|
||||
}
|
||||
|
||||
/**
 * Requests the metadata of all projects from the server and passes the
 * parsed JSON response on to renderProjects().
 */
function fetchProjects() {
    var onMetadataLoaded = function(metadata) {
        renderProjects(metadata);
    };

    $.getJSON("/command/core/get-all-project-metadata", null, onMetadataLoaded, "json");
}
|
||||
|
||||
/**
 * Rebuilds the contents of #projects-container from the server's project
 * metadata.
 *
 * @param data  response object; data.projects is iterated as a map of
 *              project id -> metadata, and each entry's "name" and
 *              "modified" fields are read.
 *
 * When there are no projects, a clone of #no-project-message is shown
 * instead of the table. Otherwise one table row is rendered per project,
 * most recently modified first, with rename/delete links that are only
 * visible while the mouse is over the row.
 */
function renderProjects(data) {
    var projects = [];
    for (var n in data.projects) {
        if (data.projects.hasOwnProperty(n)) {
            var project = data.projects[n];
            project.id = n;
            project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
            projects.push(project);
        }
    }
    // Newest modification date first.
    projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });

    var container = $("#projects-container").empty();
    if (!projects.length) {
        $("#no-project-message").clone().show().appendTo(container);
        return;
    }

    var table = $(
        '<table class="list-table"><tr>' +
            '<th>Name</th>' +
            '<th></th>' +
            '<th></th>' +
            '<th align="right">Last modified</th>' +
        '</tr></table>'
    ).appendTo(container)[0];

    var renderProject = function(project) {
        var tr = table.insertRow(table.rows.length);
        tr.className = "project";

        var nameLink = $('<a></a>')
            .addClass("list-table-itemname")
            .text(project.name)
            .attr("href", "/project?project=" + project.id)
            .appendTo(tr.insertCell(tr.cells.length));

        var renameLink = $('<a></a>')
            .text("rename")
            .addClass("secondary")
            .attr("href", "javascript:{}")
            .css("visibility", "hidden")
            .click(function() {
                var name = window.prompt("New project name:", project.name);
                if (name == null) {
                    // The user cancelled the prompt.
                    return;
                }

                name = $.trim(name);
                if (project.name == name || name.length == 0) {
                    // Nothing to change.
                    return;
                }

                $.ajax({
                    type: "POST",
                    url: "/command/core/rename-project",
                    data: { "project" : project.id, "name" : name },
                    dataType: "json",
                    success: function (data) {
                        if (data && typeof data.code != 'undefined' && data.code == "ok") {
                            nameLink.text(name);
                            // Keep the in-memory record in sync so that the next
                            // rename prompt and the delete confirmation show the
                            // new name rather than the stale one.
                            project.name = name;
                        } else {
                            // data may be null/undefined here; do not dereference it blindly.
                            var reason = (data && data.message) ? data.message : "unknown error";
                            alert("Failed to rename project: " + reason);
                        }
                    }
                });
            }).appendTo(tr.insertCell(tr.cells.length));

        var deleteLink = $('<a></a>')
            .addClass("delete-project")
            .attr("title","Delete this project")
            .attr("href","")
            .css("visibility", "hidden")
            .html("<img src='/images/close.png' />")
            .click(function() {
                if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
                    $.ajax({
                        type: "POST",
                        url: "/command/core/delete-project",
                        data: { "project" : project.id },
                        dataType: "json",
                        success: function (data) {
                            if (data && typeof data.code != 'undefined' && data.code == "ok") {
                                // Reload the whole list once the server confirms the deletion.
                                fetchProjects();
                            }
                        }
                    });
                }
                // Suppress navigation to the link's empty href.
                return false;
            }).appendTo(tr.insertCell(tr.cells.length));

        $('<div></div>')
            .html(formatDate(project.date))
            .addClass("last-modified")
            .attr("title", project.date.toString())
            .appendTo(tr.insertCell(tr.cells.length));

        // Only reveal the row actions while the pointer is over the row.
        $(tr).mouseenter(function() {
            renameLink.css("visibility", "visible");
            deleteLink.css("visibility", "visible");
        }).mouseleave(function() {
            renameLink.css("visibility", "hidden");
            deleteLink.css("visibility", "hidden");
        });
    };

    for (var i = 0; i < projects.length; i++) {
        renderProject(projects[i]);
    }
}
|
||||
|
||||
/**
 * Hides one element and shows another, both addressed by element id.
 *
 * @param toHide  id (without the leading '#') of the element to hide
 * @param toShow  id (without the leading '#') of the element to show
 */
function showHide(toHide, toShow) {
    var elementToHide = $("#" + toHide);
    elementToHide.hide();

    var elementToShow = $("#" + toShow);
    elementToShow.show();
}
|
||||
|
||||
/**
 * POSTs to the open-workspace-dir command. If the JSON response's code is
 * not "ok" and it carries a message, that message is shown in an alert.
 */
function openWorkspaceDir() {
    var reportProblem = function(data) {
        var succeeded = (data.code == "ok");
        if (!succeeded && "message" in data) {
            alert(data.message);
        }
    };

    var request = {
        type: "POST",
        url: "/command/core/open-workspace-dir",
        dataType: "json",
        success: reportProblem
    };
    $.ajax(request);
}
|
||||
|
||||
var GoogleRefineVersion;
|
||||
function showVersion() {
|
||||
var showVersion = function() {
|
||||
$.getJSON(
|
||||
"/command/core/get-version",
|
||||
null,
|
||||
@ -264,10 +86,10 @@ function showVersion() {
|
||||
if ("releases" in window) {
|
||||
if (isThereNewRelease()) {
|
||||
var container = $('<div id="notification-container">')
|
||||
.appendTo(document.body);
|
||||
.appendTo(document.body)
|
||||
var notification = $('<div id="notification">')
|
||||
.text('New version! ')
|
||||
.appendTo(container);
|
||||
.appendTo(container)
|
||||
$('<a>')
|
||||
.addClass('notification-action')
|
||||
.attr("href", releases.homepage)
|
||||
@ -281,174 +103,72 @@ function showVersion() {
|
||||
window.setTimeout(poll, 1000);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function renderImportPanel() {
|
||||
var headerContainer = $('#import-panel-tab-headers');
|
||||
var bodyContainer = $('#import-panel-tab-bodies');
|
||||
|
||||
var selectImportSourceTab = function(importSource) {
|
||||
$('.import-panel-tab-body').hide();
|
||||
$('.import-panel-tab-header').removeClass('selected');
|
||||
|
||||
importSource._divBody.show();
|
||||
importSource._divHeader.addClass('selected');
|
||||
importSource._ui.focus();
|
||||
};
|
||||
|
||||
var createImportSourceTab = function(importSource) {
|
||||
importSource._divBody = $('<div>')
|
||||
.addClass('import-panel-tab-body')
|
||||
.appendTo(bodyContainer)
|
||||
.hide();
|
||||
var resize = function() {
|
||||
var leftPanelWidth = 150;
|
||||
// px
|
||||
var width = $(window).width();
|
||||
var height = $(window).height();
|
||||
var headerHeight = $('#header').outerHeight();
|
||||
var panelHeight = height - headerHeight;
|
||||
|
||||
importSource._divHeader = $('<div>')
|
||||
.addClass('import-panel-tab-header')
|
||||
.text(importSource.label)
|
||||
.appendTo(headerContainer)
|
||||
.click(function() { selectImportSourceTab(importSource); });
|
||||
$('.main-layout-panel')
|
||||
.css("top", headerHeight + "px")
|
||||
.css("bottom", "0px")
|
||||
.css("height", panelHeight + "px")
|
||||
.css("visibility", "visible");
|
||||
|
||||
importSource._ui = new importSource.ui(importSource._divBody);
|
||||
$('#left-panel')
|
||||
.css("left", "0px")
|
||||
.css("width", leftPanelWidth + "px");
|
||||
var leftPanelBodyHPaddings = 10;
|
||||
// px
|
||||
var leftPanelBodyVPaddings = 0;
|
||||
// px
|
||||
$('#left-panel-body')
|
||||
.css("margin-left", leftPanelBodyHPaddings + "px")
|
||||
.css("margin-top", leftPanelBodyVPaddings + "px")
|
||||
.css("width", ($('#left-panel').width() - leftPanelBodyHPaddings) + "px")
|
||||
.css("height", ($('#left-panel').height() - leftPanelBodyVPaddings) + "px");
|
||||
|
||||
$('#right-panel')
|
||||
.css("left", leftPanelWidth + "px")
|
||||
.css("width", (width - leftPanelWidth) + "px");
|
||||
|
||||
var rightPanelBodyHPaddings = 5;
|
||||
// px
|
||||
var rightPanelBodyVPaddings = 5;
|
||||
// px
|
||||
$('#right-panel-body')
|
||||
.css("margin-left", rightPanelBodyHPaddings + "px")
|
||||
.css("margin-top", rightPanelBodyVPaddings + "px")
|
||||
.css("width", ($('#right-panel').width() - rightPanelBodyHPaddings) + "px")
|
||||
.css("height", ($('#right-panel').height() - rightPanelBodyVPaddings) + "px");
|
||||
};
|
||||
$(window).bind("resize", resize);
|
||||
window.setTimeout(resize, 50); // for Chrome, give the window some time to layout first
|
||||
|
||||
var renderActionArea = function(actionArea) {
|
||||
actionArea.bodyElmt = $('<div>')
|
||||
.addClass('action-area-tab-body')
|
||||
.appendTo('#right-panel-body');
|
||||
|
||||
actionArea.tabElmt = $('<li>')
|
||||
.addClass('action-area-tab')
|
||||
.text(actionArea.label)
|
||||
.appendTo($('#action-area-tabs'))
|
||||
.click(function() {
|
||||
Refine.selectActionArea(actionArea.id);
|
||||
});
|
||||
|
||||
actionArea.ui = new actionArea.uiClass(actionArea.bodyElmt);
|
||||
};
|
||||
|
||||
for (var i= 0; i < ImportSources.length; i++) {
|
||||
createImportSourceTab(ImportSources[i]);
|
||||
for (var i = 0; i < Refine.actionAreas.length; i++) {
|
||||
renderActionArea(Refine.actionAreas[i]);
|
||||
}
|
||||
selectImportSourceTab(ImportSources[0]);
|
||||
}
|
||||
|
||||
/**
 * Creates an import job on the server, submits the given form into the
 * hidden import iframe so its content is retrieved for that job, and starts
 * polling the job once per second.
 *
 * @param importSource     value sent as the "source" query parameter
 * @param form             jQuery-wrapped form element to submit
 * @param progressMessage  text shown at the top of the progress panel
 */
function startImportJob(importSource, form, progressMessage) {
    var onJobCreated = function(data) {
        var jobID = data.jobID;
        var retrieveURL = "/command/core/retrieve-import-content?" + $.param({
            "jobID" : jobID,
            "source" : importSource
        });

        form.attr("method", "post")
            .attr("enctype", "multipart/form-data")
            .attr("accept-charset", "UTF-8")
            .attr("target", "import-iframe")
            .attr("action", retrieveURL);

        form[0].submit();

        var start = new Date();
        var poll = function() {
            pollImportJob(start, jobID, timerID);
        };
        var timerID = window.setInterval(poll, 1000);

        initializeImportProgressPanel(progressMessage, jobID, timerID);
    };

    $.post("/command/core/create-import-job", null, onJobCreated, "json");
}
|
||||
|
||||
/**
 * Resets the import progress panel, swaps it in for the import panel, and
 * (re)wires the Cancel button for the given job.
 *
 * @param progressMessage  text for #import-progress-message
 * @param jobID            id of the import job that Cancel should abort
 * @param timerID          interval id of the status polling to stop on cancel
 */
function initializeImportProgressPanel(progressMessage, jobID, timerID) {
    // Reset every part of the progress display to its initial state.
    $('#import-progress-message').text(progressMessage);
    $('#import-progress-bar-body').css("width", "0%");
    $('#import-progress-message-left').text('Starting');
    $('#import-progress-message-center').empty();
    $('#import-progress-message-right').empty();

    $('#import-panel').hide();
    $('#import-progress-panel').show();

    var cancelImport = function() {
        $('#import-panel').show();
        $('#import-progress-panel').hide();

        // stop the iframe
        $('#import-iframe')[0].contentWindow.stop();

        // stop the timed polling
        window.clearInterval(timerID);

        // explicitly cancel the import job
        $.post("/command/core/cancel-import-job?" + $.param({ "jobID" : jobID }));
    };

    // Drop handlers left over from a previous job before attaching the new one.
    $('#import-progress-cancel-button').unbind().click(cancelImport);
}
|
||||
|
||||
/**
 * Formats a byte count for display, rounded to the nearest whole unit.
 *
 * @param b  number of bytes
 * @return   "<n> MB" for 1 MiB and above, "<n> KB" for 1 KiB and above,
 *           otherwise "<b> bytes"
 */
function bytesToString(b) {
    var KILOBYTE = 1024;
    var MEGABYTE = 1024 * 1024;

    if (b >= MEGABYTE) {
        return Math.round(b / MEGABYTE) + " MB";
    }
    if (b >= KILOBYTE) {
        return Math.round(b / KILOBYTE) + " KB";
    }
    return b + " bytes";
}
|
||||
|
||||
/**
 * Fetches the current status of an import job and updates the UI:
 *   - code "error" or state "error": show the error panel and stop polling;
 *   - state "retrieving": update the progress bar / saved-bytes message;
 *   - state "ready": stop polling and navigate to the import page for the job.
 * Any other state leaves the UI untouched.
 *
 * @param start    time at which the job was started (not read by this function)
 * @param jobID    id of the import job to query
 * @param timerID  interval id to clear once polling should stop
 */
function pollImportJob(start, jobID, timerID) {
    var statusURL = "/command/core/get-import-job-status?" + $.param({ "jobID" : jobID });

    var handleStatus = function(data) {
        if (data.code == "error") {
            showImportJobError(data.message);
            window.clearInterval(timerID);
            return;
        }

        if (data.state == "error") {
            showImportJobError(data.message, data.stack);
            window.clearInterval(timerID);
            return;
        }

        if (data.state == "retrieving") {
            if (data.progress < 0) {
                // Negative progress: only the number of bytes saved so far is reported.
                $('#import-progress-message-left').text(bytesToString(data.bytesSaved) + " saved");
            } else {
                $('#import-progress-bar-body').css("width", data.progress + "%");
                $('#import-progress-message-left').text(data.progress + "% saved");
            }
            return;
        }

        if (data.state == "ready") {
            window.clearInterval(timerID);

            // Just so if the user clicks Back the progress panel won't be showing if the DOM is cached.
            $('#import-progress-panel').hide();
            $('#import-panel').show();

            window.location = "/import?" + $.param({ "jobID" : jobID });
        }
    };

    $.post(statusURL, null, handleStatus, "json");
}
|
||||
|
||||
/**
 * Replaces the progress panel with the error panel and wires its OK button
 * to return to the import panel.
 *
 * @param message  error text to display
 * @param stack    optional technical details; a placeholder text is shown
 *                 when this is missing or empty
 */
function showImportJobError(message, stack) {
    var details = stack || 'No technical details.';

    $('#import-error-message').text(message);
    $('#import-error-stack').text(details);

    $('#import-progress-panel').hide();
    $('#import-error-panel').show();

    var dismiss = function() {
        $('#import-error-panel').hide();
        $('#import-panel').show();
    };

    // Remove handlers from earlier errors before attaching this one.
    $('#import-error-ok-button').unbind().click(dismiss);
}
|
||||
|
||||
function onLoad() {
|
||||
renderImportPanel();
|
||||
|
||||
fetchProjects();
|
||||
|
||||
$("#project-file-input").change(function() {
|
||||
if ($("#project-name-input")[0].value.length == 0) {
|
||||
var fileName = this.files[0].fileName;
|
||||
if (fileName) {
|
||||
$("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
|
||||
}
|
||||
$("#project-name-input").focus().select();
|
||||
}
|
||||
}).keypress(function(evt) {
|
||||
if (evt.keyCode == 13) {
|
||||
onClickUploadFileButton();
|
||||
}
|
||||
});
|
||||
|
||||
$("#upload-file-button").click(onClickUploadFileButton);
|
||||
$("#more-options-link").click(function() {
|
||||
$("#more-options-controls").hide();
|
||||
$("#more-options").show();
|
||||
});
|
||||
Refine.selectActionArea('create-project');
|
||||
|
||||
showVersion();
|
||||
}
|
||||
|
||||
$(onLoad);
|
||||
});
|
||||
|
@ -0,0 +1,17 @@
|
||||
<div id="create-project-ui-source-selection" class="relative-frame"><table id="create-project-ui-source-selection-layout">
|
||||
<tr>
|
||||
<td colspan="2" id="create-project-ui-source-selection-message">
|
||||
<h3>Create a project by importing data. What kinds of data files can I import?</h3>
|
||||
<div>TSV, CSV, *SV, Excel (.xls and .xlsx), JSON, XML, RDF as XML, and
|
||||
Google Spreadsheets are all supported. Support for other formats can
|
||||
be added with Refine extensions.
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td id="create-project-ui-source-selection-tabs">
|
||||
<div>Get data from</div>
|
||||
</td>
|
||||
<td id="create-project-ui-source-selection-tab-bodies"></td>
|
||||
</tr>
|
||||
</table></div>
|
145
main/webapp/modules/core/scripts/index/create-project-ui.js
Normal file
145
main/webapp/modules/core/scripts/index/create-project-ui.js
Normal file
@ -0,0 +1,145 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
 * UI for the "Create Project" action area.
 *
 * The constructor only records its container and requests the importing
 * configuration from the server; the UI itself is built by _initializeUI()
 * once that response arrives.
 *
 * @param elmt  container that the UI is appended to
 */
Refine.CreateProjectUI = function(elmt) {
    this._elmt = elmt;
    this._sourceSelectionUIs = [];
    this._customPanels = [];
    this._controllers = [];

    var ui = this;
    var onConfigurationLoaded = function(data) {
        Refine.importingConfig = data.config;
        ui._initializeUI();
    };

    $.post("/command/core/get-importing-configuration", null, onConfigurationLoaded, "json");
};

// Controller constructors; each is instantiated with the CreateProjectUI
// instance in _initializeUI().
Refine.CreateProjectUI.controllers = [];
|
||||
|
||||
/**
 * Loads the source-selection markup into the container, binds its elements,
 * and instantiates every registered controller with this UI.
 */
Refine.CreateProjectUI.prototype._initializeUI = function() {
    var markup = DOM.loadHTML("core", "scripts/index/create-project-ui-source-selection.html");
    this._sourceSelectionElmt = $(markup).appendTo(this._elmt);

    this._sourceSelectionElmts = DOM.bind(this._sourceSelectionElmt);

    // The length is re-read on every pass, as in a plain indexed loop.
    var index = 0;
    while (index < Refine.CreateProjectUI.controllers.length) {
        var ControllerClass = Refine.CreateProjectUI.controllers[index];
        this._controllers.push(new ControllerClass(this));
        index++;
    }
};
|
||||
|
||||
/**
 * Adds a data source to the source-selection panel: creates its tab header
 * and (initially hidden) tab body, lets the source attach its UI to the
 * body, and selects it if it is the first source registered.
 *
 * @param sourceSelectionUI  descriptor read for its "id", "label" and "ui"
 *                           fields; "_divBody" and "_divHeader" are set on it
 */
Refine.CreateProjectUI.prototype.addSourceSelectionUI = function(sourceSelectionUI) {
    var self = this;

    var tabColumn = $('#create-project-ui-source-selection-tabs');
    var bodyColumn = $('#create-project-ui-source-selection-tab-bodies');

    var tabBody = $('<div>')
        .addClass('create-project-ui-source-selection-tab-body')
        .appendTo(bodyColumn)
        .hide();
    sourceSelectionUI._divBody = tabBody;

    var onTabClicked = function() {
        self.selectImportSource(sourceSelectionUI.id);
    };
    var tabHeader = $('<div>')
        .addClass('create-project-ui-source-selection-tab')
        .text(sourceSelectionUI.label)
        .appendTo(tabColumn)
        .click(onTabClicked);
    sourceSelectionUI._divHeader = tabHeader;

    sourceSelectionUI.ui.attachUI(tabBody);

    this._sourceSelectionUIs.push(sourceSelectionUI);

    // The first source to be registered becomes the initially selected tab.
    if (this._sourceSelectionUIs.length == 1) {
        self.selectImportSource(sourceSelectionUI.id);
    }
};
|
||||
|
||||
/**
 * Activates the source-selection tab with the given id: hides every tab
 * body, clears the selected state of every tab, then shows, marks and
 * focuses the matching source. Does nothing when no source has that id.
 *
 * @param id  id of the registered source to select
 */
Refine.CreateProjectUI.prototype.selectImportSource = function(id) {
    var sources = this._sourceSelectionUIs;

    for (var index = 0; index < sources.length; index++) {
        var source = sources[index];
        if (source.id != id) {
            continue;
        }

        $('.create-project-ui-source-selection-tab-body').hide();
        $('.create-project-ui-source-selection-tab').removeClass('selected');

        source._divBody.show();
        source._divHeader.addClass('selected');

        source.ui.focus();

        // Only the first match is activated.
        return;
    }
};
|
||||
|
||||
/**
 * Appends a new custom panel to the container and registers it.
 *
 * @return  the inner 'relative-frame' div of the new panel; the outer
 *          'create-project-ui-panel' div is what gets tracked in
 *          _customPanels
 */
Refine.CreateProjectUI.prototype.addCustomPanel = function() {
    var outerPanel = $('<div>').addClass('create-project-ui-panel');
    outerPanel.appendTo(this._elmt);

    var frame = $('<div>').addClass('relative-frame');
    frame.appendTo(outerPanel);

    this._customPanels.push(outerPanel);

    return frame;
};
|
||||
|
||||
/**
 * Makes the custom panel that contains the given div the only visible
 * panel, hiding all other custom panels and the source-selection panel.
 * Does nothing if the div's parent is not a registered custom panel.
 *
 * @param div  inner div as returned by addCustomPanel()
 */
Refine.CreateProjectUI.prototype.showCustomPanel = function(div) {
    var wrapper = div.parent();

    for (var index = 0; index < this._customPanels.length; index++) {
        var candidate = this._customPanels[index];
        if (candidate[0] !== wrapper[0]) {
            continue;
        }

        // Visibility (rather than display) is toggled for all panels.
        $('.create-project-ui-panel').css('visibility', 'hidden');
        this._sourceSelectionElmt.css('visibility', 'hidden');
        candidate.css('visibility', 'visible');
        return;
    }
};
|
||||
|
||||
/**
 * Hides every custom panel and makes the source-selection panel visible.
 */
Refine.CreateProjectUI.prototype.showSourceSelectionPanel = function() {
    var customPanels = $('.create-project-ui-panel');
    customPanels.css('visibility', 'hidden');

    var sourceSelection = this._sourceSelectionElmt;
    sourceSelection.css('visibility', 'visible');
};
|
||||
|
||||
// Register the "Create Project" action area with Refine.CreateProjectUI as its UI class.
Refine.actionAreas.push({
    "id": "create-project",
    "label": "Create Project",
    "uiClass": Refine.CreateProjectUI
});
|
@ -0,0 +1,395 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Default importing controller.
 *
 * Claims four custom panels from the given Create Project UI, in this order:
 * progress, error, file selection, parsing. The progress and error panels are
 * filled with static HTML right away; the other two start empty. Then each
 * entry of Refine.DefaultImportingController.sources gets its `uiClass`
 * instantiated (with this controller as the only argument), stored on the
 * entry as `ui`, and handed to createProjectUI.addSourceSelectionUI().
 */
Refine.DefaultImportingController = function(createProjectUI) {
  var controllerClass = Refine.DefaultImportingController;

  this._createProjectUI = createProjectUI;

  var progressPanel = createProjectUI.addCustomPanel();
  this._progressPanel = progressPanel;
  progressPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/progress-panel.html"));

  var errorPanel = createProjectUI.addCustomPanel();
  this._errorPanel = errorPanel;
  errorPanel.html(DOM.loadHTML("core", "scripts/index/default-importing-controller/error-panel.html"));

  this._fileSelectionPanel = createProjectUI.addCustomPanel();
  this._parsingPanel = createProjectUI.addCustomPanel();

  for (var n = 0; n < controllerClass.sources.length; n++) {
    var source = controllerClass.sources[n];
    source.ui = new source.uiClass(this);

    createProjectUI.addSourceSelectionUI(source);
  }
};
|
||||
// Source descriptors consumed by the constructor: each entry's `uiClass` is
// instantiated and the entry is added as a source selection UI.
Refine.DefaultImportingController.sources = [];

// Keyed registry of parser UIs. It is not populated in this part of the
// file; presumably keyed by format name (TODO confirm against the parser
// UI scripts).
Refine.DefaultImportingController.parserUIs = {};

// Make this controller class known to the Create Project UI.
Refine.CreateProjectUI.controllers.push(Refine.DefaultImportingController);
|
||||
|
||||
/*
 * Abandons the current import and returns to the source selection view.
 *
 * Disposes the file selection panel and the parsing panel, forgets all
 * per-job state (bound panel elements, job id and record, extension list,
 * chosen format, parser options, project name), and finally shows the
 * source selection panel again.
 */
Refine.DefaultImportingController.prototype._startOver = function() {
  this._disposeFileSelectionPanel();

  // The file selection disposer used to be invoked twice here, so the
  // parsing panel was never asked to clean up after itself. The parsing
  // panel's disposer is looked up defensively so that starting over cannot
  // fail with a TypeError when no such method is present on the prototype.
  if (typeof this._disposeParsingPanel === "function") {
    this._disposeParsingPanel();
  }

  delete this._fileSelectionPanelElmts;
  delete this._parsingPanelElmts;

  delete this._jobID;
  delete this._job;
  delete this._extensions;

  delete this._format;
  delete this._parserOptions;
  delete this._projectName;

  this._createProjectUI.showSourceSelectionPanel();
};
|
||||
|
||||
/*
 * Starts a new importing job and uploads the raw data carried by `form`.
 *
 * form            - jQuery-wrapped <form>; its method/enctype/charset/target/
 *                   action attributes are overwritten and it is submitted
 *                   into the frame named "default-importing-iframe".
 * progressMessage - text shown on the progress panel while loading.
 * callback        - optional; invoked as callback(jobID, job) once the job
 *                   reports config.hasData, after _onImportJobReady() ran.
 *
 * The job status is polled once a second. The cancel button of the progress
 * panel stops the iframe load and the polling, asks the server to cancel the
 * job, and returns to the source selection panel.
 */
Refine.DefaultImportingController.prototype.startImportJob = function(form, progressMessage, callback) {
  var controller = this;

  var onJobCreated = function(response) {
    var newJobID = response.jobID;
    controller._jobID = newJobID;

    var uploadURL = "/command/core/importing-controller?" + $.param({
      "controller": "core/default-importing-controller",
      "jobID": newJobID,
      "subCommand": "load-raw-data"
    });
    form.attr("method", "post");
    form.attr("enctype", "multipart/form-data");
    form.attr("accept-charset", "UTF-8");
    form.attr("target", "default-importing-iframe");
    form.attr("action", uploadURL);
    form[0].submit();

    var startedAt = new Date();

    // The job is ready as soon as the server says it holds data.
    var hasData = function(job) {
      return job.config.hasData;
    };
    var onReady = function(readyJobID, job) {
      controller._job = job;
      controller._onImportJobReady();
      if (callback) {
        callback(readyJobID, job);
      }
    };
    var pollTimer = window.setInterval(
      function() {
        controller._pollImportJob(startedAt, newJobID, pollTimer, hasData, onReady);
      },
      1000
    );

    var onCancel = function() {
      // stop the iframe
      $('#default-importing-iframe')[0].contentWindow.stop();

      // stop the timed polling
      window.clearInterval(pollTimer);

      // explicitly cancel the import job
      $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": newJobID }));

      controller._createProjectUI.showSourceSelectionPanel();
    };
    controller._initializeImportProgressPanel(progressMessage, onCancel);
  };

  $.post("/command/core/create-importing-job", null, onJobCreated, "json");
};
|
||||
|
||||
/*
 * Shows the progress panel in its initial state.
 *
 * progressMessage - headline text for the panel.
 * onCancel        - click handler for the cancel button; any handlers bound
 *                   earlier are removed first.
 *
 * The bar is reset to 0%, the left message reads 'Starting', and the centre
 * message, right message and timing text are cleared.
 */
Refine.DefaultImportingController.prototype._initializeImportProgressPanel = function(progressMessage, onCancel) {
  this._createProjectUI.showCustomPanel(this._progressPanel);

  $('#default-importing-progress-message').text(progressMessage);
  $('#default-importing-progress-bar-body').css("width", "0%");
  $('#default-importing-progress-message-left').text('Starting');

  var cleared = [
    '#default-importing-progress-message-center',
    '#default-importing-progress-message-right',
    '#default-importing-progress-timing'
  ];
  for (var n = 0; n < cleared.length; n++) {
    $(cleared[n]).empty();
  }

  var cancelButton = $('#default-importing-progress-cancel-button');
  cancelButton.unbind().click(onCancel);
};
|
||||
|
||||
/*
 * Performs one status poll for an importing job and updates the progress
 * panel accordingly.
 *
 * start     - Date at which the operation began; used to estimate the
 *             remaining time from the reported percentage.
 * jobID     - id of the job to query.
 * timerID   - interval timer driving the polling; cleared on error and on
 *             completion.
 * checkDone - function(job) returning truthy when the job is finished.
 * callback  - optional; invoked as callback(jobID, job) on completion.
 *
 * An empty response, a response with code "error", or one without a "job"
 * property shows the error panel and stops the polling.
 */
Refine.DefaultImportingController.prototype._pollImportJob = function(start, jobID, timerID, checkDone, callback) {
  var controller = this;

  var describeRemaining = function(seconds) {
    if (seconds > 60) {
      return Math.ceil(seconds / 60) + " minutes remaining";
    }
    if (seconds > 1) {
      return Math.ceil(seconds) + " seconds remaining";
    }
    return 'almost done ...';
  };

  var onStatus = function(response) {
    var failed = !(response) || response.code == "error" || !("job" in response);
    if (failed) {
      controller._showImportJobError((response && response.message) || "Unknown error");
      window.clearInterval(timerID);
      return;
    }

    var job = response.job;
    if (checkDone(job)) {
      $('#default-importing-progress-message').text('Done.');

      window.clearInterval(timerID);
      if (callback) {
        callback(jobID, job);
      }
      return;
    }

    var progress = job.config.progress;
    if (progress.percent > 0) {
      // Linear extrapolation: total time = elapsed * 100 / percent.
      var elapsed = (new Date().getTime() - start.getTime()) / 1000;
      var remaining = (100 / progress.percent) * elapsed - elapsed;

      $('#default-importing-progress-bar-body')
        .removeClass('indefinite')
        .css("width", progress.percent + "%");

      $('#default-importing-progress-timing').text(describeRemaining(remaining));
    } else {
      // No percentage yet: show the bar in its 'indefinite' style.
      $('#default-importing-progress-bar-body').addClass('indefinite');
      $('#default-importing-progress-timing').empty();
    }
    $('#default-importing-progress-message').text(progress.message);
  };

  $.post(
    "/command/core/get-importing-job-status?" + $.param({ "jobID": jobID }),
    null,
    onStatus,
    "json"
  );
};
|
||||
|
||||
/*
 * Shows the error panel.
 *
 * message - text for the error message element.
 * stack   - optional technical details; 'No technical details.' is shown
 *           when it is missing or empty.
 *
 * The OK button (re-bound on every call) returns to the source selection
 * panel.
 */
Refine.DefaultImportingController.prototype._showImportJobError = function(message, stack) {
  var controller = this;
  var details = stack || 'No technical details.';

  $('#default-importing-error-message').text(message);
  $('#default-importing-error-stack').text(details);

  this._createProjectUI.showCustomPanel(this._errorPanel);

  var dismiss = function() {
    controller._createProjectUI.showSourceSelectionPanel();
  };
  $('#default-importing-error-ok-button').unbind().click(dismiss);
};
|
||||
|
||||
/*
 * Called once the job has its raw data. Prepares the per-file bookkeeping,
 * then shows the file selection panel when more than one file was retrieved;
 * otherwise goes straight to the parsing panel (without a way back to file
 * selection).
 */
Refine.DefaultImportingController.prototype._onImportJobReady = function() {
  this._prepareData();

  var fileCount = this._job.config.retrievalRecord.files.length;
  if (fileCount > 1) {
    this._showFileSelectionPanel();
    return;
  }
  this._showParsingPanel(false);
};
|
||||
|
||||
/*
 * Derives client-side bookkeeping from the job record:
 *
 *  - every retrieved file gets a boolean `selected` flag, true exactly for
 *    the indices listed in config.fileSelection;
 *  - this._extensions becomes a list of { extension, count } objects, one
 *    per distinct file name extension (including the leading dot), sorted
 *    by descending count.
 *
 * A name only counts as having an extension when its last dot comes after
 * at least one character of the last path segment.
 */
Refine.DefaultImportingController.prototype._prepareData = function() {
  var config = this._job.config;
  var files = config.retrievalRecord.files;
  var selectedIndices = config.fileSelection;

  var tallies = {};
  var ranked = [];

  for (var f = 0; f < files.length; f++) {
    var record = files[f];
    record.selected = false;

    var name = record.fileName;
    var lastSlash = name.lastIndexOf('/');
    var lastDot = name.lastIndexOf('.');
    if (lastDot > lastSlash + 1) {
      var ext = name.substring(lastDot);
      if (ext in tallies) {
        tallies[ext].count++;
      } else {
        tallies[ext] = { extension: ext, count: 1 };
        ranked.push(tallies[ext]);
      }
    }
  }

  for (var s = 0; s < selectedIndices.length; s++) {
    files[selectedIndices[s]].selected = true;
  }

  // Most frequent extension first.
  var byDescendingCount = function(left, right) {
    return right.count - left.count;
  };
  ranked.sort(byDescendingCount);

  this._extensions = ranked;
};
|
||||
|
||||
/*
 * Makes sure this._parserOptions holds an entry for `format`, then calls
 * onDone().
 *
 * When the entry already exists, onDone() runs immediately. Otherwise a busy
 * indicator is shown while the server is asked ("initialize-parser-ui") for
 * the options; they are cached under `format` and onDone() is called.
 * If the response carries no `options`, only the busy indicator is dismissed
 * and onDone() is not called.
 */
Refine.DefaultImportingController.prototype._ensureFormatParserUIHasInitializationData = function(format, onDone) {
  if (format in this._parserOptions) {
    onDone();
    return;
  }

  var controller = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");

  var url = "/command/core/importing-controller?" + $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "initialize-parser-ui",
    "format": format
  });
  var onResponse = function(response) {
    dismissBusy();

    if (response.options) {
      controller._parserOptions[format] = response.options;
      onDone();
    }
  };

  $.post(url, null, onResponse, "json");
};
|
||||
|
||||
/*
 * Sends the currently chosen format (this._format) together with `options`
 * (serialised as JSON) to the server via the "update-format-and-options"
 * sub-command of the current job.
 *
 * callback - passed to $.post as the success handler; the response is
 *            requested as JSON.
 */
Refine.DefaultImportingController.prototype.updateFormatAndOptions = function(options, callback) {
  var url = "/command/core/importing-controller?" + $.param({
    "controller": "core/default-importing-controller",
    "jobID": this._jobID,
    "subCommand": "update-format-and-options"
  });
  var payload = {
    "format" : this._format,
    "options" : JSON.stringify(options)
  };

  $.post(url, payload, callback, "json");
};
|
||||
|
||||
/*
 * Fetches the preview of the current importing job.
 *
 * callback - invoked with a single object holding every own property of the
 *            get-models response plus `rowModel`, the get-rows response.
 * numRows  - optional row limit; 100 is used when it is missing or 0.
 *
 * Cells in the row response refer to reconciliation records by key into
 * pool.recons; those references are replaced with the records themselves
 * before the callback runs.
 */
Refine.DefaultImportingController.prototype.getPreviewData = function(callback, numRows) {
  var controller = this;
  var preview = {};

  var onRows = function(rowData) {
    // Un-pool objects
    for (var r = 0; r < rowData.rows.length; r++) {
      var cells = rowData.rows[r].cells;
      for (var c = 0; c < cells.length; c++) {
        var cell = cells[c];
        if ((cell) && ("r" in cell)) {
          cell.r = rowData.pool.recons[cell.r];
        }
      }
    }

    preview.rowModel = rowData;
    callback(preview);
  };

  var onModels = function(models) {
    for (var key in models) {
      if (models.hasOwnProperty(key)) {
        preview[key] = models[key];
      }
    }

    var rowsURL = "/command/core/get-rows?" + $.param({
      "importingJobID" : controller._jobID,
      "start" : 0,
      "limit" : numRows || 100 // More than we parse for preview anyway
    });
    // NOTE(review): rows are requested as "jsonp" whereas the models request
    // uses "json" -- confirm this difference is intentional.
    $.post(rowsURL, null, onRows, "jsonp");
  };

  var modelsURL = "/command/core/get-models?" + $.param({ "importingJobID" : this._jobID });
  $.post(modelsURL, null, onModels, "json");
};
|
||||
|
||||
/*
 * Creates the project from the current importing job.
 *
 * Does nothing unless a format parser UI exists and confirms that it is
 * ready. The project name is taken (trimmed) from the parsing panel's name
 * input; when it is empty the user is alerted and the input is focused.
 * Otherwise the parser UI's options, extended with `projectName`, are posted
 * with the "create-project" sub-command. The job is then polled once a
 * second until its config carries a `projectID`, at which point the browser
 * navigates to that project. The progress panel's cancel button stops the
 * polling, cancels the job on the server and returns to source selection.
 */
Refine.DefaultImportingController.prototype._createProject = function() {
  if ((this._formatParserUI) && this._formatParserUI.confirmReadyToCreateProject()) {
    var projectNameInput = this._parsingPanelElmts.projectNameInput;
    var projectName = $.trim(projectNameInput[0].value);
    if (projectName.length == 0) {
      window.alert("Please name the project.");
      // _parsingPanelElmts is only the map of bound elements and has no
      // focus() of its own; the name input is what needs the focus.
      projectNameInput.focus();
      return;
    }

    var self = this;
    // Captured once so that both the polling and the cancel handler refer to
    // the job this request was issued for (the cancel handler used to read
    // an undeclared `jobID` variable and threw a ReferenceError).
    var jobID = this._jobID;
    var options = this._formatParserUI.getOptions();
    options.projectName = projectName;
    $.post(
      "/command/core/importing-controller?" + $.param({
        "controller": "core/default-importing-controller",
        "jobID": jobID,
        "subCommand": "create-project"
      }),
      {
        "format" : this._format,
        "options" : JSON.stringify(options)
      },
      function() {
        var start = new Date();
        var timerID = window.setInterval(
          function() {
            self._pollImportJob(
              start,
              jobID,
              timerID,
              function(job) {
                // Done as soon as the server has assigned a project id.
                return "projectID" in job.config;
              },
              function(jobID, job) {
                document.location = "project?project=" + job.config.projectID;
              }
            );
          },
          1000
        );
        self._initializeImportProgressPanel("Creating project ...", function() {
          // stop the timed polling
          window.clearInterval(timerID);

          // explicitly cancel the import job
          $.post("/command/core/cancel-importing-job?" + $.param({ "jobID": jobID }));

          self._createProjectUI.showSourceSelectionPanel();
        });
      },
      "json"
    );
  }
};
|
@ -0,0 +1,5 @@
|
||||
<div id="default-importing-error-panel"><div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td id="default-importing-error-message"></td></tr>
|
||||
<tr><td id="default-importing-error-stack"></td></tr>
|
||||
<tr><td><button class="button button-primary" id="default-importing-error-ok-button">OK</button></td></tr>
|
||||
</table></div></div>
|
@ -0,0 +1,36 @@
|
||||
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
|
||||
<td width="1%"><button bind="startOverButton" class="button">« Start Over</button></td>
|
||||
<td width="98%">Select Files to Import</td>
|
||||
<td width="1%"><button bind="nextButton" class="button button-primary">Configure Parsing Options »</button></td>
|
||||
</tr></table></div></div>
|
||||
|
||||
<div bind="controlPanel" class="default-importing-file-selection-control-panel">
|
||||
<div class="grid-layout layout-full layout-tighter"><table>
|
||||
<tr>
|
||||
<td colspan="3">
|
||||
There are several files available to import.
|
||||
Please select the desired ones.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="98%" bind="summary"></td>
|
||||
<td width="1%"><button bind="selectAllButton" class="button">Select All</button></td>
|
||||
<td width="1%"><button bind="unselectAllButton" class="button">Unselect All</button></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
|
||||
<h2>Select by Extension</h2>
|
||||
<div bind="extensionContainer" class="grid-layout layout-full layout-tightest"></div>
|
||||
|
||||
<h2>Select by Regex on File Names</h2>
|
||||
<div class="grid-layout layout-full layout-tighter"><table>
|
||||
<tr><td colspan="3"><input bind="regexInput" style="width: 100%;"/></td></tr>
|
||||
<tr>
|
||||
<td bind="regexSummary"></td>
|
||||
<td width="1%"><button bind="selectRegexButton" class="button">Select</button></td>
|
||||
<td width="1%"><button bind="unselectRegexButton" class="button">Unselect</button></td>
|
||||
</tr>
|
||||
</table></div>
|
||||
</div>
|
||||
|
||||
<div bind="filePanel" class="default-importing-file-selection-file-panel"></div>
|
@ -0,0 +1,314 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Builds, wires up, renders and finally shows the file selection panel.
 * Its "next" button commits the current selection.
 */
Refine.DefaultImportingController.prototype._showFileSelectionPanel = function() {
  var controller = this;

  this._prepareFileSelectionPanel();

  var proceed = function() {
    controller._commitFileSelection();
  };
  this._fileSelectionPanelElmts.nextButton.click(proceed);

  this._renderFileSelectionPanel();
  this._createProjectUI.showCustomPanel(this._fileSelectionPanel);
};
|
||||
|
||||
/*
 * Tears down the file selection panel: detaches its window resize handler
 * (if one was installed), removes the panel's own event handlers and
 * empties it.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  var resizer = this._fileSelectionPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  var panel = this._fileSelectionPanel;
  panel.unbind().empty();
};
|
||||
|
||||
/*
 * (Re)creates the file selection panel's DOM from its HTML template, binds
 * its named elements into this._fileSelectionPanelElmts, hooks up the
 * "start over" button, and installs a window resize handler that lays the
 * panel out: a 350px wide control panel on the left and the file panel
 * filling the remaining width, both placed below the wizard header.
 * The layout is applied once immediately.
 */
Refine.DefaultImportingController.prototype._prepareFileSelectionPanel = function() {
  var controller = this;
  var panel = this._fileSelectionPanel;

  panel.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/default-importing-controller/file-selection-panel.html"));

  this._fileSelectionPanelElmts = DOM.bind(panel);
  this._fileSelectionPanelElmts.startOverButton.click(function() {
    controller._startOver();
  });

  this._fileSelectionPanelResizer = function() {
    var bound = controller._fileSelectionPanelElmts;
    var totalWidth = controller._fileSelectionPanel.width();
    var totalHeight = controller._fileSelectionPanel.height();
    var top = bound.wizardHeader.outerHeight(true);
    var controlPanelWidth = 350;

    // Positions `target` below the header; the element's own paddings are
    // subtracted so that its outer box has the requested size.
    var place = function(target, left, outerWidth) {
      target.css("left", left + "px");
      target.css("top", top + "px");
      target.css("width", (outerWidth - DOM.getHPaddings(target)) + "px");
      target.css("height", (totalHeight - top - DOM.getVPaddings(target)) + "px");
    };

    place(bound.controlPanel, 0, controlPanelWidth);
    place(bound.filePanel, controlPanelWidth, totalWidth - controlPanelWidth);
  };

  $(window).resize(this._fileSelectionPanelResizer);
  this._fileSelectionPanelResizer();
};
|
||||
|
||||
/*
 * Renders both halves of the file selection panel: first the table of
 * files, then the control panel with the bulk selection tools.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanel = function() {
  var controller = this;

  controller._renderFileSelectionPanelFileTable();
  controller._renderFileSelectionPanelControlPanel();
};
|
||||
|
||||
/*
 * Rebuilds the table of retrieved files inside the file panel.
 *
 * Each row has a checkbox (carrying the file's position in an `index`
 * attribute and reflecting its `selected` flag), the file name, the declared
 * or detected MIME type, the format, and the size in bytes. Missing MIME
 * type or format is shown as "unknown". Toggling a checkbox updates the
 * file's `selected` flag and the selection summary.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelFileTable = function() {
  var controller = this;
  var filePanel = this._fileSelectionPanelElmts.filePanel;

  filePanel.empty();

  var table = $('<table><tr><th></th><th>Name</th><th>Mime-type</th><th>Format</th><th>Size</th></tr></table>')
    .appendTo(filePanel)[0];

  var files = this._job.config.retrievalRecord.files;

  var addRow = function(record, position) {
    var row = table.insertRow(table.rows.length);
    $(row).addClass(position % 2 == 0 ? 'even' : 'odd');

    var addTextCell = function(text) {
      return $('<td>').text(text);
    };

    var selectCell = $('<td>').appendTo(row);
    var box = $('<input>')
      .attr("type", "checkbox")
      .attr("index", position)
      .appendTo(selectCell)
      .click(function() {
        files[position].selected = this.checked;
        controller._updateFileSelectionSummary();
      });
    if (record.selected) {
      box.attr("checked", "checked");
    }

    addTextCell(record.fileName).addClass("default-importing-file-selection-filename").appendTo(row);
    addTextCell(record.declaredMimeType || record.mimeType || "unknown").appendTo(row);
    addTextCell(record.format || "unknown").appendTo(row);
    addTextCell(record.size + " bytes").appendTo(row);
  };

  for (var n = 0; n < files.length; n++) {
    addRow(files[n], n);
  }
};
|
||||
|
||||
/*
 * Rebuilds the control panel of the file selection view and wires up its
 * bulk selection tools:
 *
 *  - "Select All" / "Unselect All";
 *  - one row per entry of this._extensions with Select / Unselect buttons
 *    acting on the files whose name ends with that extension;
 *  - a regular expression input that highlights matching file names as the
 *    user types and reports the number of matches, plus Select / Unselect
 *    buttons acting on the files whose name matches. An invalid expression
 *    is silently ignored.
 *
 * Every action keeps the files' `selected` flags, the checkboxes in the
 * file table and the selection summary in step.
 */
Refine.DefaultImportingController.prototype._renderFileSelectionPanelControlPanel = function() {
  var self = this;
  var files = this._job.config.retrievalRecord.files;

  // Checks or unchecks the given checkbox(es) in the file table.
  var markBoxes = function(boxes, select) {
    if (select) {
      boxes.attr("checked", "checked");
    } else {
      boxes.removeAttr("checked");
    }
  };

  // Sets every file, and every checkbox, to the given state.
  var changeAll = function(select) {
    for (var k = 0; k < files.length; k++) {
      files[k].selected = select;
    }
    markBoxes(self._fileSelectionPanelElmts.filePanel.find("input"), select);
    self._updateFileSelectionSummary();
  };

  // Moves the files accepted by `matches` into the given state; files that
  // are already in that state are not tested or touched.
  var changeMatching = function(matches, select) {
    for (var k = 0; k < files.length; k++) {
      var candidate = files[k];
      var wouldChange = select ? !candidate.selected : candidate.selected;
      if (wouldChange && matches(candidate)) {
        candidate.selected = select;
        markBoxes(
          self._fileSelectionPanelElmts.filePanel.find("input[index='" + k + "']"),
          select);
      }
    }
  };

  // Removes the regex highlight from all file names and returns those cells.
  var clearHighlights = function() {
    return self._fileSelectionPanelElmts.filePanel
      .find(".default-importing-file-selection-filename")
      .removeClass("highlighted");
  };

  this._fileSelectionPanelElmts.extensionContainer.empty();
  this._fileSelectionPanelElmts.selectAllButton.unbind().click(function(evt) {
    changeAll(true);
  });
  this._fileSelectionPanelElmts.unselectAllButton.unbind().click(function(evt) {
    changeAll(false);
  });

  var table = $('<table></table>')
    .appendTo(this._fileSelectionPanelElmts.extensionContainer)[0];

  var addExtensionRow = function(extension) {
    var row = table.insertRow(table.rows.length);
    $('<td>').text(extension.extension).appendTo(row);
    $('<td>').text(extension.count + (extension.count > 1 ? " files" : " file")).appendTo(row);

    var hasExtension = function(file) {
      return file.fileName.endsWith(extension.extension);
    };
    var addButton = function(label, select) {
      $('<button>')
        .text(label)
        .addClass("button")
        .appendTo($('<td>').appendTo(row))
        .click(function() {
          changeMatching(hasExtension, select);
          self._updateFileSelectionSummary();
        });
    };
    addButton("Select", true);
    addButton("Unselect", false);
  };
  for (var n = 0; n < this._extensions.length; n++) {
    addExtensionRow(this._extensions[n]);
  }

  this._updateFileSelectionSummary();

  this._fileSelectionPanelElmts.regexInput.unbind().keyup(function() {
    var matchCount = 0;
    var nameCells = clearHighlights();
    try {
      var pattern = new RegExp(this.value);
      nameCells.each(function() {
        var cell = $(this);
        if (pattern.test(cell.text())) {
          cell.addClass("highlighted");
          matchCount++;
        }
      });
    } catch (e) {
      // Ignore
    }
    self._fileSelectionPanelElmts.regexSummary.text(matchCount + (matchCount == 1 ? " match" : " matches"));
  });

  // Builds the click handler shared by the regex Select / Unselect buttons.
  var regexHandler = function(select) {
    return function() {
      clearHighlights();
      try {
        var pattern = new RegExp(self._fileSelectionPanelElmts.regexInput[0].value);
        changeMatching(function(file) {
          return pattern.test(file.fileName);
        }, select);
        self._updateFileSelectionSummary();
      } catch (e) {
        // Ignore
      }
    };
  };
  this._fileSelectionPanelElmts.selectRegexButton.unbind().click(regexHandler(true));
  this._fileSelectionPanelElmts.unselectRegexButton.unbind().click(regexHandler(false));
};
|
||||
|
||||
/*
 * Recomputes config.fileSelection (the ascending list of indices of the
 * files flagged `selected`) and refreshes the "N of M files selected"
 * summary text.
 */
Refine.DefaultImportingController.prototype._updateFileSelectionSummary = function() {
  var config = this._job.config;
  var files = config.retrievalRecord.files;

  var chosen = [];
  for (var n = 0; n < files.length; n++) {
    if (files[n].selected) {
      chosen.push(n);
    }
  }
  config.fileSelection = chosen;

  var summary = chosen.length + " of " + files.length + " files selected";
  this._fileSelectionPanelElmts.summary.text(summary);
};
|
||||
|
||||
/*
 * Sends the current file selection to the server ("update-file-selection")
 * and, on success, moves on to the parsing panel with the refreshed job
 * record.
 *
 * Requires at least one selected file; otherwise the user is alerted and
 * nothing is sent. A busy indicator is shown for the duration of the
 * request. An empty response, a response with code "error", or one without
 * a "job" property is reported through the error panel.
 */
Refine.DefaultImportingController.prototype._commitFileSelection = function() {
  if (this._job.config.fileSelection.length == 0) {
    alert("Please select at least one file.");
    return;
  }

  var self = this;
  var dismissBusy = DialogSystem.showBusy("Inspecting selected files ...");
  $.post(
    "/command/core/importing-controller?" + $.param({
      "controller": "core/default-importing-controller",
      "jobID": this._jobID,
      "subCommand": "update-file-selection"
    }),
    {
      "fileSelection" : JSON.stringify(this._job.config.fileSelection)
    },
    function(data) {
      // Dismiss first so the busy indicator never outlives the request,
      // including on the error paths below. (No polling timer exists in
      // this flow, so there is nothing to clear on error.)
      dismissBusy();

      if (!(data)) {
        self._showImportJobError("Unknown error");
        return;
      } else if (data.code == "error" || !("job" in data)) {
        self._showImportJobError(data.message || "Unknown error");
        return;
      }

      // Different files might be selected. We start over again.
      // `self` rather than `this`: inside this callback `this` is not the
      // controller, so the cached options would otherwise survive.
      delete self._parserOptions;

      self._job = data.job;
      self._showParsingPanel(true);
    },
    "json"
  );
};
|
@ -0,0 +1,21 @@
|
||||
<div bind="wizardHeader" class="default-importing-wizard-header"><div class="grid-layout layout-tightest layout-full"><table><tr>
|
||||
<td width="1%"><button bind="startOverButton" class="button">« Start Over</button></td>
|
||||
<td width="1%"><button bind="previousButton" class="button">« Re-select Files</button></td>
|
||||
<td width="98%">Configure Parsing Options</td>
|
||||
<td style="text-align: right;">Project name</td>
|
||||
<td width="1%"><input class="inline" type="text" size="30" bind="projectNameInput" /></td>
|
||||
<td width="1%"><button bind="nextButton" class="button button-primary">Create Project »</button></td>
|
||||
</tr></table></div></div>
|
||||
|
||||
<div bind="dataPanel" class="default-importing-parsing-data-panel"></div>
|
||||
<div bind="progressPanel" class="default-importing-progress-data-panel">
|
||||
<img src="images/large-spinner.gif" /> Updating preview ...
|
||||
</div>
|
||||
|
||||
<div bind="controlPanel" class="default-importing-parsing-control-panel"><table><tr>
|
||||
<td class="default-importing-parsing-control-panel-formats">
|
||||
<div class="default-importing-parsing-control-panel-formats-message">Parse data as</div>
|
||||
<div bind="formatsContainer"></div>
|
||||
</td>
|
||||
<td bind="optionsContainer" class="default-importing-parsing-control-panel-options-panel"></td>
|
||||
</tr></table></div>
|
@ -0,0 +1,186 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Builds and shows the parsing panel.
 *
 * hasFileSelection - when truthy, the "previous" button leads back to the
 *                    file selection panel; otherwise that button is hidden.
 *
 * Defaults that are filled in when not yet set: the format (the job's
 * top-ranked format), the parser options cache (empty), and the project
 * name (derived from the first selected file's name by turning every
 * non-word character into a space, collapsing whitespace and trimming).
 * The "next" button creates the project.
 */
Refine.DefaultImportingController.prototype._showParsingPanel = function(hasFileSelection) {
  var controller = this;
  var config = this._job.config;

  this._format = this._format || config.rankedFormats[0];
  this._parserOptions = this._parserOptions || {};

  this._prepareParsingPanel();
  var bound = this._parsingPanelElmts;

  bound.nextButton.click(function() {
    controller._createProject();
  });

  if (!hasFileSelection) {
    bound.previousButton.hide();
  } else {
    bound.previousButton.click(function() {
      controller._createProjectUI.showCustomPanel(controller._fileSelectionPanel);
    });
  }

  if (!(this._projectName) && this._job.config.fileSelection.length > 0) {
    var firstSelected = this._job.config.fileSelection[0];
    var firstFile = this._job.config.retrievalRecord.files[firstSelected];
    var spaced = firstFile.fileName.replace(/\W/g, ' ');
    this._projectName = $.trim(spaced.replace(/\s+/g, ' '));
  }
  if (this._projectName) {
    bound.projectNameInput[0].value = this._projectName;
  }

  this._createProjectUI.showCustomPanel(this._parsingPanel);
};
|
||||
|
||||
/*
 * Tears down the parser UI, detaches the window resize handler installed for
 * the parsing panel, and empties the parsing panel itself.
 *
 * NOTE(review): despite its name this function only touches the *parsing*
 * panel; it may have been intended as _disposeParsingPanel -- confirm against
 * callers before renaming.
 */
Refine.DefaultImportingController.prototype._disposeFileSelectionPanel = function() {
  this._disposeParserUI();

  var resizer = this._parsingPanelResizer;
  if (resizer) {
    $(window).unbind("resize", resizer);
  }

  var panel = this._parsingPanel;
  panel.unbind();
  panel.empty();

  delete this._parsingPanelElmts;
};
|
||||
|
||||
/*
 * (Re)creates the parsing panel: loads its HTML template, binds its elements,
 * installs a window resize handler that lays out the data/progress/control
 * sub-panels, creates one tab per ranked format and selects the current format.
 */
Refine.DefaultImportingController.prototype._prepareParsingPanel = function() {
  var self = this;

  this._parsingPanel.unbind().empty().html(
      DOM.loadHTML("core", "scripts/index/default-importing-controller/parsing-panel.html"));

  this._parsingPanelElmts = DOM.bind(this._parsingPanel);
  this._parsingPanelElmts.startOverButton.click(function() {
    self._startOver();
  });
  this._parsingPanelElmts.progressPanel.hide();

  // Drop the handler left over from a previous call so that resize handlers
  // do not pile up on window each time the panel is rebuilt.
  if (this._parsingPanelResizer) {
    $(window).unbind("resize", this._parsingPanelResizer);
  }

  this._parsingPanelResizer = function() {
    var elmts = self._parsingPanelElmts;
    if (!elmts) {
      // The panel's element map has been deleted; nothing to lay out.
      return;
    }

    var width = self._parsingPanel.width();
    var height = self._parsingPanel.height();
    var headerHeight = elmts.wizardHeader.outerHeight(true);
    var controlPanelHeight = 300; // fixed height of the bottom control panel, in px

    // Data and progress panels share the area between header and control panel.
    elmts.dataPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.dataPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.dataPanel)) + "px");
    elmts.progressPanel
      .css("left", "0px")
      .css("top", headerHeight + "px")
      .css("width", (width - DOM.getHPaddings(elmts.progressPanel)) + "px")
      .css("height", (height - headerHeight - controlPanelHeight - DOM.getVPaddings(elmts.progressPanel)) + "px");

    // Control panel is pinned to the bottom of the parsing panel.
    elmts.controlPanel
      .css("left", "0px")
      .css("top", (height - controlPanelHeight) + "px")
      .css("width", (width - DOM.getHPaddings(elmts.controlPanel)) + "px")
      .css("height", (controlPanelHeight - DOM.getVPaddings(elmts.controlPanel)) + "px");
  };

  $(window).resize(this._parsingPanelResizer);
  this._parsingPanelResizer();

  var formats = this._job.config.rankedFormats;
  var createFormatTab = function(format) {
    var tab = $('<div>')
      .text(Refine.importingConfig.formats[format].label)
      .attr("format", format)
      .addClass("default-importing-parsing-control-panel-format")
      .appendTo(self._parsingPanelElmts.formatsContainer)
      .click(function() {
        self._selectFormat(format);
      });

    if (format == self._format) {
      tab.addClass("selected");
    }
  };
  for (var i = 0; i < formats.length; i++) {
    createFormatTab(formats[i]);
  }
  this._selectFormat(this._format);
};
|
||||
|
||||
/*
 * Disposes the current format-specific parser UI (if any) and clears the
 * option, progress and data containers of the parsing panel (if it is bound).
 */
Refine.DefaultImportingController.prototype._disposeParserUI = function() {
  var parserUI = this._formatParserUI;
  if (parserUI) {
    parserUI.dispose();
    delete this._formatParserUI;
  }

  var elmts = this._parsingPanelElmts;
  if (elmts) {
    elmts.optionsContainer.unbind().empty();
    // The progress panel keeps its content; only its handlers are removed.
    elmts.progressPanel.unbind();
    elmts.dataPanel.unbind().empty();
  }
};
|
||||
|
||||
/*
 * Switches the parsing panel to newFormat: once the format's initialization
 * data is available, the old parser UI is disposed, the matching format tab is
 * highlighted and a new parser UI is instantiated.
 *
 * Does nothing when newFormat is already active with a live parser UI, or when
 * no parser UI class is registered for the format.
 */
Refine.DefaultImportingController.prototype._selectFormat = function(newFormat) {
  var alreadyShowing = (newFormat == this._format) && this._formatParserUI;
  if (alreadyShowing) {
    return;
  }

  var uiClassName = Refine.importingConfig.formats[newFormat].uiClass;
  var ParserUI = Refine.DefaultImportingController.parserUIs[uiClassName];
  if (!ParserUI) {
    return;
  }

  var controller = this;
  this._ensureFormatParserUIHasInitializationData(newFormat, function() {
    controller._disposeParserUI();

    // Move the "selected" marker to the tab whose format attribute matches.
    var tabs = controller._parsingPanelElmts.formatsContainer
      .find(".default-importing-parsing-control-panel-format");
    tabs.removeClass("selected");
    tabs.each(function() {
      if (this.getAttribute("format") == newFormat) {
        $(this).addClass("selected");
      }
    });

    controller._format = newFormat;

    var elmts = controller._parsingPanelElmts;
    controller._formatParserUI = new ParserUI(
      controller,
      controller._jobID,
      controller._job,
      controller._format,
      controller._parserOptions[newFormat],
      elmts.dataPanel,
      elmts.progressPanel,
      elmts.optionsContainer
    );
  });
};
|
@ -0,0 +1,13 @@
|
||||
<div id="default-importing-progress-panel">
|
||||
<div class="grid-layout layout-normal layout-full"><table>
|
||||
<tr><td colspan="3" id="default-importing-progress-message"></td></tr>
|
||||
<tr><td colspan="3">
|
||||
<div id="default-importing-progress-bar-frame"><div id="default-importing-progress-bar-body"></div></div>
|
||||
</td></tr>
|
||||
<tr><td colspan="3">
|
||||
<button class="button" id="default-importing-progress-cancel-button">Cancel</button>
|
||||
<span id="default-importing-progress-timing"></span>
|
||||
</td></tr>
|
||||
</table></div>
|
||||
<iframe id="default-importing-iframe" name="default-importing-iframe"></iframe>
|
||||
</div>
|
@ -0,0 +1,6 @@
|
||||
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||
<tr><td>Paste data from clipboard here:</td></tr>
|
||||
<tr><td><textarea bind="textInput" name="clipboard" id="default-importing-clipboard-textarea"></textarea>
|
||||
</td></tr>
|
||||
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
||||
</table></div></form>
|
@ -0,0 +1,5 @@
|
||||
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||
<tr><td>Locate one or more files on your computer to upload:</td></tr>
|
||||
<tr><td><input type="file" multiple bind="fileInput" name="upload" /></td></tr>
|
||||
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
||||
</table></div></form>
|
@ -0,0 +1,8 @@
|
||||
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||
<tr><td colspan="2">Enter one or more web addresses (URLs) pointing to data to download:</td></tr>
|
||||
<tr bind="urlRow"><td colspan="2"><input bind="urlInput" name="download" class="default-importing-web-url" /></td></tr>
|
||||
<tr bind="buttons">
|
||||
<td width="1"><button bind="addButton" class="button" type="button">Add Another URL</button></td>
|
||||
<td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td>
|
||||
</tr>
|
||||
</table></div></form>
|
@ -0,0 +1,118 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Importing source whose form lets the user pick files from the local machine.
 */
function ThisComputerImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "This Computer",
  "id": "upload",
  "uiClass": ThisComputerImportingSourceUI
});

/*
 * Renders the upload form into bodyDiv and wires the "Next" button, which
 * starts the import job only when at least one file has been chosen.
 */
ThisComputerImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var ui = this;

  var formHtml = DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-computer-form.html");
  bodyDiv.html(formHtml);

  this._elmts = DOM.bind(bodyDiv);
  this._elmts.nextButton.click(function(evt) {
    var nothingChosen = (ui._elmts.fileInput[0].files.length === 0);
    if (nothingChosen) {
      window.alert("You must specify a data file to import.");
      return;
    }
    ui._controller.startImportJob(ui._elmts.form, "Uploading data ...");
  });
};

/*
 * Intentionally empty: this source does not move focus anywhere.
 */
ThisComputerImportingSourceUI.prototype.focus = function() {
};
|
||||
|
||||
/*
 * Importing source whose form lets the user enter one or more web addresses
 * to download data from.
 */
function UrlImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "Web Addresses (URLs)",
  "id": "download",
  "uiClass": UrlImportingSourceUI
});

/*
 * Renders the URL form into bodyDiv and wires its buttons:
 *  - "Next" starts the import job unless the first URL field is blank;
 *  - "Add Another URL" inserts a copy of the URL row above the button row.
 */
UrlImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var self = this;

  bodyDiv.html(DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-web-form.html"));

  this._elmts = DOM.bind(bodyDiv);
  this._elmts.nextButton.click(function(evt) {
    // Trim the text first and then measure it; trimming the numeric length
    // yields a string that can never be strictly equal to 0.
    if ($.trim(self._elmts.urlInput[0].value).length === 0) {
      window.alert("You must specify a web address (URL) to import.");
    } else {
      self._controller.startImportJob(self._elmts.form, "Downloading data ...");
    }
  });
  this._elmts.addButton.click(function(evt) {
    self._elmts.buttons.before(self._elmts.urlRow.clone());
  });
};

/*
 * Moves keyboard focus to the (first) URL input.
 */
UrlImportingSourceUI.prototype.focus = function() {
  this._elmts.urlInput.focus();
};
|
||||
|
||||
/*
 * Importing source whose form lets the user paste raw data into a text area.
 */
function ClipboardImportingSourceUI(controller) {
  this._controller = controller;
}

// Register this source with the default importing controller.
Refine.DefaultImportingController.sources.push({
  "label": "Clipboard",
  "id": "clipboard",
  "uiClass": ClipboardImportingSourceUI
});

/*
 * Renders the clipboard form into bodyDiv and wires the "Next" button, which
 * starts the import job only when the pasted text is not blank.
 */
ClipboardImportingSourceUI.prototype.attachUI = function(bodyDiv) {
  var ui = this;

  var formHtml = DOM.loadHTML("core", "scripts/index/default-importing-sources/import-from-clipboard-form.html");
  bodyDiv.html(formHtml);

  this._elmts = DOM.bind(bodyDiv);
  this._elmts.nextButton.click(function(evt) {
    var pasted = $.trim(ui._elmts.textInput[0].value);
    if (pasted.length === 0) {
      window.alert("You must paste some data to import.");
      return;
    }
    ui._controller.startImportJob(ui._elmts.form, "Uploading pasted data ...");
  });
};

/*
 * Moves keyboard focus to the paste text area.
 */
ClipboardImportingSourceUI.prototype.focus = function() {
  this._elmts.textInput.focus();
};
|
@ -1,7 +0,0 @@
|
||||
<form bind="form"><div class="grid-layout layout-normal"><table>
|
||||
<tr><td>File to import:</td></tr>
|
||||
<tr><td><input type="file" bind="fileInput" name="project-file" />
|
||||
<input type="hidden" name="project-name" bind="nameInput" />
|
||||
</td></tr>
|
||||
<tr><td><button bind="nextButton" class="button button-primary" type="button">Next »</button></td></tr>
|
||||
</table></div></form>
|
@ -0,0 +1,17 @@
|
||||
<!-- Form for importing an existing project archive; posts to the import-project command. -->
<form id="project-upload-form" method="post" enctype="multipart/form-data" action="/command/core/import-project" accept-charset="UTF-8">
  <div class="grid-layout layout-normal"><table>
    <tr>
      <td colspan="2">Locate an existing Google Refine project file (.tar or .tar.gz):</td>
    </tr>
    <tr>
      <td>Project file:</td>
      <td><input type="file" id="project-tar-file-input" name="project-file" /></td>
    </tr>
    <tr>
      <td>Re-name project (optional):</td>
      <td><input type="text" size="25" id="project-name-input" name="project-name" /></td>
    </tr>
    <tr>
      <td></td>
      <td><input type="submit" value="Import Project" id="import-project-button" class="button button-primary" /></td>
    </tr>
  </table></div>
</form>
|
45
main/webapp/modules/core/scripts/index/import-project-ui.js
Normal file
45
main/webapp/modules/core/scripts/index/import-project-ui.js
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Action area that shows the "import existing project" form inside elmt.
 */
Refine.ImportProjectUI = function(elmt) {
  var markup = DOM.loadHTML("core", "scripts/index/import-project-ui.html");
  elmt.html(markup);

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);
};

// Register the action area so that it is listed on the index page.
Refine.actionAreas.push({
  "id": "import-project",
  "label": "Import Project",
  "uiClass": Refine.ImportProjectUI
});
|
@ -0,0 +1,4 @@
|
||||
<div class="relative-frame">
|
||||
<div bind="projectsContainer" id="projects-container"></div>
|
||||
<div bind="workspaceControls" id="projects-workspace-controls"><a id="projects-workspace-open" href="javascript:{}" class="secondary">Browse workspace directory</a></div>
|
||||
</div>
|
274
main/webapp/modules/core/scripts/index/open-project-ui.js
Normal file
274
main/webapp/modules/core/scripts/index/open-project-ui.js
Normal file
@ -0,0 +1,274 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Action area listing the existing projects inside elmt.
 *
 * Loads the template, keeps the project list and the workspace controls laid
 * out on window resize, wires the file-input / upload-button / open-workspace
 * handlers and finally fetches the project list.
 */
Refine.OpenProjectUI = function(elmt) {
  var self = this;

  elmt.html(DOM.loadHTML("core", "scripts/index/open-project-ui.html"));

  this._elmt = elmt;
  this._elmts = DOM.bind(elmt);

  var resize = function() {
    var height = elmt.height();
    var width = elmt.width();
    var controlsHeight = self._elmts.workspaceControls.outerHeight();
    self._elmts.projectsContainer
      .css("height", (height - controlsHeight - DOM.getVPaddings(self._elmts.projectsContainer)) + "px");
    self._elmts.workspaceControls
      .css("bottom", "0px")
      .css("width", (width - DOM.getHPaddings(self._elmts.workspaceControls)) + "px");
  };
  $(window).resize(resize);
  // Deferred first layout (100 ms after construction).
  window.setTimeout(resize, 100);

  $("#project-file-input").change(function() {
    if ($("#project-name-input")[0].value.length == 0) {
      // The file list is empty when the chooser is cancelled; File.name is the
      // standard property, fileName is kept as a fallback for legacy browsers.
      var file = this.files && this.files[0];
      var fileName = file ? (file.name || file.fileName) : null;
      if (fileName) {
        $("#project-name-input")[0].value = fileName.replace(/\.\w+/, "").replace(/[_-]/g, " ");
      }
      $("#project-name-input").focus().select();
    }
  }).keypress(function(evt) {
    if (evt.keyCode == 13) {
      return self._onClickUploadFileButton(evt);
    }
  });

  $("#upload-file-button").click(function(evt) {
    return self._onClickUploadFileButton(evt);
  });

  $('#projects-workspace-open').click(function() {
    $.ajax({
      type: "POST",
      url: "/command/core/open-workspace-dir",
      dataType: "json",
      success: function (data) {
        if (data.code != "ok" && "message" in data) {
          alert(data.message);
        }
      }
    });
  });

  this._fetchProjects();
};
|
||||
|
||||
/*
 * Requests the metadata of all projects and hands the response to
 * _renderProjects.
 */
Refine.OpenProjectUI.prototype._fetchProjects = function() {
  var ui = this;
  var onMetadata = function(data) {
    ui._renderProjects(data);
  };

  $.getJSON("/command/core/get-all-project-metadata", null, onMetadata, "json");
};
|
||||
|
||||
/*
 * Renders the project list into #projects-container.
 *
 * data.projects maps project id -> metadata; each entry gets an `id` and a
 * parsed `date` (from `modified`) and the list is sorted most-recent first.
 * With no projects, a copy of #no-project-message is shown instead. Otherwise
 * the "open-project" action area is selected and one table row is built per
 * project with open, rename and delete links plus a relative modified time.
 */
Refine.OpenProjectUI.prototype._renderProjects = function(data) {
  var self = this;
  var projects = [];
  for (var n in data.projects) {
    if (data.projects.hasOwnProperty(n)) {
      var project = data.projects[n];
      project.id = n;
      project.date = Date.parseExact(project.modified, "yyyy-MM-ddTHH:mm:ssZ");
      projects.push(project);
    }
  }
  projects.sort(function(a, b) { return b.date.getTime() - a.date.getTime(); });

  var container = $("#projects-container").empty();
  if (!projects.length) {
    $("#no-project-message").clone().show().appendTo(container);
  } else {
    Refine.selectActionArea('open-project');

    var table = $(
      '<table class="list-table"><tr>' +
      '<th>Name</th>' +
      '<th></th>' +
      '<th></th>' +
      '<th align="right">Last modified</th>' +
      '</tr></table>'
    ).appendTo(container)[0];

    var MS_PER_DAY = 24 * 60 * 60 * 1000;

    // Formats a date relative to today ("today 3:05 PM", "3 days ago", ...).
    // Differences are computed from calendar dates and full years, so they
    // stay positive when the date lies in the previous calendar year.
    var formatDate = function(d) {
      d = new Date(d);
      var last_year = Date.today().add({ years: -1 });
      var last_month = Date.today().add({ months: -1 });
      var last_week = Date.today().add({ days: -7 });
      var today = Date.today();
      var tomorrow = Date.today().add({ days: 1 });

      // Whole calendar days between d's day and today (rounded to absorb DST shifts).
      var dayOfD = new Date(d.getFullYear(), d.getMonth(), d.getDate());
      var daysAgo = Math.round((today.getTime() - dayOfD.getTime()) / MS_PER_DAY);
      var diff;

      if (d.between(today, tomorrow)) {
        return "today " + d.toString("h:mm tt");
      } else if (d.between(last_week, today)) {
        diff = daysAgo;
        return (diff <= 1) ? ("yesterday " + d.toString("h:mm tt")) : (diff + " days ago");
      } else if (d.between(last_month, today)) {
        diff = Math.floor(daysAgo / 7);
        return (diff == 1) ? "a week ago" : diff + " weeks ago";
      } else if (d.between(last_year, today)) {
        diff = (today.getFullYear() - d.getFullYear()) * 12 + (today.getMonth() - d.getMonth());
        return (diff == 1) ? "a month ago" : diff + " months ago";
      } else {
        diff = today.getFullYear() - d.getFullYear();
        return (diff == 1) ? "a year ago" : diff + " years ago";
      }
    };

    var renderProject = function(project) {
      var tr = table.insertRow(table.rows.length);
      tr.className = "project";

      var nameLink = $('<a></a>')
        .addClass("list-table-itemname")
        .text(project.name)
        .attr("href", "/project?project=" + project.id)
        .appendTo(tr.insertCell(tr.cells.length));

      var renameLink = $('<a></a>')
        .text("rename")
        .addClass("secondary")
        .attr("href", "javascript:{}")
        .css("visibility", "hidden")
        .click(function() {
          var name = window.prompt("New project name:", project.name);
          if (name == null) {
            return; // prompt was cancelled
          }

          name = $.trim(name);
          if (project.name == name || name.length == 0) {
            return; // unchanged or blank
          }

          $.ajax({
            type: "POST",
            url: "/command/core/rename-project",
            data: { "project" : project.id, "name" : name },
            dataType: "json",
            success: function (data) {
              if (data && typeof data.code != 'undefined' && data.code == "ok") {
                nameLink.text(name);
              } else {
                // data may be null here, so guard before reading its message.
                var reason = (data && data.message) ? data.message : "unknown error";
                alert("Failed to rename project: " + reason);
              }
            }
          });
        }).appendTo(tr.insertCell(tr.cells.length));

      var deleteLink = $('<a></a>')
        .addClass("delete-project")
        .attr("title","Delete this project")
        .attr("href","")
        .css("visibility", "hidden")
        .html("<img src='/images/close.png' />")
        .click(function() {
          if (window.confirm("Are you sure you want to delete project \"" + project.name + "\"?")) {
            $.ajax({
              type: "POST",
              url: "/command/core/delete-project",
              data: { "project" : project.id },
              dataType: "json",
              success: function (data) {
                if (data && typeof data.code != 'undefined' && data.code == "ok") {
                  self._fetchProjects();
                }
              }
            });
          }
          return false; // href is empty; suppress navigation
        }).appendTo(tr.insertCell(tr.cells.length));

      $('<div></div>')
        .html(formatDate(project.date))
        .addClass("last-modified")
        .attr("title", project.date.toString())
        .appendTo(tr.insertCell(tr.cells.length));

      // Rename/delete links are only visible while the row is hovered.
      $(tr).mouseenter(function() {
        renameLink.css("visibility", "visible");
        deleteLink.css("visibility", "visible");
      }).mouseleave(function() {
        renameLink.css("visibility", "hidden");
        deleteLink.css("visibility", "hidden");
      });
    };

    for (var i = 0; i < projects.length; i++) {
      renderProject(projects[i]);
    }
  }
};
|
||||
|
||||
/*
 * Validates the upload form and, when valid, sets the form's action URL to
 * the create-project-from-upload command with the parsing options appended as
 * query parameters.
 *
 * Returns true when the submission may proceed; otherwise alerts the user,
 * prevents the event's default action and returns false.
 */
Refine.OpenProjectUI.prototype._onClickUploadFileButton = function(evt) {
  var projectName = $("#project-name-input")[0].value;
  var dataURL = $.trim($("#project-url-input")[0].value);
  if (! $.trim(projectName).length) {
    window.alert("You must specify a project name.");

  } else if ($("#project-file-input")[0].files.length === 0 && ! dataURL.length) {
    window.alert("You must specify a data file to upload or a URL to retrieve.");

  } else {
    // Percent-encode every value so that characters such as '&', '#', '+' or
    // non-ASCII text in a field cannot corrupt the query string.
    var param = function(name, value) {
      return name + "=" + encodeURIComponent(value);
    };

    $("#file-upload-form").attr("action",
      "/command/core/create-project-from-upload?" + [
        param("url", dataURL),
        param("split-into-columns", $("#split-into-columns-input")[0].checked),
        param("separator", $("#separator-input")[0].value),
        param("ignore", $("#ignore-input")[0].value),
        param("header-lines", $("#header-lines-input")[0].value),
        param("skip", $("#skip-input")[0].value),
        param("limit", $("#limit-input")[0].value),
        param("guess-value-type", $("#guess-value-type-input")[0].checked),
        param("ignore-quotes", $("#ignore-quotes-input")[0].checked)
      ].join("&"));

    return true;
  }

  evt.preventDefault();
  return false;
};
|
||||
|
||||
// Register the "Open Project" action area, backed by Refine.OpenProjectUI.
Refine.actionAreas.push({
  "id": "open-project",
  "label": "Open Project",
  "uiClass": Refine.OpenProjectUI
});
|
@ -0,0 +1,36 @@
|
||||
<div class="grid-layout layout-loose layout-full"><table>
|
||||
<tr>
|
||||
<td colspan="2"></td>
|
||||
<td><div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr>
|
||||
<td style="text-align: right;"> </td>
|
||||
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="grid-layout layout-tightest"><table bind="sheetRecordContainer">
|
||||
<tr><td colspan="3">Worksheets to Import</td></tr>
|
||||
</table></div></td>
|
||||
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
|
||||
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
|
||||
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
|
||||
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||
</table></div></td>
|
||||
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
|
||||
<td colspan="2">Store blank rows</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
|
||||
<td colspan="2">Store blank cells as nulls</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
</table></div>
|
@ -0,0 +1,198 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Parser UI for the Excel format. Stores the importing context and the three
 * container elements it renders into, then builds the option controls and
 * requests a first preview.
 */
Refine.ExcelParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Importing context.
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Containers owned by the parsing panel.
  this._dataContainer = dataContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._optionContainer = optionContainerElmt;

  // Handle of a pending timeout, cleared in dispose(); null when none.
  this._timerID = null;

  this._initialize();
  this._updatePreview();
};

// Make this UI resolvable by the "ExcelParserUI" uiClass name.
Refine.DefaultImportingController.parserUIs["ExcelParserUI"] = Refine.ExcelParserUI;
|
||||
|
||||
/*
 * Cancels the pending timeout, if there is one.
 */
Refine.ExcelParserUI.prototype.dispose = function() {
  if (this._timerID == null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||
|
||||
/*
 * Reports whether project creation may proceed; this UI has no preconditions
 * and therefore always answers true.
 */
Refine.ExcelParserUI.prototype.confirmReadyToCreateProject = function() {
  var ready = true;
  return ready;
};
|
||||
|
||||
/*
 * Collects the parser options currently shown in the option controls.
 *
 * Returns an object with:
 *   xmlBased               - copied from the initial config
 *   sheets                 - `index` attributes of the checked sheet checkboxes
 *   ignoreLines, limit     - field value when its checkbox is checked, else -1
 *   headerLines, skipDataLines - field value when checked, else 0
 *   storeBlankRows, storeBlankCellsAsNulls, includeFileSources - checkbox states
 * A field that does not parse as an integer falls back to the same default as
 * an unchecked checkbox.
 */
Refine.ExcelParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    xmlBased: this._config.xmlBased,
    sheets: []
  };

  // Always parse as base 10: without a radix, older engines read a leading
  // "0" as octal (e.g. "010" -> 8). parseInt never throws, so no try/catch.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  elmts.sheetRecordContainer.find('input').each(function() {
    if (this.checked) {
      options.sheets.push(parseInt(this.getAttribute('index'), 10));
    }
  });

  if (elmts.ignoreCheckbox[0].checked) {
    options.ignoreLines = parseIntDefault(elmts.ignoreInput[0].value, -1);
  } else {
    options.ignoreLines = -1;
  }
  if (elmts.headerLinesCheckbox[0].checked) {
    options.headerLines = parseIntDefault(elmts.headerLinesInput[0].value, 0);
  } else {
    options.headerLines = 0;
  }
  if (elmts.skipCheckbox[0].checked) {
    options.skipDataLines = parseIntDefault(elmts.skipInput[0].value, 0);
  } else {
    options.skipDataLines = 0;
  }
  if (elmts.limitCheckbox[0].checked) {
    options.limit = parseIntDefault(elmts.limitInput[0].value, -1);
  } else {
    options.limit = -1;
  }
  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||
|
||||
/**
 * Builds the Excel option panel: loads its HTML template, lists the sheets from
 * the config with one checkbox each, pre-fills the controls from the config and
 * wires change events so that edits schedule a preview refresh.
 */
Refine.ExcelParserUI.prototype._initialize = function() {
  var self = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/excel-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);

  var elmts = this._optionContainerElmts;
  elmts.previewButton.click(function() { self._updatePreview(); });

  // One table row per sheet: checkbox (carrying the sheet index), name, row count.
  var sheetTable = elmts.sheetRecordContainer[0];
  $.each(config.sheetRecords, function(i, sheet) {
    var tr = sheetTable.insertRow(sheetTable.rows.length);
    var td0 = $(tr.insertCell(0)).attr('width', '1%');
    var checkbox = $('<input>')
      .attr('type', 'checkbox')
      .attr('index', i)
      .appendTo(td0);
    if (sheet.selected) {
      checkbox.attr('checked', 'true');
    }

    $(tr.insertCell(1)).text(sheet.name);
    $(tr.insertCell(2)).text(sheet.rows + ' rows');
  });

  // Ticks the checkbox and shows the number, but only for positive counts.
  var presetCount = function(count, checkbox, input) {
    if (count > 0) {
      checkbox.attr("checked", "checked");
      input[0].value = count.toString();
    }
  };
  presetCount(config.ignoreLines, elmts.ignoreCheckbox, elmts.ignoreInput);
  presetCount(config.headerLines, elmts.headerLinesCheckbox, elmts.headerLinesInput);
  presetCount(config.limit, elmts.limitCheckbox, elmts.limitInput);
  // Fixed: this used to read skipInput.value[0].value, which throws a TypeError
  // because a jQuery object has no "value" property.
  presetCount(config.skipDataLines, elmts.skipCheckbox, elmts.skipInput);

  if (config.storeBlankRows) {
    elmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }
  if (config.storeBlankCellsAsNulls) {
    elmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);
};
|
||||
|
||||
/**
 * Debounces preview refreshes: cancels any pending refresh timer and starts a
 * new one that calls _updatePreview() after half a second.
 */
Refine.ExcelParserUI.prototype._scheduleUpdatePreview = function() {
  var parserUI = this;

  if (parserUI._timerID != null) {
    window.clearTimeout(parserUI._timerID);
    parserUI._timerID = null;
  }

  var fire = function() {
    parserUI._timerID = null;
    parserUI._updatePreview();
  };
  parserUI._timerID = window.setTimeout(fire, 500); // 0.5 second
};
|
||||
|
||||
/**
 * Refreshes the preview: shows the progress indicator, sends the current
 * options to the controller and, when the controller reports "ok", fetches the
 * preview data, hides the progress indicator and renders a fresh preview table
 * into the (emptied) data container.
 */
Refine.ExcelParserUI.prototype._updatePreview = function() {
  var parserUI = this;

  var showPreview = function(projectData) {
    parserUI._progressContainer.hide();

    new Refine.PreviewTable(projectData, parserUI._dataContainer.unbind().empty());
  };

  var onOptionsUpdated = function(result) {
    if (result.status == "ok") {
      parserUI._controller.getPreviewData(showPreview);
    }
  };

  this._progressContainer.show();

  this._controller.updateFormatAndOptions(this.getOptions(), onOptionsUpdated);
};
|
@ -0,0 +1,51 @@
|
||||
<div class="grid-layout layout-loose layout-full"><table>
|
||||
<tr>
|
||||
<td><div class="grid-layout layout-tighter"><table>
|
||||
<tr>
|
||||
<td width="1%">Character encoding</td>
|
||||
<td><select bind="encodingSelect"></select></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr>
|
||||
<td style="text-align: right;"> </td>
|
||||
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="3"><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td>Column widths:</td><td><input style="width: 40em;" bind="columnWidthsInput" /></td><td>comma separated numbers</td></tr>
|
||||
<tr><td>Column names:</td><td><input style="width: 40em;" bind="columnNamesInput" /></td><td>optional, comma separated</td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td colspan="2">Rows are separated by</td></tr>
|
||||
<tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
|
||||
<tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
|
||||
<input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
|
||||
<tr><td colspan="2">Escape special characters with \</td></tr>
|
||||
</table></div></td>
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
|
||||
<td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="headerLinesCheckbox" /></td><td>Parse next</td>
|
||||
<td><input bind="headerLinesInput" type="text" class="lightweight" size="2" value="1" /> line(s) as column headers</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
|
||||
<td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
|
||||
</table></div></td>
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="guessCellValueTypesCheckbox" /></td>
|
||||
<td>Parse cell text into<br/>numbers, dates, ...</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
|
||||
<td colspan="2">Store blank rows</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
|
||||
<td colspan="2">Store blank cells as nulls</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||
<td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
</table></div>
|
@ -0,0 +1,471 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
 * Parser UI for fixed-width text files.
 *
 * Stores the controller, job information, format and config together with the
 * three container elements, then builds the option panel and requests a first
 * preview.
 *
 * @param controller the importing controller used to push options and fetch preview data
 * @param jobID identifier of the importing job
 * @param job the importing job
 * @param format the format this UI was created for
 * @param config initial parser configuration (read for columnWidths, lineSeparator, ...)
 * @param dataContainerElmt jQuery element that receives the preview table
 * @param progressContainerElmt jQuery element shown while a preview is loading
 * @param optionContainerElmt jQuery element that receives the option panel
 */
Refine.FixedWidthParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Containers
  this._dataContainer = dataContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._optionContainer = optionContainerElmt;

  // Job context
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Handle of the pending debounce timer, if any
  this._timerID = null;

  this._initialize();
  this.updatePreview();
};
// Make this UI discoverable by name from the default importing controller.
Refine.DefaultImportingController.parserUIs["FixedWidthParserUI"] = Refine.FixedWidthParserUI;
|
||||
|
||||
/**
 * Turns a separator string into its displayable, escaped form: every backslash
 * becomes "\\", every newline becomes "\n" and every tab becomes "\t" (written
 * here as the two-character escape sequences).
 *
 * Global regular expressions are used because String.replace with a string
 * pattern only replaces the first occurrence.
 *
 * @param {string} s raw separator
 * @return {string} escaped separator
 */
Refine.FixedWidthParserUI.encodeSeparator = function(s) {
  // Backslashes first, so the backslashes introduced below are not doubled.
  return s.replace(/\\/g, "\\\\")
    .replace(/\n/g, "\\n")
    .replace(/\t/g, "\\t");
};
|
||||
|
||||
/**
 * Inverse of encodeSeparator: converts the escape sequences backslash-n,
 * backslash-t and double backslash back into newline, tab and a single
 * backslash. Any other text is left untouched.
 *
 * The string is decoded in a single left-to-right pass so that an escaped
 * backslash followed by "n" or "t" is not mistaken for a newline or tab, and
 * so that every occurrence (not just the first) is decoded.
 *
 * @param {string} s escaped separator as typed by the user
 * @return {string} raw separator
 */
Refine.FixedWidthParserUI.decodeSeparator = function(s) {
  return s.replace(/\\([nt\\])/g, function(match, escaped) {
    switch (escaped) {
      case 'n':
        return "\n";
      case 't':
        return "\t";
      default:
        return "\\";
    }
  });
};
|
||||
|
||||
/**
 * Releases this UI's pending work: cancels the debounce timer if one is set.
 */
Refine.FixedWidthParserUI.prototype.dispose = function() {
  if (this._timerID == null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||
|
||||
/**
 * Tells the caller whether a project may be created with the current settings.
 * The fixed-width parser UI has no precondition to satisfy.
 *
 * @return {boolean} always true
 */
Refine.FixedWidthParserUI.prototype.confirmReadyToCreateProject = function() {
  var alwaysReady = true;
  return alwaysReady;
};
|
||||
|
||||
/**
 * Gathers the parser options currently chosen in the fixed-width option panel.
 *
 * @return {Object} an options object holding columnWidths (a copy of the
 *   config's array), columnNames (only when the names field is non-empty),
 *   lineSeparator, ignoreLines, headerLines, skipDataLines, limit, and the
 *   guessCellValueTypes, storeBlankRows, storeBlankCellsAsNulls and
 *   includeFileSources flags.
 */
Refine.FixedWidthParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    columnWidths: [].concat(this._config.columnWidths)
  };

  // Comma-separated names; whitespace after a comma is dropped before splitting.
  var columnNames = $.trim(elmts.columnNamesInput[0].value).replace(/,\s+/g, ',').split(',');
  if (columnNames.length > 0 && columnNames[0].length > 0) {
    options.columnNames = columnNames;
  }

  var separatorChoice = this._optionContainer.find("input[name='row-separator']:checked")[0].value;
  if (separatorChoice === 'new-line') {
    options.lineSeparator = "\n";
  } else {
    // Anything else is treated as a custom separator typed in escaped form.
    options.lineSeparator = Refine.FixedWidthParserUI.decodeSeparator(
      elmts.rowSeparatorInput[0].value);
  }

  // Always parse as base 10: without an explicit radix, legacy engines treat a
  // leading "0" as octal and every engine treats a leading "0x" as hexadecimal.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  // A numeric option only counts when its checkbox is ticked; otherwise (or when
  // the text is not a number) the option falls back to its default.
  var readCount = function(checkbox, input, def) {
    return checkbox[0].checked ? parseIntDefault(input[0].value, def) : def;
  };

  options.ignoreLines = readCount(elmts.ignoreCheckbox, elmts.ignoreInput, -1);
  options.headerLines = readCount(elmts.headerLinesCheckbox, elmts.headerLinesInput, 0);
  options.skipDataLines = readCount(elmts.skipCheckbox, elmts.skipInput, 0);
  options.limit = readCount(elmts.limitCheckbox, elmts.limitInput, -1);

  options.guessCellValueTypes = elmts.guessCellValueTypesCheckbox[0].checked;

  options.storeBlankRows = elmts.storeBlankRowsCheckbox[0].checked;
  options.storeBlankCellsAsNulls = elmts.storeBlankCellsAsNullsCheckbox[0].checked;
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||
|
||||
/**
 * Builds the fixed-width option panel: loads its HTML template, pre-fills the
 * controls from the config and wires change events so that edits schedule a
 * preview refresh. Edits to the column-widths field are additionally parsed
 * and written back into the config.
 */
Refine.FixedWidthParserUI.prototype._initialize = function() {
  var self = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/fixed-width-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);

  var elmts = this._optionContainerElmts;
  // Fixed: this class defines updatePreview(), not _updatePreview(); the old
  // handler threw a TypeError when the button was clicked.
  elmts.previewButton.click(function() { self.updatePreview(); });

  elmts.columnWidthsInput[0].value = config.columnWidths.join(',');
  if ('columnNames' in config) {
    elmts.columnNamesInput[0].value = config.columnNames.join(',');
  }

  // Select the matching radio button and show the separator in escaped form.
  var rowSeparatorValue = (config.lineSeparator == "\n") ? 'new-line' : 'custom';
  this._optionContainer.find(
    "input[name='row-separator'][value='" + rowSeparatorValue + "']").attr("checked", "checked");
  elmts.rowSeparatorInput[0].value =
    Refine.FixedWidthParserUI.encodeSeparator(config.lineSeparator);

  // Ticks the checkbox and shows the number, but only for positive counts.
  var presetCount = function(count, checkbox, input) {
    if (count > 0) {
      checkbox.attr("checked", "checked");
      input[0].value = count.toString();
    }
  };
  presetCount(config.ignoreLines, elmts.ignoreCheckbox, elmts.ignoreInput);
  presetCount(config.headerLines, elmts.headerLinesCheckbox, elmts.headerLinesInput);
  presetCount(config.limit, elmts.limitCheckbox, elmts.limitInput);
  // Fixed: this used to read skipInput.value[0].value, which throws a TypeError
  // because a jQuery object has no "value" property.
  presetCount(config.skipDataLines, elmts.skipCheckbox, elmts.skipInput);

  if (config.storeBlankRows) {
    elmts.storeBlankRowsCheckbox.attr("checked", "checked");
  }

  if (config.guessCellValueTypes) {
    elmts.guessCellValueTypesCheckbox.attr("checked", "checked");
  }

  if (config.storeBlankCellsAsNulls) {
    elmts.storeBlankCellsAsNullsCheckbox.attr("checked", "checked");
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }

  var onChange = function() {
    self._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", onChange);
  this._optionContainer.find("select").bind("change", onChange);

  elmts.columnWidthsInput.bind("change", function() {
    var newColumnWidths = [];
    var parts = $.trim(this.value).replace(/,\s+/g, ',').split(',');
    for (var i = 0; i < parts.length; i++) {
      var n = parseInt(parts[i], 10);
      if (isNaN(n)) {
        // Ignore the edit entirely while any entry is not a number.
        return;
      }
      newColumnWidths.push(n);
    }
    self._config.columnWidths = newColumnWidths;
    onChange();
  });
  elmts.columnNamesInput.bind("change", onChange);
};
|
||||
|
||||
/**
 * Debounces preview refreshes: cancels any pending refresh timer and starts a
 * new one that calls updatePreview() after half a second.
 */
Refine.FixedWidthParserUI.prototype._scheduleUpdatePreview = function() {
  var parserUI = this;

  if (parserUI._timerID != null) {
    window.clearTimeout(parserUI._timerID);
    parserUI._timerID = null;
  }

  var fire = function() {
    parserUI._timerID = null;
    parserUI.updatePreview();
  };
  parserUI._timerID = window.setTimeout(fire, 500); // 0.5 second
};
|
||||
|
||||
/**
 * Refreshes the preview: shows the progress indicator, sends the current
 * options (with cell-type guessing switched off) to the controller and, when
 * the controller reports "ok", fetches preview data, renders a fixed-width
 * preview table into the data container and hides the progress indicator.
 */
Refine.FixedWidthParserUI.prototype.updatePreview = function() {
  var parserUI = this;

  this._progressContainer.show();

  var previewOptions = this.getOptions();
  // for preview, we need exact text, so it's easier to show where the columns are split
  previewOptions.guessCellValueTypes = false;

  var showPreview = function(projectData) {
    new Refine.FixedWidthPreviewTable(
      parserUI,
      parserUI._config,
      projectData,
      parserUI._dataContainer
    );
    parserUI._progressContainer.hide();
  };

  this._controller.updateFormatAndOptions(previewOptions, function(result) {
    if (result.status == "ok") {
      parserUI._controller.getPreviewData(showPreview, 20);
    }
  });
};
|
||||
|
||||
/**
 * Preview table for fixed-width data with draggable column separators.
 * Stores its arguments and renders immediately.
 *
 * @param parserUI the owning parser UI (its option elements and updatePreview() are used while rendering)
 * @param config parser configuration; columnWidths is read and rewritten by the separators
 * @param projectData preview data providing columnModel.columns and rowModel.rows
 * @param elmt jQuery element to render into
 */
Refine.FixedWidthPreviewTable = function(parserUI, config, projectData, elmt) {
  this._elmt = elmt;
  this._projectData = projectData;
  this._config = config;
  this._parserUI = parserUI;

  this._render();
};
|
||||
|
||||
/**
 * Renders the preview: a data table built from the project data, overlaid with
 * one draggable separator per configured column width plus a "ghost" separator
 * that follows the mouse and creates a new column boundary when pressed.
 *
 * Moving, adding or closing a separator recomputes the column widths, writes
 * them into the config and the column-widths input, and asks the parser UI for
 * a new preview. The element's scroll position is preserved across the render.
 */
Refine.FixedWidthPreviewTable.prototype._render = function() {
  var self = this;
  var elmt = this._elmt;

  // Remember where the user had scrolled to; restored at the very end.
  var scrollTop = elmt[0].scrollTop;
  var scrollLeft = elmt[0].scrollLeft;

  elmt.unbind().empty();

  var container = $('<div>')
    .addClass('fixed-width-preview-container')
    .appendTo(elmt);
  var table = $('<table>')
    .addClass("data-table")
    .addClass("fixed-width-preview-data-table")
    .appendTo(container)[0];

  var columns = this._projectData.columnModel.columns;
  var columnWidths = [].concat(this._config.columnWidths);

  // Appends a cell to the row, striped by its position within the row.
  var addCell = function(tr) {
    var index = tr.cells.length;
    var td = tr.insertCell(index);
    td.className = (index % 2 == 0) ? 'even' : 'odd';
    return td;
  };

  /*------------------------------------------------------------
   *  Column Headers
   *------------------------------------------------------------
   */

  var trHead = table.insertRow(table.rows.length);
  // Header above the row-index column; explicit entity so the placeholder is a
  // non-breaking space regardless of how this file is edited or transcoded.
  $(addCell(trHead)).addClass("column-header").html('&nbsp;');

  var createColumnHeader = function(column, index) {
    var name = column.name;
    if (index < columnWidths.length) {
      // Clip the visible name to the column's character width.
      name = name.slice(0, columnWidths[index]);
    }
    $(addCell(trHead))
      .addClass("column-header")
      .text(name)
      .attr('title', column.name);
  };
  for (var c = 0; c < columns.length; c++) {
    createColumnHeader(columns[c], c);
  }

  /*------------------------------------------------------------
   *  Data Cells
   *------------------------------------------------------------
   */

  // Fills one content div according to the kind of cell value.
  var renderCell = function(cell, divContent) {
    if (!cell || ("v" in cell && cell.v === null)) {
      $('<span>').html("&nbsp;").appendTo(divContent);
    } else if ("e" in cell) {
      $('<span>').addClass("data-table-error").text(cell.e).appendTo(divContent);
    } else if (!("r" in cell) || !cell.r) {
      if (typeof cell.v !== "string") {
        if (typeof cell.v == "number") {
          divContent.addClass("data-table-cell-content-numeric");
        }
        $('<span>')
          .addClass("data-table-value-nonstring")
          .text(cell.v)
          .appendTo(divContent);
      } else if (URL.looksLikeUrl(cell.v)) {
        $('<a>')
          .text(cell.v)
          .attr("href", cell.v)
          .attr("target", "_blank")
          .appendTo(divContent);
      } else {
        $('<span>').text(cell.v).appendTo(divContent);
      }
    }
  };

  var renderRow = function(tr, row) {
    var tdIndex = addCell(tr);
    $('<div></div>').html((row.i + 1) + ".").appendTo(tdIndex);

    var cells = row.cells;
    for (var i = 0; i < columns.length; i++) {
      var column = columns[i];
      var td = addCell(tr);
      var divContent = $('<div/>').addClass("data-table-cell-content").appendTo(td);

      var cell = (column.cellIndex < cells.length) ? cells[column.cellIndex] : null;
      renderCell(cell, divContent);
    }
  };

  var rows = this._projectData.rowModel.rows;
  for (var r = 0; r < rows.length; r++) {
    renderRow(table.insertRow(table.rows.length), rows[r]);
  }

  /*------------------------------------------------------------
   *  Geometry: where character 0 starts and how wide a character is
   *------------------------------------------------------------
   */

  // Left edge of the first data column. With zero columns there is no such
  // cell (position() on an empty set would make this throw), so fall back to
  // the right edge of the index column.
  var firstDataCell = trHead.cells[1];
  var pixelOffset;
  if (firstDataCell) {
    pixelOffset = $(firstDataCell).position().left;
  } else {
    var indexCell = $(trHead.cells[0]);
    pixelOffset = indexCell.position().left + indexCell.outerWidth();
  }

  // Measure the average character width with an off-screen sample string.
  var testString = '01234567890123456789012345678901234567890123456789';
  var testDiv = $('<div>')
    .css('position', 'absolute')
    .css('top', '-100px')
    .text(testString)
    .appendTo(container);
  var pixelsPerChar = testDiv.width() / testString.length;
  testDiv.remove();

  /*------------------------------------------------------------
   *  Column Separators
   *------------------------------------------------------------
   */

  var columnCharIndexes = [];
  var positionColumnSeparator = function(outer, charIndex) {
    outer.css('left',
      Math.round(pixelOffset + charIndex * pixelsPerChar - DOM.getHPaddings(outer) / 2) + 'px');
  };
  var computeCharIndex = function(evt) {
    var offset = evt.pageX - container.offset().left;
    return Math.round((offset - pixelOffset) / pixelsPerChar);
  };

  // Converts separator positions back into widths (dropping zero-width columns)
  // and asks the parser UI to reload the preview with them.
  var updatePreview = function() {
    columnCharIndexes.sort(function(a, b) { return a - b; });

    var newColumnWidths = [];
    for (var i = 0; i < columnCharIndexes.length; i++) {
      var charIndex = columnCharIndexes[i];
      var columnWidth = (i == 0) ? charIndex : (charIndex - columnCharIndexes[i - 1]);
      if (columnWidth > 0) {
        newColumnWidths.push(columnWidth);
      }
    }

    self._config.columnWidths = newColumnWidths;
    self._parserUI._optionContainerElmts.columnWidthsInput[0].value = newColumnWidths.join(',');
    self._parserUI.updatePreview();
  };

  // Ghost separator that tracks the mouse wherever no real separator is hovered.
  var newSeparator = $('<div>')
    .addClass('fixed-width-preview-column-separator-outer')
    .append($('<div>').addClass('fixed-width-preview-column-separator-inner'))
    .appendTo(container);

  var createColumnSeparator = function(charIndex, index) {
    columnCharIndexes[index] = charIndex;

    var outer = $('<div>')
      .addClass('fixed-width-preview-column-separator-outer')
      .appendTo(container);
    var inner = $('<div>')
      .addClass('fixed-width-preview-column-separator-inner')
      .appendTo(outer);
    var close = $('<div>').appendTo(inner);

    positionColumnSeparator(outer, charIndex);

    outer.mouseover(function() {
      newSeparator.hide();
    })
    .mouseout(function() {
      newSeparator.show();
    })
    .mousedown(function() {
      // Drag: follow the mouse until the button is released over the container.
      var mouseMove = function(evt) {
        var newCharIndex = computeCharIndex(evt);
        positionColumnSeparator(outer, newCharIndex);

        evt.preventDefault();
        evt.stopPropagation();
        return false;
      };
      var mouseUp = function(evt) {
        container.unbind('mousemove', mouseMove);
        container.unbind('mouseup', mouseUp);

        var newCharIndex = computeCharIndex(evt);
        positionColumnSeparator(outer, newCharIndex);

        columnCharIndexes[index] = newCharIndex;
        updatePreview();

        evt.preventDefault();
        evt.stopPropagation();
        return false;
      };
      container.bind('mousemove', mouseMove);
      container.bind('mouseup', mouseUp);
    });

    close.click(function() {
      // Collapse onto the previous boundary; the resulting zero-width column
      // is dropped by updatePreview().
      columnCharIndexes[index] = index > 0 ? columnCharIndexes[index - 1] : 0;
      updatePreview();
    });
  };

  var charOffset = 0;
  for (var w = 0; w < columnWidths.length; w++) {
    var width = columnWidths[w];
    createColumnSeparator(charOffset + width, w);
    charOffset += width;
  }

  container
    .mouseout(function(evt) {
      newSeparator.hide();
    })
    .mousemove(function(evt) {
      positionColumnSeparator(newSeparator.show(), computeCharIndex(evt));
    });
  newSeparator.mousedown(function(evt) {
    var newCharIndex = computeCharIndex(evt);
    columnCharIndexes.push(newCharIndex);
    updatePreview();

    evt.preventDefault();
    evt.stopPropagation();
    return false;
  });

  elmt[0].scrollTop = scrollTop;
  elmt[0].scrollLeft = scrollLeft;
};
|
@ -0,0 +1,4 @@
|
||||
<div class="json-parser-ui-select-message">
|
||||
Click on the first JSON { } node corresponding to the first record to load.
|
||||
</div>
|
||||
<div class="json-parser-ui-select-dom" bind="domContainer"></div>
|
@ -0,0 +1,19 @@
|
||||
<div class="grid-layout layout-loose layout-full"><table>
|
||||
<tr>
|
||||
<td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
|
||||
<tr>
|
||||
<td style="text-align: right;"> </td>
|
||||
<td width="1%"><button class="button" bind="pickRecordElementsButton">Pick Record Nodes</button></td>
|
||||
<td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
|
||||
</tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div class="grid-layout layout-tightest"><table>
|
||||
<tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
|
||||
<td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> record(s) of data</td></tr>
|
||||
<tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
|
||||
<td colspan="2">Store file source (file names, URLs) in each row</td></tr>
|
||||
</table></div></td>
|
||||
</tr>
|
||||
</table></div>
|
@ -0,0 +1,241 @@
|
||||
/*
|
||||
|
||||
Copyright 2011, Google Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/**
 * Parser UI for JSON files.
 *
 * Stores the controller, job information, format and config together with the
 * three container elements, then builds the option panel and opens the
 * record-node picker.
 *
 * @param controller the importing controller
 * @param jobID identifier of the importing job
 * @param job the importing job
 * @param format the format this UI was created for
 * @param config initial parser configuration (read for dom, recordPath, limit, ...)
 * @param dataContainerElmt jQuery element that receives the picker / preview
 * @param progressContainerElmt jQuery element for the progress indicator
 * @param optionContainerElmt jQuery element that receives the option panel
 */
Refine.JsonParserUI = function(controller, jobID, job, format, config,
    dataContainerElmt, progressContainerElmt, optionContainerElmt) {

  // Containers
  this._dataContainer = dataContainerElmt;
  this._progressContainer = progressContainerElmt;
  this._optionContainer = optionContainerElmt;

  // Job context
  this._controller = controller;
  this._jobID = jobID;
  this._job = job;
  this._format = format;
  this._config = config;

  // Handle of the pending debounce timer, if any
  this._timerID = null;

  this._initialize();
  this._showPickRecordNodesUI();
};
// Make this UI discoverable by name from the default importing controller.
Refine.DefaultImportingController.parserUIs["JsonParserUI"] = Refine.JsonParserUI;
|
||||
|
||||
/**
 * Releases this UI's pending work: cancels the debounce timer if one is set.
 */
Refine.JsonParserUI.prototype.dispose = function() {
  if (this._timerID == null) {
    return;
  }
  window.clearTimeout(this._timerID);
  this._timerID = null;
};
|
||||
|
||||
/**
 * Checks that a record path has been chosen before a project is created.
 * When none has been chosen, the user is alerted.
 *
 * @return {boolean} true when the config holds a non-empty record path,
 *   false otherwise (previously this path returned undefined)
 */
Refine.JsonParserUI.prototype.confirmReadyToCreateProject = function() {
  if ((this._config.recordPath) && this._config.recordPath.length > 0) {
    return true;
  }

  window.alert('Please specify a record path first.');
  return false;
};
|
||||
|
||||
/**
 * Gathers the parser options currently chosen in the JSON option panel.
 *
 * @return {Object} an options object holding recordPath (from the config),
 *   limit (-1 when the limit checkbox is not ticked or the text is not a
 *   number) and the includeFileSources flag.
 */
Refine.JsonParserUI.prototype.getOptions = function() {
  var elmts = this._optionContainerElmts;
  var options = {
    recordPath: this._config.recordPath
  };

  // Defined locally, as in the other parser UIs: nothing in this function's
  // scope provided parseIntDefault, so ticking the limit checkbox raised a
  // ReferenceError. Base 10 is forced so "010"/"0x10" are read as decimal.
  var parseIntDefault = function(s, def) {
    var n = parseInt(s, 10);
    return isNaN(n) ? def : n;
  };

  if (elmts.limitCheckbox[0].checked) {
    options.limit = parseIntDefault(elmts.limitInput[0].value, -1);
  } else {
    options.limit = -1;
  }
  options.includeFileSources = elmts.includeFileSourcesCheckbox[0].checked;

  return options;
};
|
||||
|
||||
/**
 * Builds the JSON option panel: loads its HTML template, pre-fills the limit
 * and file-source controls from the config, hooks up the two buttons and wires
 * change events so that edits schedule a preview refresh.
 */
Refine.JsonParserUI.prototype._initialize = function() {
  var parserUI = this;
  var config = this._config;

  this._optionContainer.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-ui.html"));
  this._optionContainerElmts = DOM.bind(this._optionContainer);

  var elmts = this._optionContainerElmts;
  elmts.previewButton.click(function() { parserUI._updatePreview(); });

  // Only a positive limit is reflected in the UI.
  if (config.limit > 0) {
    elmts.limitCheckbox.attr("checked", "checked");
    elmts.limitInput[0].value = config.limit.toString();
  }
  if (config.includeFileSources) {
    elmts.includeFileSourcesCheckbox.attr("checked", "checked");
  }
  elmts.pickRecordElementsButton.click(function() {
    parserUI._showPickRecordNodesUI();
  });

  var schedulePreview = function() {
    parserUI._scheduleUpdatePreview();
  };
  this._optionContainer.find("input").bind("change", schedulePreview);
  this._optionContainer.find("select").bind("change", schedulePreview);
};
|
||||
|
||||
/**
 * Replaces the data container's content with a clickable rendering of the
 * config's JSON document (this._config.dom). Hovering a node highlights it;
 * clicking an object or literal node passes that node's path to
 * _setRecordPath().
 *
 * Paths are arrays of keys from the root; the root and every array element are
 * represented by the segment '__anonymous__'.
 */
Refine.JsonParserUI.prototype._showPickRecordNodesUI = function() {
  var self = this;

  this._dataContainer.unbind().empty().html(
    DOM.loadHTML("core", "scripts/index/parser-interfaces/json-parser-select-ui.html"));

  var elmts = DOM.bind(this._dataContainer);

  // True when elmt is the innermost '.node' element around the event target,
  // so that only the node directly under the mouse reacts.
  var hittest = function(evt, elmt) {
    var a = $(evt.target).closest('.node');
    return a.length > 0 && a[0] == elmt[0];
  };
  var registerEvents = function(elmt, path) {
    elmt.bind('mouseover', function(evt) {
      if (hittest(evt, elmt)) {
        elmts.domContainer.find('.highlight').removeClass('highlight');
        elmt.addClass('highlight');
      }
    })
    .bind('mouseout', function(evt) {
      elmt.removeClass('highlight');
    })
    .click(function(evt) {
      if (hittest(evt, elmt)) {
        self._setRecordPath(path);
      }
    });
  };

  var punctuation = function(text, parent) {
    $('<span>').addClass('punctuation').text(text).appendTo(parent);
  };
  var newChildNode = function(parent) {
    return $('<div>').addClass('node').addClass('indented').appendTo(parent);
  };

  function renderArray(a, container, parentPath) {
    punctuation('[', container);

    // All elements of an array share the same path.
    var elementPath = [].concat(parentPath);
    elementPath.push('__anonymous__');

    var elementNode = null;
    for (var i = 0; i < a.length; i++) {
      if (elementNode != null) {
        // The separating comma goes at the end of the previous element.
        punctuation(',', elementNode);
      }
      elementNode = newChildNode(container);

      renderNode(a[i], elementNode, elementPath);
    }

    punctuation(']', container);
  }

  function renderObject(o, container, parentPath) {
    punctuation('{', container);

    var elementNode = null;
    for (var key in o) {
      if (o.hasOwnProperty(key)) {
        if (elementNode != null) {
          // The separating comma goes at the end of the previous field.
          punctuation(',', elementNode);
        }
        elementNode = newChildNode(container);

        $('<span>').text(key).addClass('field-name').appendTo(elementNode);
        $('<span>').text(': ').addClass('punctuation').appendTo(elementNode);

        var fieldPath = [].concat(parentPath);
        fieldPath.push(key);

        renderNode(o[key], elementNode, fieldPath);
      }
    }
    punctuation('}', container);

    registerEvents(container, parentPath);
  }

  function renderNode(node, container, parentPath) {
    if (node == null) {
      $('<span>').addClass('literal').text('null').appendTo(container);
    } else if ($.isPlainObject(node)) {
      renderObject(node, container, parentPath);
    } else if ($.isArray(node)) {
      renderArray(node, container, parentPath);
    } else {
      $('<span>').addClass('literal').text(node.toString()).appendTo(container);
      registerEvents(container, parentPath);
    }
  }

  renderNode(this._config.dom, elmts.domContainer, [ '__anonymous__' ]);
};
|
||||
|
||||
/**
 * Debounces preview refreshes: cancels any refresh that is still pending and
 * schedules a new call to _updatePreview half a second from now.
 */
Refine.JsonParserUI.prototype._scheduleUpdatePreview = function() {
  var ui = this;
  var pendingTimer = ui._timerID;

  if (pendingTimer != null) {
    window.clearTimeout(pendingTimer);
    ui._timerID = null;
  }

  // Clear the stored handle before refreshing so the timer is no longer
  // treated as pending once it has fired.
  var firePreview = function() {
    ui._timerID = null;
    ui._updatePreview();
  };

  ui._timerID = window.setTimeout(firePreview, 500); // 0.5 second
};
|
||||
|
||||
/**
 * Stores the given record path in the parser configuration and refreshes the
 * preview immediately.
 *
 * @param path the value to store in this._config.recordPath
 */
Refine.JsonParserUI.prototype._setRecordPath = function(path) {
  var ui = this;

  ui._config.recordPath = path;
  ui._updatePreview();
};
|
||||
|
||||
/**
 * Sends the current parser options to the controller and, when they are
 * accepted, fetches preview data and renders it into the data container.
 *
 * The progress container is shown for the duration of the request. It is
 * hidden again once the preview data arrives, or as soon as the controller
 * reports a status other than "ok".
 */
Refine.JsonParserUI.prototype._updatePreview = function() {
  var self = this;

  this._progressContainer.show();

  var options = this.getOptions();
  // for preview, we need exact text, so it's easier to show where the columns are split
  options.guessCellValueTypes = false;

  this._controller.updateFormatAndOptions(options, function(result) {
    if (result.status == "ok") {
      // NOTE(review): 100 is presumably the number of preview rows requested;
      // confirm against the controller's getPreviewData.
      self._controller.getPreviewData(function(projectData) {
        self._progressContainer.hide();

        new Refine.PreviewTable(projectData, self._dataContainer.unbind().empty());
      }, 100);
    } else {
      // No preview will arrive for rejected options, so hide the progress
      // indicator here; otherwise it would stay visible indefinitely.
      self._progressContainer.hide();
    }
  });
};
|
@ -0,0 +1,45 @@
|
||||
<div class="grid-layout layout-loose layout-full"><table>
  <!-- Parser options form. Elements carrying a bind attribute are presumably
       looked up by the accompanying script (confirm against the caller). -->

  <!-- Top row: character encoding and the preview button -->
  <tr>
    <td><div class="grid-layout layout-tighter"><table>
      <tr>
        <td width="1%">Character encoding</td>
        <td><select bind="encodingSelect"></select></td>
      </tr>
    </table></div></td>
    <td colspan="2"><div class="grid-layout layout-tighter layout-full"><table>
      <tr>
        <td style="text-align: right;"> </td>
        <td width="1%"><button class="button" bind="previewButton">Update Preview</button></td>
      </tr>
    </table></div></td>
  </tr>

  <!-- Bottom row: line grouping, blank/source handling, skip and limit settings -->
  <tr>
    <td><div class="grid-layout layout-tightest"><table>
      <tr><td colspan="2">Parse every <input bind="linesPerRowInput" type="text" class="lightweight" size="2" value="0" />
        lines into one row
      </td></tr>

      <tr><td colspan="2">Lines are separated by</td></tr>
      <tr><td width="1%"><input type="radio" name="row-separator" value="new-line" /></td><td>new line characters \n</td></tr>
      <tr><td width="1%"><input type="radio" name="row-separator" value="custom" /></td><td>custom
        <input bind="rowSeparatorInput" type="text" class="lightweight" size="5" /></td></tr>
      <tr><td colspan="2">Escape special characters with \</td></tr>
    </table></div></td>
    <td><div class="grid-layout layout-tightest"><table>
      <tr><td width="1%"><input type="checkbox" bind="storeBlankRowsCheckbox" /></td>
        <td colspan="2">Store blank rows</td></tr>
      <tr><td width="1%"><input type="checkbox" bind="storeBlankCellsAsNullsCheckbox" /></td>
        <td colspan="2">Store blank cells as nulls</td></tr>
      <tr><td width="1%"><input type="checkbox" bind="includeFileSourcesCheckbox" /></td>
        <td>Store file source<br/>(file names, URLs)<br/>in each row</td></tr>
    </table></div></td>
    <td colspan="2"><div class="grid-layout layout-tightest"><table>
      <tr><td width="1%"><input type="checkbox" bind="ignoreCheckbox" /></td><td>Ignore first</td>
        <td><input bind="ignoreInput" type="text" class="lightweight" size="2" value="0" /> line(s) at beginning of file</td></tr>
      <tr><td width="1%"><input type="checkbox" bind="skipCheckbox" /></td><td>Discard initial</td>
        <td><input bind="skipInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
      <tr><td width="1%"><input type="checkbox" bind="limitCheckbox" /></td><td>Load at most</td>
        <td><input bind="limitInput" type="text" class="lightweight" size="2" value="0" /> row(s) of data</td></tr>
    </table></div></td>
  </tr>
</table></div>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user